kingaa / pomp

R package for statistical inference using partially observed Markov processes
https://kingaa.github.io/pomp
GNU General Public License v3.0
110 stars 26 forks source link

Error in profiles.R file #117

Closed DimitarTerziev closed 3 years ago

DimitarTerziev commented 3 years ago

I am trying to reproduce your codes from https://royalsocietypublishing.org/action/downloadSupplement?doi=10.1098%2Frspb.2015.0347&file=rspb20150347supp1.pdf, and I realized that the profiles.R file does not work properly with that version of ebola.R, because there are some updates of the package "pomp", particularly for some of the arguments in pomp().

I have updated the file ebola.R as suggested in https://github.com/kingaa/pomp/tree/master/examples, but now I get this error when trying to estimate R0 (or "k") with profiles.R:

## Code:

require(pomp)
require(plyr)
require(reshape2)
require(magrittr)
options(stringsAsFactors=FALSE)

require(foreach)
require(doMPI)
require(iterators)

source("ebola.R")

foreach (country=c("GIN", "LBR", "SLE"),
         .inorder=TRUE,.combine=c) %:%
  foreach (type=c("raw","cum"),.inorder=TRUE,.combine=c) %do%
  {
    ebolaModel(country=country)
    } -> models
dim(models) <- c(2,3)
dimnames(models) <- list(c("raw","cum"),
                         c("GIN", "LBR", "SLE"))

noexport <- c("models")

cl <- startMPIcluster()
registerDoMPI(cl)

bake <- function (file, expr) {
  if (file.exists(file)) {
    readRDS(file)
    } else {
      val <- eval(expr)
      saveRDS(val,file=file)
      val
      }
  }

 trajectory matching: R0 profile

bake(file="tm-fits-R0.rds",{

  starts <- profileDesign(R0=seq(1,3,length=200),
                          upper=c(k=1),
                          lower=c(k=1e-8),
                          nprof=40)

  foreach (start=iter(starts,by='row'),
           .combine=rbind,.inorder=FALSE,
           .noexport=noexport,
           .options.mpi=list(chunkSize=100,seed=2016138277L,info=TRUE)
           ) %:%
    foreach (type=c("raw","cum"),.combine=rbind,.inorder=FALSE) %:%
    foreach (country=c("GIN", "LBR", "SLE"),
             .combine=rbind,.inorder=FALSE) %dopar%
    {
      tm <- models[type,country][[1]]
      tic <- Sys.time()
      coef(tm,names(start)) <- unname(unlist(start))
      coef(tm,"rho") <- 0.2
      tm <- traj.match(tm,est=c("k","E_0","I_0"),transform=TRUE)
      if (coef(tm,"k")==0) coef(tm,"k") <- 1e-8
      if (coef(tm,"E_0")==0) coef(tm,"E_0") <- 1e-12
      if (coef(tm,"I_0")==0) coef(tm,"I_0") <- 1e-12
      tm <- traj.match(tm,method='subplex',control=list(maxit=1e5))
      toc <- Sys.time()
      etime <- toc-tic
      units(etime) <- "hours"
      data.frame(country=country,type=type,as.list(coef(tm)),
                 loglik=logLik(tm),conv=tm$convergence,
                 etime=as.numeric(etime))
      } %>% mutate(sum=S_0+E_0+I_0+R_0,
                   S_0=round(N*S_0/sum),
                   E_0=round(N*E_0/sum),
                   I_0=round(N*I_0/sum),
                   R_0=round(N*R_0/sum)) %>%
    subset(conv %in% c(0,1),select=-sum) %>%
    unique()

  }) -> profR0

## Error:
Called from: doTryCatch(return(expr), name, parentenv, handler)

Error durante el wrapup: tipo no implementado (29) en 'eval'

Error: no more error handlers available (recursive errors?); invoking 'abort' restart
Error durante el wrapup: INTEGER() can only be applied to a 'integer', not a 'unknown type #29'
Error: no more error handlers available (recursive errors?); invoking 'abort' restart

The fact that I am working on Windows and I am not being able to install the package "doMPI" appropriately might be the reason for this problem, but I am not sure about it since I am not able to install Rmpi yet. However, I have tried with "doParallel()", but I get similar errors.

Thank you in advance.

kingaa commented 3 years ago

Lots of things have changed in pomp since that paper was written. Have a look at FAQ 1.4. If you want a short-cut to updated versions of those codes, they are available as part of the short course (Lesson 7 in particular).

The equivalent code and data are also built in to the package, as the function ebola.

And yes, I would recommend doParallel for Windows computers.

kingaa commented 3 years ago

Am closing this issue: feel free to re-open if more discussion is needed.

kingaa commented 3 years ago

@DimitarTerziev: In retrospect, I don't believe the above fully resolves the issues you have encountered. Consider the following codes, which are updated versions of portions of the archived code you have been working with. Do you see how to move forward from here?

library(pomp)
library(tidyverse)
library(foreach)
library(doParallel)
stopifnot(getRversion()>="4.0")
stopifnot(packageVersion("pomp")>="3.1")

WHO.situation.report.Oct.1 <- '
week,GIN,LBR,SLE
1,2.244,,
2,2.244,,
3,0.073,,
4,5.717,,
5,3.954,,
6,5.444,,
7,3.274,,
8,5.762,,
9,7.615,,
10,7.615,,
11,27.392,,
12,17.387,,
13,27.115,,
14,29.29,,
15,27.84,,
16,16.345,,
17,10.917,,
18,11.959,,
19,11.959,,
20,8.657,,
21,26.537,,
22,47.764,3.517,
23,26.582,1.043,5.494
24,32.967,18,57.048
25,18.707,16.34,76.022
26,24.322,13.742,36.768
27,4.719,10.155,81.929
28,7.081,24.856,102.632
29,8.527,53.294,69.823
30,92.227,70.146,81.783
31,26.423,139.269,99.775
32,16.549,65.66,88.17
33,36.819,240.645,90.489
34,92.08,274.826,161.54
35,101.03,215.56,168.966
36,102.113,388.553,186.144
37,83.016,410.299,220.442
38,106.674,300.989,258.693
39,55.522,240.237,299.546
'

## Population sizes in Guinea, Liberia, and Sierra Leone (census 2014)
populations <- c(GIN=10628972,LBR=4092310,SLE=6190280)
populations["WA"] <- sum(populations)

read_csv(WHO.situation.report.Oct.1) %>%
  mutate(
    date=seq(from=as.Date("2014-01-05"),length=length(week),by='week')
  ) %>%
  gather(country,cases,-week,-date) %>%
  mutate(deaths=NA) -> dat

##  Measurement model: hierarchical model for cases
## p(C_t | H_t): Negative binomial with mean rho*H_t and variance rho*H_t*(1+k*rho*H_t)
dObs <- Csnippet('
  double f;
  if (k > 0.0)
    f = dnbinom_mu(nearbyint(cases),1.0/k,rho*N_EI,1);
  else
    f = dpois(nearbyint(cases),rho*N_EI,1);
  lik = (give_log) ? f : exp(f);
')

rObs <- Csnippet('
  if (k > 0) {
    cases = rnbinom_mu(1.0/k,rho*N_EI);
    deaths = rnbinom_mu(1.0/k,rho*cfr*N_IR);
  } else {
    cases = rpois(rho*N_EI);
    deaths = rpois(rho*cfr*N_IR);
  }')

### measurement model for ordinary least-squares
dObsLS <- Csnippet('
  double f;
  f = dnorm(cases,rho*N_EI,k,1);
  lik = (give_log) ? f : exp(f);
')

rObsLS <- Csnippet('
  cases = rnorm(rho*N_EI,k);
  deaths = NA_REAL;
')

## Process model simulator
rSim <- Csnippet('
  double lambda, beta;
  double *E = &E1;
  beta = R0 * gamma; // Transmission rate
  lambda = beta * I / N; // Force of infection
  int i;

  // Transitions
  // From class S
  double transS = rbinom(S, 1.0 - exp(- lambda * dt)); // No of infections
  // From class E
  double transE[nstageE]; // No of transitions between classes E
  for(i = 0; i < nstageE; i++){
    transE[i] = rbinom(E[i], 1.0 - exp(- nstageE * alpha * dt));
  }
  // From class I
  double transI = rbinom(I, 1.0 - exp(- gamma * dt)); // No of transitions I->R

  // Balance the equations
  S -= transS;
  E[0] += transS - transE[0];
  for(i=1; i < nstageE; i++) {
    E[i] += transE[i-1] - transE[i];
  }
  I += transE[nstageE - 1] - transI;
  R += transI;
  N_EI += transE[nstageE - 1]; // No of transitions from E to I
  N_IR += transI; // No of transitions from I to R
')

## Deterministic skeleton (an ODE), used in trajectory matching
skel <- Csnippet('
  double lambda, beta;
  double *E = &E1;
  double *DE = &DE1;
  beta = R0 * gamma; // Transmission rate
  lambda = beta * I / N; // Force of infection
  int i;

  // Balance the equations
  DS = - lambda * S;
  DE[0] = lambda * S - nstageE * alpha * E[0];
  for (i=1; i < nstageE; i++)
    DE[i] = nstageE * alpha * (E[i-1]-E[i]);
  DI = nstageE * alpha * E[nstageE-1] - gamma * I;
  DR = gamma * I;
  DN_EI = nstageE * alpha * E[nstageE-1];
  DN_IR = gamma * I;
')

ebolaModel <- function (country=c("GIN", "SLE", "LBR", "WA"),
  data = NULL,
  timestep = 0.01, nstageE = 3L,
  type = c("raw","cum"), na.rm = FALSE, least.sq = FALSE) {

  type <- match.arg(type)
  ctry <- match.arg(country)
  pop <- unname(populations[ctry])

  ## Incubation period is supposed to be Gamma distributed with shape parameter 3
  ## and mean 11.4 days.  The discrete-time formula is used to calculate the
  ## corresponding alpha (cf He et al., Interface 2010).
  ## Case-fatality ratio is fixed at 0.7 (cf WHO Ebola response team, NEJM 2014)
  incubation_period <- 11.4/7
  infectious_period <- 7/7
  index_case <- 10/pop
  dt <- timestep
  nstageE <- as.integer(nstageE)

  globs <- paste0("static int nstageE = ",nstageE,";");

  theta <- c(
    N=pop,
    R0=1.4,
    alpha=-1/(nstageE*dt)*log(1-nstageE*dt/incubation_period),
    gamma=-log(1-dt/infectious_period)/dt,
    rho=0.2,cfr=0.7,
    k=0,
    S_0=1-index_case,
    E_0=index_case/2-5e-9,
    I_0=index_case/2-5e-9,
    R_0=1e-8
  )

  if (is.null(data)) {
    if (ctry=="WA") {
      dat %>%
        group_by(week) %>%
        summarize(
          cases=sum(cases,na.rm=TRUE),
          deaths=sum(deaths.na.rm=TRUE)
        ) -> dat
    } else {
      dat %>%
        filter(country==ctry) %>%
        select(-country) -> dat
    }
  } else {
    dat <- data
  }

  if (na.rm) {
    dat %>%
      filter(!is.na(cases)) %>%
      mutate(week=week-min(week)+1) -> dat
  }
  if (type=="cum") {
    dat %>%
      mutate(
        cases=cumsum(cases),
        deaths=cumsum(deaths)
      ) -> dat
  }

  ## Create the pomp object
  dat %>%
    select(week,cases,deaths) %>%
    pomp(
      times="week", t0=0,
      params=theta,
      globals=globs,
      accumvars=if (type=="raw") c("N_EI","N_IR") else character(0),
      statenames=c("S","E1","I","R","N_EI","N_IR"),
      paramnames=c("N","R0","alpha","gamma","rho","k","cfr",
        "S_0","E_0","I_0","R_0"),
      nstageE=nstageE,
      dmeasure=if (least.sq) dObsLS else dObs,
      rmeasure=if (least.sq) rObsLS else rObs,
      rprocess=discrete_time(rSim,delta.t=timestep),
      skeleton=vectorfield(skel),
      partrans=parameter_trans(
        log=c("R0","k"), logit="rho",
        barycentric=c("S_0","E_0","I_0","R_0")
      ),
      rinit=function (S_0, E_0, I_0, R_0, t0, nstageE, N, ...) {
        all.state.names <- c("S",paste0("E",1:nstageE),"I","R","N_EI","N_IR")
        comp.names <- c("S",paste0("E",1:nstageE),"I","R")
        x0 <- setNames(numeric(length(all.state.names)),all.state.names)
        frac <- c(S_0,rep(E_0/nstageE,nstageE),I_0,R_0)
        x0[comp.names] <- round(N*frac/sum(frac))
        x0
      }
    ) -> po
}

expand_grid(
  country=c("SLE","LBR","GIN","WA"),
  datatype=c("raw","cum")
) -> model_cases

foreach (m=iter(model_cases,"row"),
  .inorder=TRUE,.combine=c) %do%
  {
    ebolaModel(country=m$country,type=m$datatype,na.rm=TRUE)
  } %>%
  set_names(
    model_cases %>%
      unite(name,country,datatype) %>%
      pull(name)
  ) -> models

## trajectory matching: R0 profile

library(doParallel)
registerDoParallel()

profile_design(
  country=c("SLE","LBR","GIN","WA"),
  datatype=c("raw","cum"),
  R0=seq(1,3,length=20),
  rho=0.2,
  upper=c(k=1),
  lower=c(k=1e-8),
  nprof=40
) %>%
  unite(name,country,datatype) -> starts

estnames <- c("k","E_0","I_0")

foreach (start=iter(starts,by='row'),
  .combine=bind_rows,.inorder=FALSE) %dopar%
  {
    library(pomp)
    library(subplex)
    tm <- models[[start$name]]
    tic <- Sys.time()
    coef(tm) -> theta
    start %>% select(-name) -> guess
    theta[names(guess)] <- unlist(guess)
    tm %>%
      traj_objfun(
        est=c("k","E_0","I_0"),
        params=theta
      ) -> tm
    subplex(
      fn=tm,
      par=coef(tm,pars=estnames,transform=TRUE),
      control=list(maxit=1e5)
    ) -> fit
    tm(fit$par) # ensures fitted parameters are stored in 'tm'
    toc <- Sys.time()
    etime <- toc-tic
    units(etime) <- "hours"
    bind_cols(name=start$name,t(coef(tm)),
      logLik=logLik(tm),
      conv=fit$convergence,
      etime=as.numeric(etime)
    )
  } %>% mutate(
          sum=S_0+E_0+I_0+R_0,
          S_0=round(N*S_0/sum),
          E_0=round(N*E_0/sum),
          I_0=round(N*I_0/sum),
          R_0=round(N*R_0/sum)
        ) %>%
  filter(conv %in% c(0,1)) %>%
  select(-sum) %>%
  separate(name,into=c("country","type")) -> profR0

profR0 %>%
  group_by(country,type) %>%
  mutate(logLik=logLik-max(logLik)) %>%
  group_by(country,type,R0) %>%
  filter(logLik==max(logLik)) %>%
  ggplot(aes(x=R0,y=logLik,color=type,group=type))+
  geom_point()+
  geom_line()+
  facet_wrap(~country)+
  theme_bw()
kingaa commented 3 years ago

@DimitarTerziev: I am closing this issue on the assumption that you have resolved the problem. If that is not the case, or you wish to discuss more, feel free to reopen the issue.