kingaa / pomp

R package for statistical inference using partially observed Markov processes
https://kingaa.github.io/pomp
GNU General Public License v3.0
111 stars 27 forks source link

Error: in ‘bsmc2’: ‘dmeasure’ with log=TRUE returns illegal value #152

Closed kingaa closed 3 years ago

kingaa commented 3 years ago

Recently, some colleagues (from an economic background) working on vineyard dieback came to me with the goal of extracting signals from data they have. Looking at their model, I thought POMP would be suitable as opposed to STAN (they planned to use). Because I have never used POMP myself, I tried to play with it myself. However, when I tried to use a Bayesian sequential monte carlo method on simulated data, I get the error in ‘bsmc2’: ‘dmeasure’ with log=TRUE returns illegal value. Part of the error message shows, I believe that the estimate of the likelihood at the initial guess is -Inf. After a long investigation, I still could not figure out what I was doing wrong.

library(tidyverse)
library(pomp)

rproc <- Csnippet("
  double n_HE = rbinom(H,1-exp(-(Beta_HE*(C1+C2+I) + mu_E)*dt));
  double n_EC1 = rbinom(E,1-exp(-Beta_EC1*dt));
  double n_C1I = rbinom(C1,1-exp(-Beta_C1I*dt));
  double n_IC2 = rbinom(I,1-exp(-Beta_IC2*dt));
  double n_C2I = rbinom(C2,1-exp(-Beta_C2I*dt));
  double n_IR = rbinom(I,1-exp(-Beta_IR*dt));
  double n_C1R = rbinom(C1,1-exp(-Beta_C1R*dt));
  double n_C2R = rbinom(C2,1-exp(-Beta_C2R*dt));
  H  = H - n_HE;
  E  = E + n_HE - n_EC1;
  C1 = C1 + n_EC1 - n_C1I - n_C1R ;
  C2 = C2 + n_IC2 - n_C2I - n_C2R ;
  I  = I + n_C2I + n_C1I - n_IC2 - n_IR;
  R  = R + n_IR + n_C1R + n_C2R;

  if (!(R_FINITE(C2)|R_FINITE(I))) Rprintf(\"%lg %lg %lg %lg %lg %lg %lg %lg %lg  %lg %lg %lg\\n\",
    H,E,C2,C1,I,R,n_HE,n_EC1,n_C1I,n_IC2,n_C2I,n_C2R);

")

init <- Csnippet("
  H  = H_0;
  E  = E_0;
  C1 = C1_0;
  C2 = C2_0;
  I  = I_0;
  R  = N - H_0 - E_0 - C1_0 - C2_0 - I_0;
")

statenames <- c("H","E","C1","C2","I","R")

t0 <- 0

rp_names <-  c("Beta_HE","Beta_EC1","Beta_C1I","Beta_C2I",
               "Beta_IC2","Beta_IR","Beta_C1R","Beta_C2R",
               "mu_E","N","rho_H","rho_I","rho_R","H_0","E_0",
               "C1_0","C2_0","I_0")

dmeas <- Csnippet("
  lik = dbinom(H_1,H + E + C2,rho_H,1) +
        dbinom(I_1,I,rho_I,1);
        //dbinom(R_1,R,rho_R,1);
        lik = (give_log) ? lik : exp(lik);
        if (!R_FINITE(lik)) Rprintf(\"%lg %lg %lg %lg\\n\",rho_H,rho_I,rho_R,lik,H_1,I_1);

")

rmeas <- Csnippet("
   H_1 = rbinom(H + E + C2,rho_H);
   I_1 = rbinom(I,rho_I);
   //R_1 = rbinom(R,rho_R);
")

Simulate data:

simulate(
  times=seq(0,15,1),t0=0,
  params=c(Beta_HE=0.001,Beta_EC1=0.02,Beta_C1I=0.05,
    Beta_C2I=0.05,Beta_IC2=0.02,Beta_IR=1/15,
    Beta_C1R=1/40,Beta_C2R=1/40,mu_E=0.001,N=10000,
    rho_H=0.923505,rho_I=0.9,rho_R=0.95,
    H_0=9950,E_0=10,C1_0=10,C2_0=10,I_0=10),
  rprocess=euler(rproc, delta.t = 1),
  rinit=init,
  rmeasure=rmeas,
  dmeasure=dmeas,
  statenames=statenames,
  paramnames=rp_names,
  obsnames=c("H_1","I_1"),
  partrans=parameter_trans(log=c("rho_H","rho_I","rho_R"))
) -> depr

Bayesian sequential Monte Carlo:

set.seed(136872209L)
Np <- 1000

rprior <- Csnippet("
  rho_H = rbeta(40,10);
  rho_I = rbeta(90,10);")

dprior <- Csnippet("
    lik = dbeta(rho_H,40,10,1) +
            dbeta(rho_I,90,10,1);
    if (!give_log) lik = exp(lik);")

fit1 <- bsmc2(
  depr,
  Np=Np,rprior=rprior,dprior=dprior,
  smooth=0.2,
  paramnames=rp_names,
  partrans=parameter_trans(log=c("rho_H","rho_I"))
)

Error messages:

Error: in ‘bsmc2’: ‘dmeasure’ with log=TRUE returns illegal value.
Log likelihood, data, states, and parameters are:
    time:            1
  loglik:         -Inf
     H_1:         9222
     I_1:           12
       H:         9633
       E:          327
      C1:           10
      C2:           10
       I:            9
       R:           11
 Beta_HE:        0.001
Beta_EC1:         0.02
Beta_C1I:         0.05
Beta_C2I:         0.05
Beta_IC2:         0.02
 Beta_IR:    0.0666667
Beta_C1R:        0.025
Beta_C2R:        0.025
    mu_E:        0.001
       N:        10000
   rho_H:      0.92292
   rho_I:     0.898477
   rho_R:         0.95
     H_0:         9950
     E_0:           10
    C1_0:           10
    C2_0:           10
     I_0:           10

Diagnostics:

-----------------------------------
 contents of user Renviron file 
-----------------------------------
PATH="${RTOOLS40_HOME}\usr\bin;${PATH}"

-----------------------------------
 contents of site Rprofile file 
-----------------------------------
# Things you might want to change

# options(papersize="a4")
# options(editor="notepad")
# options(pager="internal")

# set the default help type
# options(help_type="text")
  options(help_type="html")

# set a site library
# .Library.site <- file.path(chartr("\\", "/", R.home()), "site-library")

# set a CRAN mirror
# local({r <- getOption("repos")
#       r["CRAN"] <- "http://my.local.cran"
#       options(repos=r)})

# Give a fortune cookie, but only to interactive sessions
# (This would need the fortunes package to be installed.)
#  if (interactive()) 
#    fortunes::fortune()

-----------------------------------
 sessionInfo 
-----------------------------------
R version 4.1.0 (2021-05-18)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19041)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets 
[7] methods   base     

other attached packages:
 [1] doParallel_1.0.16 iterators_1.0.13  foreach_1.5.1    
 [4] pomp_3.4          forcats_0.5.1     stringr_1.4.0    
 [7] dplyr_1.0.7       purrr_0.3.4       readr_1.4.0      
[10] tidyr_1.1.3       tibble_3.1.2      ggplot2_3.3.4    
[13] tidyverse_1.3.1  

loaded via a namespace (and not attached):
 [1] deSolve_1.28     tidyselect_1.1.1 reshape2_1.4.4  
 [4] haven_2.4.1      lattice_0.20-44  colorspace_2.0-1
 [7] vctrs_0.3.8      generics_0.1.0   utf8_1.2.1      
[10] rlang_0.4.11     pillar_1.6.1     glue_1.4.2      
[13] withr_2.4.2      DBI_1.1.1        dbplyr_2.1.1    
[16] modelr_0.1.8     readxl_1.3.1     lifecycle_1.0.0 
[19] plyr_1.8.6       munsell_0.5.0    gtable_0.3.0    
[22] cellranger_1.1.0 rvest_1.0.0      mvtnorm_1.1-2   
[25] codetools_0.2-18 coda_0.19-4      labeling_0.4.2  
[28] fansi_0.5.0      broom_0.7.7      Rcpp_1.0.6      
[31] scales_1.1.1     backports_1.2.1  jsonlite_1.7.2  
[34] farver_2.1.0     fs_1.5.0         hms_1.1.0       
[37] digest_0.6.27    stringi_1.6.2    grid_4.1.0      
[40] cli_2.5.0        tools_4.1.0      magrittr_2.0.1  
[43] crayon_1.4.1     pkgconfig_2.0.3  ellipsis_0.3.2  
[46] xml2_1.3.2       reprex_2.0.0     lubridate_1.7.10
[49] assertthat_0.2.1 httr_1.4.2       rstudioapi_0.13 
[52] R6_2.5.0         compiler_4.1.0  
kingaa commented 3 years ago

It appears that the problem is to do with the fact that you sometimes have I < I_1, in contradiction to your assumption that I_1 ~ Binomial(I,rho_I). This is evident in the error message, which prints the values of the state variables and the data (and also the parameters). To diagnose this, I ran your codes and noticed the messages coming from the dmeas snippet. I modified that snippet as follows. This allowed me to see that the error was arising because I < I_1.

dmeas <- Csnippet("
  lik = dbinom(H_1,H + E + C2,rho_H,1) +
          dbinom(I_1,I,rho_I,1);
  lik = (give_log) ? lik : exp(lik);
  if (!R_FINITE(lik)) Rprintf(\"dmeas: %lg %lg %lg %lg %lg %lg %lg\\n\",rho_H,rho_I,lik,H_1,I_1,H+E+C2,I);
")

However, there is nothing wrong with having a log likelihood of -Inf (likelihood = 0) in your model. Therefore, this is a bug in bsmc2. I have just committed a bug fix: it appears in version 3.4.2.0. Can you install it from source and have a look?

kingaa commented 3 years ago

Thanks for your quick reply. It works now.

holaanna commented 2 years ago

I am facing the same issue but with a log likelihood of Nan (likelihood=-Inf) in the model. This error appears in both bmsc2 and pmcmc. Is it not another bug? It supposes to reject such moves I think.

kingaa commented 2 years ago

Seems more likely that the bug is in your code, since likelihoods should never be negative. See also FAQ 4.1.