kingaa / pomp

R package for statistical inference using partially observed Markov processes
https://kingaa.github.io/pomp
GNU General Public License v3.0

Can we fit the time-series with the weighted likelihood? #169

Closed: Spatial-R closed this issue 2 years ago

Spatial-R commented 2 years ago

I wanted to ask for your suggestion on the most effective way of fitting surveillance data. Currently, I want to use a mathematical model to fit a long-term time series. However, the vast majority of time points fall in the non-epidemic period, when the number of monitored cases is low. As a result, the model seems to focus on fitting these low values, and the fit to the epidemic surveillance data is poorer.

[figure: weekly surveillance time series with the model fit]

Therefore, is it possible to give greater weight to the surveillance data from the epidemic period when calculating the likelihood in the pomp package? I would prefer that the model fit the epidemic surveillance data well, even if the non-epidemic fit is mediocre.
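To make concrete what I mean by "weighting", here is a sketch (not my actual code): a hypothetical weight covariate w, added to covar.dat and taking values greater than 1 during epidemic weeks, would scale the conditional log-likelihood inside dmeasure.

dmeas_weighted <- Csnippet("
  double m = rho*C;
  double v = m*(1.0-rho+phi*phi*m);
  double ll = dnorm(cases,m,sqrt(v)+1.0e-60,1); // conditional log-likelihood
  ll *= w;                                      // w > 1 in epidemic weeks
  lik = (give_log) ? ll : exp(ll);"
)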

My code and a link to the data follow. I would appreciate your insightful comments.

The version of the pomp package is 2.8.

library(pomp)
library(magrittr)
library(dplyr)
library(stringi)
library(ggplot2)
library(reshape)

load("Test.RData")

rproc <- Csnippet("
  double beta,foi,birthsnew;
  double rate[17], trans[17];
  double mu = 0.007;
  const double *cpv = &cpv1;
  const double *cpd = &cpd1;
  const double *qm = &qm1;

  // transmission rate
  double qmc;
  int v;
  double qmctm = 0;

  for (v = 0; v < K; v++) {
    qmctm += exp(-5*decay*cpd[v])*cpv[v]*qm[v];
  }

  qmc =  365*(qmctm/(qmctm+1));
  if(qmc < 1) qmc = 1;

  if(S < 1) S = 1;
  if(V < 1) V = 1;
  if(I < 1) I = 1;
  if(R < 1) R = 1;
  if(RHT < 1) RHT = 1;
  if(RBT < 1) RBT = 1;

  double seas;

  if (holiday == 0)
    seas = 1.0+amplitude;
  else
    seas = 1.0-amplitude;

  double envrt;

  if (AH > (24*Rt)) {
    envrt = sca*(AH-24*Rt)+2*Rt0;
  } else {
    envrt = sca1*(24*Rt-AH)+2*Rt0;
  }

  beta =  seas*envrt*gamma;
  foi = pow(I+100*iota,alpha)*beta/pop;   // force of infection

  double AH1 = nearbyint(H3_A/(0.1*rhoH1)*dt); // AH1(t): cases with H1N1 infection
  double BYV = nearbyint(B_A/(0.1*rhoH2)*dt);  // BYV(t): cases with influenza B infection

  //dw = rgammawn(sigmaSE,dt);

  rate[0] = foi;             // stochastic force of infection
  rate[1] = mu;              // natural S death
  rate[2] = vac*365/7;             // S-V

  rate[3] = mu;              // natural V death
  rate[4] = 5*tff*qmc*sigma;      // V-S
  rate[5] = foi*(1-VEh1);        // V-I

  rate[6] = gamma;           // I-R
  rate[7] = mu;              // natural I death

  rate[8] = mu;              // natural R death
  rate[9] = sigma*qmc;       // R to S (waning immunity)
  rate[10] = vac*365/7;             // R-V

  rate[11] = mu;              // RH1 death
  rate[12] = 180*sigmaH1;     // RH1 to S
  rate[13] = vac*365/7;     // RH1 to V

  rate[14] = mu;              // RHB death
  rate[15] = 180*sigmaHY;     // RHB to S
  rate[16] = vac*365/7;     // RHB to V

  // Poisson births
  birthsnew = rpois(birth*dt); // the birth population

  // transitions between classes
  reulermultinom(3,S,&rate[0],dt,&trans[0]);
  reulermultinom(3,V,&rate[3],dt,&trans[3]);
  reulermultinom(2,I,&rate[6],dt,&trans[6]);
  reulermultinom(3,R,&rate[8],dt,&trans[8]);
  reulermultinom(3,RHT,&rate[11],dt,&trans[11]); 
  reulermultinom(3,RBT,&rate[14],dt,&trans[14]);            

  S += birthsnew + trans[9] + trans[4] + trans[12] + trans[15] - trans[0] - trans[1] - trans[2] - AH1 - BYV;
  V += trans[2] + trans[10] + trans[13] + trans[16] - trans[3] - trans[4] - trans[5];
  I += trans[0] + trans[5] - trans[6] - trans[7];
  R += trans[6] - trans[8] - trans[9] - trans[10];
  RHT += AH1 - trans[11] - trans[12] - trans[13];
  RBT += BYV - trans[14] - trans[15] - trans[16];
  C += trans[0] + trans[5];           // true incidence"
)

initlz <- Csnippet("
 double m = pop/(S_0+0.5+I_0+R_0+RHT_0 +RBT_0 +V_0);
 S = nearbyint(m*(S_0+0.5));
 I = nearbyint(m*I_0);
 R = nearbyint(m*R_0);
 RHT = nearbyint(m*RHT_0);
 RBT = nearbyint(m*RBT_0);
 V = nearbyint(m*V_0);
 C = 0;"
)

rmeas <- Csnippet("
  double tol = 1.0e-60;
  double v;                  
  double rhot,phitem;

  if ((t - floor(t)) < 0.25 || (t - floor(t)) > 0.75) {
    phitem = phis;   // off-season reporting parameters (phis was otherwise unused)
    rhot = rhos;
  } else {
    phitem = phi;
    rhot = rho;
  }
  double m = rhot*C;
  v = m*(1.0-rhot+phitem*phitem*m);

  cases = rnorm(m,sqrt(v)+tol);
  if (cases > 0.0) {
  cases = nearbyint(cases);
  } else {
  cases = 0.0;
  }"
)

dmeas <- Csnippet("
  double tol = 1.0e-60;
  double v;
  double rhot,phitem;

  if ((t - floor(t)) < 0.25 || (t - floor(t)) > 0.75) {
    phitem = phis;   // off-season reporting parameters (phis was otherwise unused)
    rhot = rhos;
  } else {
    phitem = phi;
    rhot = rho;
  }
  double lik0;
  double m = rhot*C;
  v = m*(1.0- rhot+phitem*phitem*m);
  if (cases > 0.0) {
  lik0 = log(pnorm(cases+0.5,m,sqrt(v)+tol,1,0)-pnorm(cases-0.5,m,sqrt(v)+tol,1,0)+tol);
  } else {
  lik0 = log(pnorm(cases+0.5,m,sqrt(v)+tol,1,0)+tol);
  }
  lik = (give_log) ? lik0 : exp(lik0);"
)

parameter_seir <- c("phi","rhoH1","iota","Rt0","Rt","sca","sca1",
  "phis","S_0","I_0","R_0","alpha","amplitude","decay","rhos","tff",
  "sigma","gamma","RHT_0","sigmaH1","rho","sigmaHY","rhoH2","RBT_0","V_0",
  paste0("qm",1:4))

case.dat |>
  pomp(t0= with(case.dat,2*week[1] - week[2]),
    time="week",
    rprocess=euler(step.fun=rproc,delta.t= 2/365.25),
    rinit=initlz,
    dmeasure=dmeas,
    rmeasure=rmeas,
    covar= covariate_table(covar.dat,times = "week"),
    accumvars = c("C"),
    globals = measles_globals,
    paramnames = parameter_seir,
    statenames=c("S","I","R","RHT","RBT","V","C"),
    partrans=parameter_trans(log=c("alpha"),
      logit = c("phi","sigma","amplitude","rho","rhos","Rt0","Rt","sca","sca1",
        "iota",paste0("qm",1:4),"decay","sigmaH1","tff",
        "sigmaHY","rhoH2","RBT_0","phis","V_0",
        "S_0","I_0","R_0","RHT_0","rhoH1"))
  ) -> origin.model

guess.param$sigma <- c(0.16,0.16)
guess.param <- guess.param[,match(parameter_seir,names(guess.param))]
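As a sanity check before the expensive global search below, it can help to confirm that the model object simulates sensibly. A sketch, assuming guess.param (loaded from Test.RData) holds the lower and upper search bounds in its two rows:

test_params <- apply(guess.param, 2, mean)  # midpoint of the search box
origin.model |>
  simulate(params = test_params, nsim = 5, format = "data.frame") |>
  ggplot(aes(x = week, y = cases, group = .id, color = .id)) +
  geom_line()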

library(doSNOW)
mcors <- 80
random.number <- 560
Np <- 3000
Nmif <- 100
rw.sd_rp <- 0.002
rw.sd_ivp <- 0.002

ivp_para <- c("S_0","I_0","R_0","RHT_0","RBT_0","V_0"); ivp_pos <- match(ivp_para,parameter_seir)
parameter_seir_nivp <- parameter_seir[-(ivp_pos)]
length_para <- length(parameter_seir_nivp)
fix_col <- which(!parameter_seir_nivp %in% c("alpha","gamma","sigma"))
rw_text_nivp <- paste(paste(parameter_seir_nivp[fix_col],"=",rw.sd_rp,sep = ""),collapse = ",")
rw_text_ivp <- paste(paste(ivp_para,"=",rw.sd_ivp,"",sep = ""),collapse = ",")
rw_text <- paste("rw.sd(",rw_text_nivp,",",rw_text_ivp,")",sep="")
parameter.rw.sd <- eval(parse(text = rw_text))
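An equivalent construction that avoids eval(parse(...)) — a sketch, relying on rw.sd accepting named numeric values supplied through do.call:

parameter.rw.sd <- do.call(rw.sd, c(
  setNames(as.list(rep(rw.sd_rp, length(fix_col))), parameter_seir_nivp[fix_col]),
  setNames(as.list(rep(rw.sd_ivp, length(ivp_para))), ivp_para)
))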

cl <- makeCluster(mcors,type = "SOCK"); registerDoSNOW(cl)
set.seed(998468235L,kind = "L'Ecuyer")
mcor.set <- list(preschedule = FALSE,set.seed = TRUE)

global.result <- foreach(i = 1:random.number, .combine=rbind,
  .packages = c("pomp","magrittr"),.errorhandling = "remove",
  .inorder = FALSE, .options.multicore = mcor.set) %dopar%  {

    start_params <- apply(guess.param,2,function(data)runif(1,data[1],data[2]))

    mif2(origin.model, params = start_params, Np = Np,Nmif = Nmif,
      cooling.type = "geometric",cooling.fraction.50 = 0.5,
      rw.sd = parameter.rw.sd) |>  mif2() -> mf
    pf <- replicate(10, pfilter(mf, Np = Np))
    ll <- sapply(pf,logLik)
    ll <- logmeanexp(ll, se = TRUE)
    data.frame(as.list(coef(mf)),
      loglik = ll[1],
      loglik.se = ll[2])
  }
global.result <- arrange(global.result,desc(loglik))
write.csv(global.result, file = "H1N1_Rho_Vac_VS_AHU_U.csv", row.names = FALSE)

stopCluster(cl)
registerDoSEQ()
kingaa commented 2 years ago

The question of "weighting" the data is equivalent to the question of what form the measurement model takes. The larger the variance of the measurement model, the smaller the weight that is associated with the data. In other words, if you don't believe that the low values in the data are precise measurements, this should be reflected in your measurement model as a large variance, which in turn will cause these data to be down-weighted in the likelihood calculation. If you do believe that the measurements are precise, then why would you want to ignore them?
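For instance, one way of encoding "I do not trust the low counts" is an overdispersion term that puts a floor under the measurement variance, thereby down-weighting the small observations. A sketch only, with tau a hypothetical extra parameter; whether anything like this is appropriate depends on your data:

dmeas_floor <- Csnippet("
  double m = rho*C;
  double v = m*(1.0-rho+phi*phi*m) + tau*tau; // tau*tau puts a floor under the variance
  lik = dnorm(cases,m,sqrt(v)+1.0e-60,1);
  lik = (give_log) ? lik : exp(lik);"
)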

So the first question is: Is the evident lack of fit a problem with the data or with the model (or both)? In particular, what is the precision of the various measurements? Is this properly reflected in your measurement model? If it is, then the next question is: Why is my model capable of fitting the low periods or the high periods, but not both?
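One useful diagnostic is to simulate from the fitted model and overlay the data, looking separately at the epidemic and non-epidemic periods. A sketch, assuming mf is one of your fitted mif2 objects:

mf |>
  simulate(nsim = 20, format = "data.frame", include.data = TRUE) |>
  ggplot(aes(x = week, y = cases, group = .id, color = (.id == "data"))) +
  geom_line() +
  guides(color = "none")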

Also, may I ask why you are using such an out-of-date version of pomp?

Spatial-R commented 2 years ago

Thanks! I will try the newest version of pomp and revise the measurement model.

kingaa commented 2 years ago

I will close this issue for now. Feel free to re-open if more discussion is warranted.