IALSA / ialsa-2016-amsterdam

Multi-study and multivariate evaluation of healthy life expectancy (HLE): An IALSA workshop on multistate modeling using R
GNU General Public License v2.0
0 stars 0 forks source link

Currenet state of models in MAP #20

Open andkov opened 8 years ago

andkov commented 8 years ago

Current state of modeling

> ds <- readRDS("./data/unshared/derived/multistate_mmse.rds")
> 
> 
> cat("\nState table:"); print(statetable.msm(state,id,data=ds))

State table:    to
from   -2   -1    1    2    3    4
  -2   32    0    0    0    0    0
  -1    0   25   27   13   27   50
  1    32   59 4855  715  120  251
  2     8   20  534  478  257  146
  3     6   34   24   97  649  233
> ds %>% dplyr::summarise(unique_ids = n_distinct(id)) # subject count
  unique_ids
1       1696
> ds %>% dplyr::group_by(state) %>% dplyr::summarize(count = n()) # basic frequiencies
Source: local data frame [6 x 2]

  state count
  (dbl) (int)
1    -2    79
2    -1   142
3     1  6720
4     2  1605
5     3  1162
6     4   680
> # NOTE: -2 is a right censored value, indicating being alive but in an unknown living state.
> 
> (N <- length(unique(ds$id)))
[1] 1696
> subjects <- as.numeric(unique(ds$id))
> 
> # Add first observation indicator
> # this creates a new dummy variable "firstobs" with 1 for the frist wave
> cat("\nFirst observation indicator is added.\n")

First observation indicator is added.
> offset <- rep(NA,N)
> for(i in 1:N){offset[i] <- min(which(ds$id==subjects[i]))}
> firstobs <- rep(0,nrow(ds))
> firstobs[offset] <- 1
> ds <- cbind(ds,firstobs=firstobs)
> head(ds)
     id  male      age state firstobs
1  9121 FALSE 79.96988     1        1
2  9121 FALSE 81.08145     1        0
3  9121 FALSE 81.61259     1        0
4  9121 FALSE 82.59548     1        0
5  9121 FALSE 83.62218     1        0
6 33027 FALSE 81.00753     1        1
> 
> # Time intervals in data:
> # the age difference between timepoint for each individual
> intervals <- matrix(NA,nrow(ds),2)
> for(i in 2:nrow(ds)){
+   if(ds$id[i]==ds$id[i-1]){
+     intervals[i,1] <- ds$id[i]
+     intervals[i,2] <- ds$age[i]-ds$age[i-1]
+   }
+   intervals <- as.data.frame(intervals)
+   colnames(intervals) <- c("id", "interval")
+ }
> head(intervals)
    id  interval
1   NA        NA
2 9121 1.1115674
3 9121 0.5311430
4 9121 0.9828884
5 9121 1.0266940
6   NA        NA
> 
> # the age difference between timepoint for each individual
> # Remove the N NAs:
> intervals <- intervals[!is.na(intervals[,2]),]
> cat("\nTime intervals between observations within individuals:\n")

Time intervals between observations within individuals:
> print(round(quantile(intervals[,2]),digits))
   0%   25%   50%   75%  100% 
 0.00  0.96  1.00  1.03 11.87 

Now to estimating models

> simple_multistate <- function(ds, Model, q, qnames, method, cov_names){
+   covariates <- as.formula(paste0("~",cov_names))
+   constraint <- NULL
+   fixedpars <- NULL
+   (Q <- rbind(c(0,q,q,q), c(q,0,q,q),c(q,q,0,0), c(0,0,0,0)))
+   model <- msm(state~age, subject=id, data=ds, center=FALSE,
+                qmatrix=Q, death=TRUE, covariates=covariates,
+                censor= c(-1,-2), censor.states=list(c(1,2), c(1,2)), method=method,
+                constraint=constraint,fixedpars=fixedpars,
+                control=list(trace=0,REPORT=1,maxit=1000,fnscale=10000))
+   # Generate output:
+   cat("\n---------------------------------------\n")
+   cat("\nModel",Model," with covariates: "); print(covariates)
+   cat("and constraints:\n"); print(constraint)
+   cat("and fixedpars:\n"); print(fixedpars)
+   cat("Convergence code =", model$opt$convergence,"\n")
+   minus2LL <-  model$minus2loglik
+   AIC <- minus2LL + 2*length(model$opt$par)
+   cat("\n-2loglik =", minus2LL,"\n")
+   cat("AIC =", AIC,"\n")
+   p <- model$estimates
+   p.se <- sqrt(diag(model$covmat))
+   # Univariate Wald tests:
+   wald <- round((p/p.se)^2,digits)
+   pvalue <- round(1-pchisq((p/p.se)^2,df=1),digits)
+   # Do not test intercepts:
+   wald[1:sum(names(p)=="qbase")] <- "-"
+   pvalue[1:sum(names(p)=="qbase")] <- "-"
+   # Results to screen:
+   cat("\nParameter estimats and SEs:\n")
+   print(cbind(q=qnames,p=round(p,digits),
+               se=round(p.se,digits),"Wald ChiSq"=wald,
+               "Pr>ChiSq"=pvalue),quote=FALSE)
+   cat("\n---------------------------------------\n")
+ }
> 
> simple_multistate(ds = ds, Model=0,q, qnames, method,  cov_names = "1")

---------------------------------------

Model 0  with covariates: ~1
<environment: 0x000000001ba4d5e8>
and constraints:
NULL
and fixedpars:
NULL
Convergence code = 0 

-2loglik = 14331.88 
AIC = 14347.88 

Parameter estimats and SEs:
      q   p     se   Wald ChiSq Pr>ChiSq
qbase q12 -1.41 0.04 -          -       
qbase q21 -7.9  1.71 -          -       
qbase q13 -4.64 0.32 -          -       
qbase q14 -0.48 0.05 -          -       
qbase q24 -0.92 0.06 -          -       
qbase q34 -0.97 0.05 -          -       
qbase q23 -7.26 1.69 -          -       
qbase q31 -0.57 0.06 -          -       

---------------------------------------
Warning message:
In msm.check.times(mf$"(time)", mf$"(subject)", mf$"(state)") :
  Subjects 1738075, 3060782, 3335936 and others only have one complete observation