harrelfe / Hmisc

Harrell Miscellaneous
Other
210 stars 81 forks source link

Error from applying `soprobMarkovOrdm()` to a `blrm` model for binary outcome #156

Open aaronxs opened 2 years ago

aaronxs commented 2 years ago

Hi there,

I was trying to apply the Bayesian first order Markov model (from this link) to a binary longitudinal data set that I have.

However, I am getting the following error when I tried to use soprobMarkovOrdm() get the state occupancy probabilities (SOP):

Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : 
  contrasts can be applied only to factors with 2 or more levels

Using the uprobB() function from the reference link above (with modifications in order to adapt it to my data), I was able to locate the cause of this error. It is caused by the application of the predict() functions to calculate the post time 1 conditional probabilities (i.e. pp <- predict(fit, edata, type='fitted.ind', posterior.summary='all') ) as edata has more than 2 row of data (since the outcome has 2 levels).

I was able to get the uprobB() function to run by replacing:

pp <- predict(fit, edata, type='fitted.ind', posterior.summary='all') 

with the following (plus additional modifications, see uprobB_new() below with comment # my modification)

pp0 <- predict(fit, edata[1, ], type = 'fitted.ind', posterior.summary = 'all')
pp1 <- predict(fit, edata[2, ], type = 'fitted.ind', posterior.summary = 'all')

Here are the code I use for this analysis along with a sub-sample of my data

library(tidyverse);library(rmsb);library(Hmisc)

# modified version of uproB()
uprobB_new <- function(fit, data, times, ylevels, 
                       pvarname = "yprev", gap = FALSE, absorb = NULL) {
  if(! length(fit$draws)) stop('fit does not have posterior draws')
  nd <- nrow(fit$draws)
  k  <- length(ylevels)
  ss  <- length(times)
  P  <- array(NA, c(nd, ss, k),
              dimnames=list(paste('draw', 1 : nd), as.character(times), 
                            as.character(ylevels)))

  # Never uncondition on initial state
  data$time <- times[1]
  # if(gap) data$gap <- times[1]

  p <- predict(fit, data, type = 'fitted.ind', posterior.summary='all')
  if(k == 2) p <- c(1 - p, p)   # predict doesn't predict all levels if Y binary
  P[, 1, ] <- p

  rnameprev <- paste("t-1", ylevels)
  rnameprevna <- paste("t-1", setdiff(ylevels, absorb))

  # cp: matrix of conditional probabilities of Y conditioning on previous time Y
  # Columns = k conditional probabilities conditional on a single previous state
  # Rows    = all possible previous states
  # This is for a single posterior draw
  cp <- matrix(NA, nrow=k, ncol=k, 
               dimnames=list(paste('t-1', ylevels), paste('t', ylevels)))

  data <- as.list(data)
  data[[pvarname]] <- setdiff(ylevels, absorb)  # don't request estimates after absorbing state
  edata <- expand.grid(data)

  for(it in 2 : ss) {
    edata$time <- times[it]
    # if(gap) edata$gap <- times[it] - times[it - 1]
    # pp <- predict(fit, edata, type='fitted.ind', posterior.summary='all')
    pp0 <- predict(fit, edata[1, ], type = 'fitted.ind', posterior.summary = 'all') # my modification
    pp1 <- predict(fit, edata[2, ], type = 'fitted.ind', posterior.summary = 'all') # my modification

    for(idraw in 1 : nd) {
      # cp <- rbind(c(1., rep(0., k - 1)), pp[idraw, ,])

      pp <- c(pp0[idraw, ], pp1[idraw, ]) # my modification
      cp <- cbind(1 - pp, pp) # my modification

      # Compute unconditional probabilities of being in all possible states
      # at current time t
      P[idraw, it, ] <- t(cp) %*% P[idraw, it - 1, ]
    }
  }
  P
}

dd <- structure(list(time = c(0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 
                              4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 
                              16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 
                              8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 
                              0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 
                              16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 
                              8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 
                              0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 
                              16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 
                              8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 
                              0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 
                              16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 
                              8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 
                              0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 
                              16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 
                              8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 
                              0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 
                              16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 
                              8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 
                              0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 
                              16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 
                              8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 
                              0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 
                              16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 
                              8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 
                              0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 
                              16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 
                              8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16), 
                     y = c(0, 0, 0, 
                           0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 
                           1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 
                           0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                           1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 
                           1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 
                           0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 
                           1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                           0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 
                           0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 
                           1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
                           0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 
                           0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 
                           0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
                           0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 
                           1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 
                           1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 
                           1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 
                           0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 
                           0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 
                           1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 
                           1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 
                           1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 
                           0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 
                           1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                     yprev = c(0, 0, 0, 
                               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 
                               0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 
                               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                               0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 
                               0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 
                               1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 
                               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 
                               0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 
                               1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
                               0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 
                               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 
                               0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                               0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
                               0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 
                               1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 
                               1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
                               0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 
                               0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 
                               1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 
                               1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
                               1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
                               0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 
                               0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), 
                row.names = c(NA, 
                              -500), 
                class = c("tbl_df", "tbl", "data.frame"))

# fit blrm model
mm <- blrm(y ~ yprev + pol(time), 
           data = dd,
           loo = FALSE, 
           iter = 1000, 
           seed = 2022,
           # warmup = 200,
           chains = 1, refresh = 50)

# calculate the SOPs using the soprobMarkovOrdm() function (will return error)
# ppred <- soprobMarkovOrdm(
#   object = mm,
#   data = data.frame(yprev = '0'),
#   times = sort(unique(dd$time)),
#   ylevels = c(0, 1),
#   absorb = NULL,
#   tvarname = "time",
#   pvarname= "yprev",
#   gap = NULL
# )

# calculate OPs using a modified version of uproB()
ppred <- uprobB_new(fit = mm, 
                    data = data.frame(yprev=0), 
                    times = c(sort(unique(dd$time))), 
                    ylevels = c(0, 1),
                    pvarname='yprev', 
                    absorb = NULL,
                    gap = NULL)

# derive summary statistic
g <- function(x) {
  w <- c(mean(x)*100, HPDint(x)*100)
  w <- round(w, 2)
  names(w) <- c('p_predicted', 'lower_predicted', 'upper_predicted')
  w
}
ppred_summ <- apply(ppred[, , 2], 2, g)
ppred_summ <- as_tibble(t(ppred_summ)) %>%
  mutate(time = sort(unique(dd$time))) %>%
  select(time, p_predicted, lower_predicted, upper_predicted)

# merge observed and predicted probability for comparisons
prop_summ <- dd %>%
  group_by(time) %>%
  summarise(N = n(),
            n_observed = sum(y == 1),
            p_observed = round(n_observed/N*100, 2)) %>%
  left_join(ppred_summ, by = "time")       
harrelfe commented 2 years ago

This doesn't appear to be an Hmisc -related problem.