nutterb / HydeNet

Hybrid Decision Networks in R
Other
23 stars 3 forks source link

coda.samples not working with observed data #62

Closed jarrod-dalton closed 9 years ago

jarrod-dalton commented 9 years ago

I'm having some trouble. I set up a network for coronary artery disease, it compiled fine, and I was able to run coda.samples to get the posterior samples.

Then I fed it some data, and I got an error. What's going on?


g <- HydeNetwork(
~ riskScore | female*age*black*totchol*hdl*antihypertensives*sbp*smoke*diabetes  
+ cad.0 | riskScore*black*female
)

g <- setNode(g, female,            "dbern", p = 0.40)
g <- setNode(g, black,             "dbern", p = 0.13)
g <- setNode(g, smoke,             "dbern", p = 0.30)
g <- setNode(g, diabetes,          "dbern", p = 0.10)
g <- setNode(g, antihypertensives, "dbern", p = 0.20)

g <- setNode(g, totchol, "dnorm", mu = 220, tau = 1/(20^2))
g <- setNode(g, hdl,     "dnorm", mu =  55, tau = 1/( 5^2))
g <- setNode(g, age,     "dnorm", mu =  60, tau = 1/(10^2))
g <- setNode(g, sbp,     "dnorm", mu = 145, tau = 1/(10^2))

g <- setNode(g, riskScore, "determ", define=fromFormula(),
             nodeFormula = 
                 riskScore ~ 
                    ifelse(black==1,
                      ifelse(female==1,
                          17.114*log(age) + 0.94*log(totchol) - 18.92*log(hdl) + 4.475*log(age)*log(hdl) +
                          ifelse(antihypertensives==1,29.291-6.432*log(age),27.820-6.087*log(age))*log(sbp) +
                          0.691*smoke + 0.874*diabetes -86.61,
                          2.469*log(age) + 0.302*log(totchol) - 0.307*log(hdl) +
                          ifelse(antihypertensives==1,1.916,1.809)*log(sbp) + 0.549*(smoke==1) +
                          0.645*(diabetes==1) - 19.54
                      ),
                      ifelse(female==1,
                          -29.799*log(age) + 4.884*pow(log(age),2) + 13.54*log(totchol) - 
                          3.114*log(age)*log(totchol) - 13.578*log(hdl) + 3.149*log(age)*log(hdl) +
                          ifelse(antihypertensives==1,2.019,1.957)*log(sbp) + 7.574*smoke +
                          -1.665*log(age)*smoke + 0.661*diabetes + 29.18,
                          12.344*log(age) + 11.853*log(totchol) - 2.664*log(age)*log(totchol) -
                          7.99*log(hdl) + 1.769*log(age)*log(hdl) +
                          ifelse(antihypertensives==1,1.797,1.764)*log(sbp) + 7.837*(smoke==1) -
                          1.795*log(age)*(smoke==1) + 0.658*(diabetes==1) - 61.18
                      )
                    )
             )

g <- setNode(g, cad.0, nodeType = "dbern",
             p = "1.0-pow(ifelse(black==1,ifelse(female==1,0.9533,0.8954),ifelse(female==1,0.9665,0.9144)),exp(riskScore))")

plot(g)

image

writeNetworkModel(g, pretty=TRUE)
model{
   riskScore <- ifelse(black == 1, ifelse(female == 1, 17.114 * log(age) + 0.94 * log(totchol) - 18.92 * log(hdl) + 4.475 * log(age) * log(hdl) + ifelse(antihypertensives == 1, 29.291 - 6.432 * log(age), 27.82 - 6.087 * log(age)) * log(sbp) + 0.691 * smoke + 0.874 * diabetes - 86.61, 2.469 * log(age) + 0.302 * log(totchol) - 0.307 * log(hdl) + ifelse(antihypertensives == 1, 1.916, 1.809) * log(sbp) + 0.549 * (smoke == 1) + 0.645 * (diabetes == 1) - 19.54), ifelse(female == 1, -29.799 * log(age) + 4.884 * pow(log(age), 
    2) + 13.54 * log(totchol) - 3.114 * log(age) * log(totchol) - 13.578 * log(hdl) + 3.149 * log(age) * log(hdl) + ifelse(antihypertensives == 1, 2.019, 1.957) * log(sbp) + 7.574 * smoke + -1.665 * log(age) * smoke + 0.661 * diabetes + 29.18, 12.344 * log(age) + 11.853 * log(totchol) - 2.664 * log(age) * log(totchol) - 7.99 * log(hdl) + 1.769 * log(age) * log(hdl) + ifelse(antihypertensives == 1, 1.797, 1.764) * log(sbp) + 7.837 * (smoke == 1) - 1.795 * log(age) * (smoke == 1) + 0.658 * (diabetes == 
    1) - 61.18))
   female ~ dbern(0.4)
   age ~ dnorm(60, 0.01)
   black ~ dbern(0.13)
   totchol ~ dnorm(220, 0.0025)
   hdl ~ dnorm(55, 0.04)
   antihypertensives ~ dbern(0.2)
   sbp ~ dnorm(145, 0.01)
   smoke ~ dbern(0.3)
   diabetes ~ dbern(0.1)
   cad.0 ~ dbern(1.0-pow(ifelse(black==1,ifelse(female==1,0.9533,0.8954),ifelse(female==1,0.9665,0.9144)),exp(riskScore)))
}
##########################################################################
# Prediction without any information whatsoever
##########################################################################
monitoredNodes <- c("female","age","black","totchol","hdl","antihypertensives","sbp","smoke","diabetes","riskScore","cad.0")

g0 <- compileJagsModel(g, n.chains = 3, n.adapt = 5000)

S0 <- coda.samples(g0$jags, monitoredNodes, n.iter=16667)
S0 <- as.data.frame(do.call("rbind", S0))
#dplyr::sample_n(S0, 20)

p <- data.frame(stage = "0: No information entered", p = mean(S0$cad.0))
p
                      stage         p
1 0: No information entered 0.1398172
##########################################################################
# Enter some baseline demographic information
##########################################################################
g1 <- compileJagsModel(g, data=list(age=74), n.chains = 3, n.adapt = 5000)
Compiling model graph
   Resolving undeclared variables
   Allocating nodes
   Graph Size: 150

Initializing model
S1 <- coda.samples(g1$jags, c("cad.0"), n.iter=16667)
Error in coda.samples(g1$jags, c("cad.0"), n.iter = 16667) : 
  attempt to apply non-function
jarrod-dalton commented 9 years ago

I also get the error message when I use HydePosterior():

g1 <- compileJagsModel(g, data = list(age=73, black=1, smoke=1), n.chains = 3, n.adapt = 5000)
Compiling model graph
   Resolving undeclared variables
   Allocating nodes
   Graph Size: 149

Initializing model
S1 <- HydePosterior(g1, variable.names = c("riskScore", "cad.0", "black", "age"), n.iter=16667)
Error in rjags::coda.samples(cHN[[j]]$jags[[1]], variable.names = variable.names,  : 
                               attempt to apply non-function
jarrod-dalton commented 9 years ago

okay - so I completely hacked the HydePosterior() function and seem to have gotten it working.

But I did this mindlessly and don't want to commit it to the development branch. In fact I think it's wrong. But it works for my current use (no policy matrices).

(I also have a prior version of the package sitting on my laptop and don't want to sync/reinstall until you've been able to handle the stuff with cpt() and inputCPT()).

So I'll paste the function below.

The issue seems to be the fact that the original (correct) version of the function referred to the cHN object as a list with multiple compiled HydeNet objects within. But when I ran compileJagsModel(), it seems the structure had changed - instead of cHN[[j]]$jags[[1]], what I think the HydePosterior() function really wanted in my case was cHN$jags[[1]]. Same with cHN$observed and cHN$factorRef.

#' @name HydePosterior
#' @export HydePosterior
#' 
#' @title Posterior Distributions of a Decision Network
#' @description The posterior distributions of the decision network can be 
#'   evaluated to determine the probabilistic outcomes based on the decision
#'   inputs in the model as well as subject specific factors.
#'   
#' @param cHN A \code{compiledHydeNetwork} object as returned by 
#'   \code{compileJagsNetwork}.
#' @param variable.names a character vector giving the names of variables to be monitored.
#' @param n.iter number of iterations to monitor.
#' @param thin thinning interval for monitors.
#' @param ... options arguments that are passed to the update method for 
#'   jags model objects.
#' @param monitor_observed If TRUE, the observed or fixed variables (those
#'   passed to the \code{data} argument in \code{compileJagsNetwork}) are 
#'   forced into \code{variable.names} if not already provided.  This is 
#'   recommended, especially if you will be binding multiple JAGS runs 
#'   together.
#'   
#' @details This is essentially a wrapper around \code{coda.samples} that 
#'   returns in a list the output for each run of \code{coda.samples} over 
#'   the rows of the policy/decision matrix given in the \code{data} argument 
#'   of \code{compileJagsNetwork}.
#'   
#' @return A list of class \code{HydePosterior} with elements \code{codas} 
#'   (the MCMC matrices from \code{coda.samples}), \code{observed} (the values
#'   of the variables that were observed), \code{dag} (the dag object for 
#'   convenience in displaying the network), and \code{factorRef} (giving the
#'   mappings of factor levels to factor variables).  
#'   
#'   The only rationale for giving this object its own class was because it 
#'   produces an enormous amount of material to be printed.  A distinct 
#'   \code{print} method has been written for this object.
#'   
#' @author Jarrod Dalton and Benjamin Nutter
#' 
#' @examples
#' data(PE, package="HydeNet")
#' Net <- HydeNetwork(~ wells + 
#'                      pe | wells + 
#'                      d.dimer | pregnant*pe + 
#'                      angio | pe + 
#'                      treat | d.dimer*angio + 
#'                      death | pe*treat,
#'                      data = PE) 
#'   
#'                  
#' compiledNet <- compileJagsModel(Net, n.chains=5)
#' 
#' #* Generate the posterior distribution
#' Posterior <- HydePosterior(compiledNet, 
#'                            variable.names = c("d.dimer", "death"), 
#'                            n.iter = 1000)
#' 
#' #* Posterior Distributions for a Decision Model
#' Net <- setDecisionNodes(Net, angio, treat)
#' decisionNet <- compileDecisionModel(Net, n.chains=5)
#' decisionsPost <- HydePosterior(decisionNet, 
#'                                variable.names = c("d.dimer", "death"),
#'                                n.iter = 1000)
#' 
#' 

HydePosterior <- function(cHN, variable.names, n.iter, thin=1, ...,
                          monitor_observed=TRUE){
  if (monitor_observed){
    variable.names <- 
      if (class(cHN$jags) == "jags")
        unique(c(variable.names, names(cHN$observed)))
      else unique(c(variable.names, names(cHN[[1]]$observed)))
  }

  if (class(cHN$jags) == "jags"){
    codas <- coda.samples(cHN$jags, variable.names, n.iter, thin, ...)
    HydePost <- list(codas = codas,
                     observed = cHN$observed,
                     dag = cHN$dag,
                     factorRef = cHN$factorRef)
  }
  else{ 
    codas <- rjags::coda.samples(cHN$jags[[1]],
                                 variable.names = variable.names,
                                 n.iter = n.iter,
                                 thin = thin,
                                 ...)

    observed <- do.call("rbind", cHN$observed)

    HydePost <- list(codas = codas,
                     observed = observed,
                     dag = cHN$dag,
                     factorRef = cHN$factorRef)
  }

  class(HydePost) <- "HydePosterior"
  HydePost

}
nutterb commented 9 years ago

It looks like the problem is that sometimes compileJagsModel is creating the jags element as a JAGS model, and sometimes it's creating the jags element as a length 1 list of JAGS models. It shouldn't be a list.

So the problem is actually in compileJagsModel. I'm working on it now.

nutterb commented 9 years ago

Found it. I had tried to implement something to make compileDecisionModel more efficient and then decided the place to make that work was compileDecisionModel, but I forgot to restore the original code to compileJagsModel. All should be right with the world now. HydePosterior is also working. I'd recommend you not commit your changes.

Sorry for the error. Thanks for catching it.