kosukeimai / mediation

R package mediation
58 stars 29 forks source link

Extracting estimates from mediate() output objects for pooling by Rubin's rules #65

Open hoc307 opened 6 months ago

hoc307 commented 6 months ago

I wanted to reach out because I'm currently working on a causal mediation analysis using MICE imputed datasets. I've fitted the mediator model using lme4::lmer and the dependent variable (dv) model using lme4::glmer. These models were run with 10 MICE imputed datasets, and I included fixed effects covariates and a random effect as "+(1 | ID)".

  1. I've tried using both $d.avg.group and $d.avg.sims to obtain the results, they yield the same results. I have a suspicion that this might be because I didn't specify group.out for either the mediator or the dv model. So, my question is: Does the default option in this case draws from the mediator model or the dv model?

  2. I would like to confirm the extraction of correct mediate() output parameters for pooling based on Rubin's rules. Please see extraction and calculation below for ACME, ADE and TDE and Prop.mediated.

Thank you so much for taking the time to read and respond to my question. I truly appreciate your help!

Below is the mediate() fitting:

cm_list contains 10 of individually fitted models from mfit_list and dvfit_list by each of the 10 imputed set

for(i in seq_along(mfit_list)) {
  cm_list[[i]] <- mediate(mfit_list[[i]], dvfit_list[[i]], 
                          treat = 'x',
                          mediator = 'm', 
                          sims = 300, 
                          seed = 1234, 
                          boot = FALSE)
}

Please see codes for extraction and pooling below:


#Part A pooling point estimates:
acme_list <- sapply(list(med1, med2, med3, med4, med5,med6,med7,med8,med9,med10), function(x) x$d.avg)
ade_list <- sapply(list(med1, med2, med3, med4, med5,med6,med7,med8,med9,med10), function(x) x$z.avg)
tde_list <- sapply(list(med1, med2, med3, med4, med5,med6,med7,med8,med9,med10), function(x) x$tau.coef)
prop_list <- sapply(list(med1, med2, med3, med4, med5,med6,med7,med8,med9,med10), function(x) x$n.avg)

#Part B: pooling standard errors by Rubin's rules reference from https://bookdown.org/mwheymans/bookmi/rubins-rules.html

# Within-imputation variance (W) 
W <- mean(sapply(list(med1, med2, med3, med4, med5,med6,med7,med8,med9,med10), function(x) var(x$d.avg.group))) 

# Between-imputation variance (B) 
# Pooled estimate
pooled_acme <- mean(acme_list)
B <- sum(sapply(list(med1, med2, med3, med4, med5,med6,med7,med8,med9,med10), function(x) (pooled_acme-x$d.avg)^2))/(10-1)

# Total variance (T)
T <- W + B + (B/10) 

# Degrees of freedom for the T distribution
df <- (10 - 1) * (1 + W/(10 * B))^2

# Confidence interval using the T distribution
lower_ci <- pooled_acme - qt(0.975, df) * sqrt(T)
upper_ci <- pooled_acme + qt(0.975, df) * sqrt(T)

# Pooled ACME and its 95% CI
pooled_acme_ci <- c(lower_ci, upper_ci)

##for ADE
W <- mean(sapply(list(med1, med2, med3, med4, med5,med6,med7,med8,med9,med10), function(x) var(x$z.avg.group))) 

#PartB:# Between-imputation variance (B)
# Pooled ADE estimate
pooled_ade <- mean(ade_list)
B <- sum(sapply(list(med1, med2, med3, med4, med5,med6,med7,med8,med9,med10), function(x) (pooled_ade-x$z.avg)^2))/(10-1)

# Total variance (T)
T <- W + B + (B/10) 

# Degrees of freedom for the T distribution
df <- (10 - 1) * (1 + W/(10 * B))^2

# Confidence interval using the T distribution
lower_ci <- pooled_ade - qt(0.975, df) * sqrt(T)
upper_ci <- pooled_ade + qt(0.975, df) * sqrt(T)

# Pooled ACME and its 95% CI
pooled_ade_ci <- c(lower_ci, upper_ci)

##for TDE
#Within-imputation variance(W)
W <- mean(sapply(list(med1, med2, med3, med4, med5,med6,med7,med8,med9,med10), function(x) var(x$tau.coef.group))) # "tau.coef.group" for tde, "n.avg.group" for proportion mediated 

#PartB:# Between-imputation variance (B)
# Pooled estimate
pooled_tde <- mean(tde_list)
B <- sum(sapply(list(med1, med2, med3, med4, med5,med6,med7,med8,med9,med10), function(x) (pooled_tde-x$tau.coef)^2))/(10-1)

# Total variance (T)
T <- W + B + (B/10) 

# Degrees of freedom for the T distribution
df <- (10 - 1) * (1 + W/(10 * B))^2

# Confidence interval using the T distribution
lower_ci <- pooled_tde - qt(0.975, df) * sqrt(T)
upper_ci <- pooled_tde + qt(0.975, df) * sqrt(T)

# Pooled tdeand its 95% CI
pooled_tde_ci <- c(lower_ci, upper_ci)

##for Proprotion Mediated

#within imputation variance
W <- mean(sapply(list(med1, med2, med3, med4, med5,med6,med7,med8,med9,med10), function(x) var(x$n.avg.group))) 

#PartB:# Between-imputation variance (B)
# Pooled estimate
pooled_prop <- mean(prop_list)
B <- sum(sapply(list(med1, med2, med3, med4, med5,med6,med7,med8,med9,med10), function(x) (pooled_prop-x$n.avg)^2))/(10-1)

# Total variance (T)
T <- W + B + (B/10) 

# Degrees of freedom for the T distribution
df <- (10 - 1) * (1 + W/(10 * B))^2

# Confidence interval using the T distribution
lower_ci <- pooled_prop - qt(0.975, df) * sqrt(T)
upper_ci <- pooled_prop + qt(0.975, df) * sqrt(T)

# Pooled proportion mediated and its 95% CI
pooled_prop_ci <- c(lower_ci, upper_ci)

# Create a data frame with the pooled estimates and their confidence intervals
pooled_results <- data.frame(
  Estimate = c((pooled_acme), (pooled_ade), exp(pooled_tde), pooled_prop),
  CI_Lower = c(pooled_acme_ci[1], pooled_ade_ci[1], pooled_tde_ci[1], pooled_prop_ci[1]),
  CI_Upper = c(pooled_acme_ci[2], pooled_ade_ci[2], pooled_tde_ci[2], pooled_prop_ci[2])
)

# Add row names to identify each estimate
rownames(pooled_results) <- c("ACME", "ADE", "TDE", "Proportion Mediated")

Sample results:

                       Estimate     CI_Lower     CI_Upper
ACME                -0.00537041 -0.009216409 -0.001524412
ADE                 -0.02126277 -0.035035174 -0.007490358
TDE                 -0.02663318 -0.043323813 -0.009942540
Proportion Mediated  0.19941042  0.122061126  0.276759718