aalfons / robmed

Perform mediation analysis via a fast-and-robust bootstrap test, as well as various other methods
GNU General Public License v3.0
6 stars 0 forks source link

Add multilevel models #35

Open aalfons opened 2 years ago

aalfons commented 2 years ago

This requires to develop the fast-and-robust bootstrap methodology for robust estimators of multilevel models. Such robust estimators are implemented in package robustlmm. See also Manuel Koller's PhD thesis.

ksgfan commented 1 week ago

Hi, I wonder whether you managed to implement this feature and whether it is possible now to run the mediation analysis for multilevel data?

aalfons commented 6 days ago

Unfortunately not. Please note that as stated in my comment above, this is not just a simple matter of implementing a feature. The entire statistical methodology needs to be derived and validated first. This takes a time to do, and currently I have other things on my research agenda. So I'm afraid that I still cannot give any timeline for this.

ksgfan commented 5 days ago

Thank you for your response! I've been working on my own implementation to investigate whether M mediates the effect of X1, X2, and X3 (multiple independent variables) on Y. I'll paste my code here in case it might be helpful to others. I would appreciate it if you could take a look and provide feedback when you have some spare time.

# packages
library(robustlmm)
library(lme4)
library(lmerTest)
library(boot)
library(parallel)

# helper function to compute p-values, as rlmer does not provide p_values
# other option would be to run tab_model() on rlmer object
compute_p_values <- function(model) {
  # extract fixed effects, se and t-values    
  fixed_effects <- fixef(model)
  se <- sqrt(diag(vcov(model)))
  t_values <- fixed_effects / se
  # run lmer to get df
  formula <- formula(model)
  data <- model@frame
  lmer_model <- lmer(formula, data = data)
  coefs <- data.frame(coef(summary(lmer_model)))
  df <- coefs$df
  p_values <- 2 * pt(-abs(t_values), df)

  return(data.frame(Estimate = fixed_effects, StdError = se, t_value = t_values, p_value = p_values))
}

multilevel_mediation <- function(data, y, m, x, covariates, random_effects, group_var, n_bootstrap = 1000, n_cores = 2, max_iterations = 500, tol = 1e-6) {

  # Fit the full models 
  # total effect model
  total_effect_formula <- as.formula(paste(y, "~", paste(covariates, collapse = " + "), "+", paste(x, collapse = " + "), "+ (", random_effects, "|", group_var, ")"))
  fit_totaleffect <- rlmer(total_effect_formula, data = data, max.iter = max_iterations, rel.tol = tol)

  # mediator model
  mediator_formula <- as.formula(paste(m, "~", paste(covariates, collapse = " + "), "+", paste(x, collapse = " + "), "+ (", random_effects, "|", group_var, ")"))
  fit_mediator <- rlmer(mediator_formula, data = data, max.iter = max_iterations, rel.tol = tol)

  # dependent variable model
  dv_formula <- as.formula(paste(y, "~", m, "+", paste(covariates, collapse = " + "), "+", paste(x, collapse = " + "), "+ (", random_effects, "|", group_var, ")"))
  fit_dv <- rlmer(dv_formula, data = data, max.iter = max_iterations, rel.tol = tol)

  # Extract coeffs
  coefficients_totaleffect <- fixef(fit_totaleffect)
  coefficients_mediator <- fixef(fit_mediator)
  coefficients_dv <- fixef(fit_dv)

  # Calculate the indirect effects for each treatment variable
  indirect_effects <- sapply(x, function(var) {
    coefficients_mediator[var] * coefficients_dv[m]
  })

  # Extract direct effects for each treatment variable
  direct_effects <- coefficients_dv[x]

  # Extract p-values for total and direct effects
  p_values_total <- compute_p_values(fit_totaleffect)[x, 'p_value']
  p_values_direct <- compute_p_values(fit_dv)[x, 'p_value']

  # Bootstrapping for confidence intervals and p-values
  unique_ids <- unique(data[[group_var]])
  n_ids <- length(unique_ids)

  # Define the bootstrapping function
  bootstrap_function <- function(i) {
    sampled_ids <- sample(unique_ids, n_ids, replace = TRUE) # make sense because replace = T; check: unique(sampled_ids)
    sample_data <- data[data[[group_var]] %in% sampled_ids, ]

    fit_mediator_boot <- rlmer(mediator_formula, data = sample_data, max.iter = max_iterations, rel.tol = tol)
    fit_dv_boot <- rlmer(dv_formula, data = sample_data, max.iter = max_iterations, rel.tol = tol)
    fit_totaleffect_boot <- rlmer(total_effect_formula, data = sample_data, max.iter = max_iterations, rel.tol = tol)

    coef_mediator_boot <- fixef(fit_mediator_boot)
    coef_dv_boot <- fixef(fit_dv_boot)
    coef_totaleffect_boot <- fixef(fit_totaleffect_boot)

    boot_indirect_effects <- sapply(x, function(var) {
      coef_mediator_boot[var] * coef_dv_boot[m]
    })

    boot_total_effects <- coef_totaleffect_boot[x]
    boot_direct_effects <- coef_dv_boot[x]

    return(
            list(indirect = boot_indirect_effects, total = boot_total_effects, direct = boot_direct_effects)
    )
  }

  # Set up parallel processing
  cl <- makeCluster(n_cores)
  clusterExport(cl, 
                list("data", "y", "m", "x", "covariates", "random_effects", "group_var", "mediator_formula", "dv_formula", "total_effect_formula", "unique_ids", "n_ids"),
                envir=environment())
  clusterEvalQ(cl, {
    library(robustlmm)
    library(lme4)
  })

  boot_results <- parLapply(cl, 1:n_bootstrap , bootstrap_function, tol, max_iterations, n_bootstrap) # 
  stopCluster(cl)

  # Combine bootstrap results
  boot_indirect_effects <- do.call(rbind, lapply(boot_results, function(x) x$indirect))
  boot_total_effects <- do.call(rbind, lapply(boot_results, function(x) x$total))
  boot_direct_effects <- do.call(rbind, lapply(boot_results, function(x) x$direct))

  # Calculate confidence intervals
  ci_low_indirect <- apply(boot_indirect_effects, 2, function(x) quantile(x, 0.025))
  ci_high_indirect <- apply(boot_indirect_effects, 2, function(x) quantile(x, 0.975))

  ci_low_total <- apply(boot_total_effects, 2, function(x) quantile(x, 0.025))
  ci_high_total <- apply(boot_total_effects, 2, function(x) quantile(x, 0.975))

  ci_low_direct <- apply(boot_direct_effects, 2, function(x) quantile(x, 0.025))
  ci_high_direct <- apply(boot_direct_effects, 2, function(x) quantile(x, 0.975))

  # Calculate p-values for indirect effects using bootstrapping
  p_values_indirect <- apply(boot_indirect_effects, 2, function(x) {
    2 * min(mean(x <= 0), mean(x >= 0)) # 2 * to account for 2-sided test
  })

  # Summarize results
  results_indirect <- data.frame(
    Treatment = x,
    Indirect_Effect = indirect_effects,
    CI_Low_Indirect = ci_low_indirect,
    CI_High_Indirect = ci_high_indirect,
    P_Value_Indirect = p_values_indirect
  )

  results_direct <- data.frame(
    Treatment = x,
    Direct_Effect = direct_effects,
    CI_Low_Direct = ci_low_direct,
    CI_High_Direct = ci_high_direct,
    P_Value_Direct = p_values_direct
  )

  results_total <- data.frame(
    Treatment = x,
    Total_Effect = coefficients_totaleffect[x],
    CI_Low_Total = ci_low_total,
    CI_High_Total = ci_high_total,
    P_Value_Total = p_values_total
  )

  return(list(
    fit_totaleffect = fit_totaleffect,
    fit_mediator = fit_mediator,
    fit_dv = fit_dv,
    indirect_effects = results_indirect,
    direct_effects = results_direct,
    total_effects = results_total
  ))
}

# run the function
result <- multilevel_mediation(
  data = df,
  y = 'Y',
  m = 'M',
  x = c('X1', 'X2', 'X3'),
  covariates = c("C1", "C2"), 
  random_effects = "1 + TIME",
  group_var = "ID",
  n_bootstrap = 100,  
  n_cores = 10,
  max_iterations = 500, 
  tol = 1e-6
)

# Display results
print("Indirect Effects:")
print(result$indirect_effects)

print("Direct Effects:")
result$direct_effects[['p-val']] = round(result[["direct_effects"]][["P_Value_Direct"]], 3)
print(result$direct_effects)

print("Total Effects:")
result$total_effects[['p-val']] = round(result[["total_effects"]][["P_Value_Total"]], 3)
print(result$total_effects)