Open aalfons opened 2 years ago
Hi, I wonder whether you managed to implement this feature and whether it is possible now to run the mediation analysis for multilevel data?
Unfortunately not. Please note that as stated in my comment above, this is not just a simple matter of implementing a feature. The entire statistical methodology needs to be derived and validated first. This takes a time to do, and currently I have other things on my research agenda. So I'm afraid that I still cannot give any timeline for this.
Thank you for your response! I've been working on my own implementation to investigate whether M
mediates the effect of X1
, X2
, and X3
(multiple independent variables) on Y
. I'll paste my code here in case it might be helpful to others. I would appreciate it if you could take a look and provide feedback when you have some spare time.
# packages
library(robustlmm)
library(lme4)
library(lmerTest)
library(boot)
library(parallel)
# helper function to compute p-values, as rlmer does not provide p_values
# other option would be to run tab_model() on rlmer object
compute_p_values <- function(model) {
# extract fixed effects, se and t-values
fixed_effects <- fixef(model)
se <- sqrt(diag(vcov(model)))
t_values <- fixed_effects / se
# run lmer to get df
formula <- formula(model)
data <- model@frame
lmer_model <- lmer(formula, data = data)
coefs <- data.frame(coef(summary(lmer_model)))
df <- coefs$df
p_values <- 2 * pt(-abs(t_values), df)
return(data.frame(Estimate = fixed_effects, StdError = se, t_value = t_values, p_value = p_values))
}
multilevel_mediation <- function(data, y, m, x, covariates, random_effects, group_var, n_bootstrap = 1000, n_cores = 2, max_iterations = 500, tol = 1e-6) {
# Fit the full models
# total effect model
total_effect_formula <- as.formula(paste(y, "~", paste(covariates, collapse = " + "), "+", paste(x, collapse = " + "), "+ (", random_effects, "|", group_var, ")"))
fit_totaleffect <- rlmer(total_effect_formula, data = data, max.iter = max_iterations, rel.tol = tol)
# mediator model
mediator_formula <- as.formula(paste(m, "~", paste(covariates, collapse = " + "), "+", paste(x, collapse = " + "), "+ (", random_effects, "|", group_var, ")"))
fit_mediator <- rlmer(mediator_formula, data = data, max.iter = max_iterations, rel.tol = tol)
# dependent variable model
dv_formula <- as.formula(paste(y, "~", m, "+", paste(covariates, collapse = " + "), "+", paste(x, collapse = " + "), "+ (", random_effects, "|", group_var, ")"))
fit_dv <- rlmer(dv_formula, data = data, max.iter = max_iterations, rel.tol = tol)
# Extract coeffs
coefficients_totaleffect <- fixef(fit_totaleffect)
coefficients_mediator <- fixef(fit_mediator)
coefficients_dv <- fixef(fit_dv)
# Calculate the indirect effects for each treatment variable
indirect_effects <- sapply(x, function(var) {
coefficients_mediator[var] * coefficients_dv[m]
})
# Extract direct effects for each treatment variable
direct_effects <- coefficients_dv[x]
# Extract p-values for total and direct effects
p_values_total <- compute_p_values(fit_totaleffect)[x, 'p_value']
p_values_direct <- compute_p_values(fit_dv)[x, 'p_value']
# Bootstrapping for confidence intervals and p-values
unique_ids <- unique(data[[group_var]])
n_ids <- length(unique_ids)
# Define the bootstrapping function
bootstrap_function <- function(i) {
sampled_ids <- sample(unique_ids, n_ids, replace = TRUE) # make sense because replace = T; check: unique(sampled_ids)
sample_data <- data[data[[group_var]] %in% sampled_ids, ]
fit_mediator_boot <- rlmer(mediator_formula, data = sample_data, max.iter = max_iterations, rel.tol = tol)
fit_dv_boot <- rlmer(dv_formula, data = sample_data, max.iter = max_iterations, rel.tol = tol)
fit_totaleffect_boot <- rlmer(total_effect_formula, data = sample_data, max.iter = max_iterations, rel.tol = tol)
coef_mediator_boot <- fixef(fit_mediator_boot)
coef_dv_boot <- fixef(fit_dv_boot)
coef_totaleffect_boot <- fixef(fit_totaleffect_boot)
boot_indirect_effects <- sapply(x, function(var) {
coef_mediator_boot[var] * coef_dv_boot[m]
})
boot_total_effects <- coef_totaleffect_boot[x]
boot_direct_effects <- coef_dv_boot[x]
return(
list(indirect = boot_indirect_effects, total = boot_total_effects, direct = boot_direct_effects)
)
}
# Set up parallel processing
cl <- makeCluster(n_cores)
clusterExport(cl,
list("data", "y", "m", "x", "covariates", "random_effects", "group_var", "mediator_formula", "dv_formula", "total_effect_formula", "unique_ids", "n_ids"),
envir=environment())
clusterEvalQ(cl, {
library(robustlmm)
library(lme4)
})
boot_results <- parLapply(cl, 1:n_bootstrap , bootstrap_function, tol, max_iterations, n_bootstrap) #
stopCluster(cl)
# Combine bootstrap results
boot_indirect_effects <- do.call(rbind, lapply(boot_results, function(x) x$indirect))
boot_total_effects <- do.call(rbind, lapply(boot_results, function(x) x$total))
boot_direct_effects <- do.call(rbind, lapply(boot_results, function(x) x$direct))
# Calculate confidence intervals
ci_low_indirect <- apply(boot_indirect_effects, 2, function(x) quantile(x, 0.025))
ci_high_indirect <- apply(boot_indirect_effects, 2, function(x) quantile(x, 0.975))
ci_low_total <- apply(boot_total_effects, 2, function(x) quantile(x, 0.025))
ci_high_total <- apply(boot_total_effects, 2, function(x) quantile(x, 0.975))
ci_low_direct <- apply(boot_direct_effects, 2, function(x) quantile(x, 0.025))
ci_high_direct <- apply(boot_direct_effects, 2, function(x) quantile(x, 0.975))
# Calculate p-values for indirect effects using bootstrapping
p_values_indirect <- apply(boot_indirect_effects, 2, function(x) {
2 * min(mean(x <= 0), mean(x >= 0)) # 2 * to account for 2-sided test
})
# Summarize results
results_indirect <- data.frame(
Treatment = x,
Indirect_Effect = indirect_effects,
CI_Low_Indirect = ci_low_indirect,
CI_High_Indirect = ci_high_indirect,
P_Value_Indirect = p_values_indirect
)
results_direct <- data.frame(
Treatment = x,
Direct_Effect = direct_effects,
CI_Low_Direct = ci_low_direct,
CI_High_Direct = ci_high_direct,
P_Value_Direct = p_values_direct
)
results_total <- data.frame(
Treatment = x,
Total_Effect = coefficients_totaleffect[x],
CI_Low_Total = ci_low_total,
CI_High_Total = ci_high_total,
P_Value_Total = p_values_total
)
return(list(
fit_totaleffect = fit_totaleffect,
fit_mediator = fit_mediator,
fit_dv = fit_dv,
indirect_effects = results_indirect,
direct_effects = results_direct,
total_effects = results_total
))
}
# run the function
result <- multilevel_mediation(
data = df,
y = 'Y',
m = 'M',
x = c('X1', 'X2', 'X3'),
covariates = c("C1", "C2"),
random_effects = "1 + TIME",
group_var = "ID",
n_bootstrap = 100,
n_cores = 10,
max_iterations = 500,
tol = 1e-6
)
# Display results
print("Indirect Effects:")
print(result$indirect_effects)
print("Direct Effects:")
result$direct_effects[['p-val']] = round(result[["direct_effects"]][["P_Value_Direct"]], 3)
print(result$direct_effects)
print("Total Effects:")
result$total_effects[['p-val']] = round(result[["total_effects"]][["P_Value_Total"]], 3)
print(result$total_effects)
This requires to develop the fast-and-robust bootstrap methodology for robust estimators of multilevel models. Such robust estimators are implemented in package
robustlmm
. See also Manuel Koller's PhD thesis.