Meta-analysis proposal - Githubissues

gerasy1987 commented 3 years ago

Draft structure

Get all estimates produced for each of the studies (depending on the design) in a single data frame that will include the estimate and its standard error for each estimator (or potentially for each sampling-estimator pair)
Then we can feed this data frame into a Stan model akin to the one below: (a) use the data frame to produce, for each estimator/sampling-estimator pair, the vector of studies in which it is observed, observed*; (b) use the data frame produced by the study designs to produce arrays of observed estimates and corresponding standard errors (with a dummy value where missing), est* and est*_sd; (c) assume that the deviation from the true parameter of interest (error) is driven by the choice of estimator
We then declare the relevant estimands and run diagnosis


# Stan program for the meta-analysis, stored as a string so it can be passed
# to rstan as model code.
#
# Each of the three estimators (estimator-sampling pairs) k contributes the
# studies in which it was observed: est<k> ~ normal(error[k] + alpha[study],
# est<k>_sd), where alpha is the study-level prevalence and error[k] is an
# additive error specific to estimator k.
#
# Fixes relative to the earlier draft:
#   * the model block was missing its opening brace;
#   * normal_lpmf replaced by normal_lpdf (the normal is continuous, so it has
#     a density, not a mass function);
#   * study ids are used to index alpha, and Stan indexes from 1, so their
#     lower bound is 1 rather than 0.
stan_model_meta <- "
  data {
    int<lower=0> N;   // number of studies
    int<lower=0> K;   // max number of estimator (estimator-sampling pairs)
    // number of studies with estimator (estimator-sampling pair)
    int<lower=0,upper=N> N1;
    int<lower=0,upper=N> N2;
    int<lower=0,upper=N> N3;
    // ids of studies with specific estimator (estimator-sampling pair);
    // these index alpha, so they must lie in 1..N
    int<lower=1,upper=N> observed1[N1];
    int<lower=1,upper=N> observed2[N2];
    int<lower=1,upper=N> observed3[N3];
    // parameter estimates
    real<lower=0,upper=1> est1[N1];
    real<lower=0,upper=1> est2[N2];
    real<lower=0,upper=1> est3[N3];
    // estimated standard errors of parameter estimates
    real<lower=0> est1_sd[N1];
    real<lower=0> est2_sd[N2];
    real<lower=0> est3_sd[N3];
  }
  parameters {
    // (additive) error factor for each estimator/estimator-sampling pair
    real<lower=-1,upper=1> error[K];
    // prevalence estimate for each study
    vector<lower=0,upper=1>[N] alpha;
    // need to add Sigma to allow for interdependence of errors across estimators
    // or studies
  }

  model {
    target += normal_lpdf(est1 | error[1] + alpha[observed1], est1_sd);
    target += normal_lpdf(est2 | error[2] + alpha[observed2], est2_sd);
    target += normal_lpdf(est3 | error[3] + alpha[observed3], est3_sd);
  }
  "

# Build the meta-analysis estimands: one row per study (row of `data`).
#
# data: data frame (or matrix) whose first column is used as the estimand
#   value for each study.
#
# Returns a data frame with columns `estimand_label` ("prevalence_<i>" for
# row i) and `estimand` (the first column of `data`).
get_meta_estimands <- function(data) {

  # The number of studies comes from the input itself; the earlier draft read
  # an undefined global `N`. seq_len() also yields an empty label vector for
  # zero-row input, where 1:0 would produce two bogus labels.
  study_ids <- seq_len(nrow(data))

  data.frame(estimand_label = paste0("prevalence_", study_ids),
             # drop = TRUE gives a plain vector for data frames, tibbles and
             # matrices alike
             estimand = data[, 1, drop = TRUE],
             stringsAsFactors = FALSE)
}

# Fit the Stan meta-analysis model (stan_model_meta) and return the posterior
# summary of the study-level prevalence parameters `alpha`.
#
# data: read positionally. Column 1 is not used here; each following pair of
#   columns (2k, 2k + 1) holds the estimate and its standard error for
#   estimator k, with NA in the estimate column marking studies where that
#   estimator is not available.
#   NOTE(review): stan_model_meta hard-codes three estimators (N1..N3), so
#   `data` presumably needs exactly 7 columns -- confirm against callers.
#
# Returns a data frame with one row per study: `estimator_label`, posterior
# mean (`estimate`) and standard deviation (`sd`) of alpha, `estimand_label`,
# and `big_Rhat` (TRUE when any parameter's Rhat exceeds 1.1).
get_meta_estimators <- function(data) {

  n_studies <- nrow(data)
  study_ids <- seq_len(n_studies)

  stan_data <- list(N = n_studies,
                    K = (ncol(data) - 1) / 2)

  # seq_len() skips the loop when there are no estimator columns (1:0 would
  # iterate over 1 and 0).
  for (k in seq_len(stan_data$K)) {
    est_col <- data[, 2 * k, drop = TRUE]
    se_col <- data[, 2 * k + 1, drop = TRUE]
    seen <- which(!is.na(est_col))

    # as.array() keeps length-1 inputs as one-dimensional arrays, which is
    # what the Stan array declarations expect.
    stan_data[[paste0("observed", k)]] <- as.array(seen)
    stan_data[[paste0("N", k)]] <- length(seen)
    stan_data[[paste0("est", k)]] <- as.array(est_col[seen])
    stan_data[[paste0("est", k, "_sd")]] <- as.array(se_col[seen])
  }

  # The program text goes in `model_code`; the `fit` argument of rstan::stan()
  # is for re-using a previously fitted stanfit object.
  fit <- rstan::stan(model_code = stan_model_meta,
                     data = stan_data,
                     iter = 4000)

  # Convergence flag. The earlier draft referenced an undefined `big_Rhat`.
  # NOTE(review): the 1.1 cut-off is the reviewer's choice -- adjust if the
  # project uses a different convention.
  rhat <- rstan::summary(fit)$summary[, "Rhat"]
  big_Rhat <- any(rhat > 1.1, na.rm = TRUE)

  # Posterior draws of alpha: one row per draw, one column per study.
  alpha_draws <- rstan::extract(fit, pars = "alpha")$alpha

  data.frame(estimator_label = paste0("prev_", study_ids),
             estimate = apply(alpha_draws, 2, mean),
             sd = apply(alpha_draws, 2, stats::sd),
             # Same labels as get_meta_estimands() so estimates can be matched
             # to their estimands (the draft used "hidden_prev<i>", which
             # matched nothing).
             estimand_label = paste0("prevalence_", study_ids),
             big_Rhat = big_Rhat,
             stringsAsFactors = FALSE)
}

Meta declaration


# Meta-design declaration: collect the per-study pieces into named lists,
# declare one multi-study step of each kind, then chain the study-level steps
# with the meta-analysis steps.
#
# Fixes relative to the earlier draft:
#   * the population and sampling steps referenced `meta_pop_args` and
#     `meta_sample_args`, which are never defined; they now use the
#     `pop_args` and `sample_args` lists built here;
#   * `get_stduy_populations` looked like a typo for `get_study_populations`
#     (all sibling handlers are spelled `get_study_*`) -- NOTE(review):
#     confirm the handler's actual name;
#   * the estimand/estimator argument lists no longer share a name with the
#     steps built from them.

# Per-study arguments, keyed by study label ----
pop_args <-
  list(study_1 = study_1$pop,
       study_2 = study_2$pop,
       study_3 = study_3$pop,
       study_4 = study_4$pop)

sample_args <-
  list(study_1 = study_1$sample,
       study_2 = study_2$sample,
       study_3 = study_3$sample,
       study_4 = study_4$sample)

estimator_args <-
  list(study_1 = study_1$estimators,
       study_2 = study_2$estimators,
       study_3 = study_3$estimators,
       study_4 = study_4$estimators)

# Every study uses the same estimand function.
estimand_args <-
  list(study_1 = get_study_estimands,
       study_2 = get_study_estimands,
       study_3 = get_study_estimands,
       study_4 = get_study_estimands)

# Study-level steps ----
study_populations <-
  declare_population(handler = get_study_populations, handler_args = pop_args)

study_samples <-
  declare_sampling(handler = get_study_samples, handler_args = sample_args)

study_estimands <-
  declare_estimand(handler = get_study_estimands, handler_args = estimand_args)

study_estimators <-
  declare_estimator(handler = get_study_estimators, handler_args = estimator_args)

# Meta-analysis steps ----
# NOTE(review): `handler_args` receives the function get_study_estimators
# here, unlike the list arguments above -- kept as in the draft; confirm this
# is what prep_study_estimators expects.
meta_switch <-
  declare_step(prep_study_estimators, handler_args = get_study_estimators)

meta_estimands <-
  declare_estimand(handler = get_meta_estimands)

meta_estimators <-
  declare_estimator(handler = get_meta_estimators)

meta_design <-
  study_populations +
  study_samples +
  study_estimands +
  study_estimators +
  meta_switch +
  meta_estimands +
  meta_estimators

Implementation To-Do

[x] Need to write helper functions that will allow to perform single study declarations on multiple studies with fixed labels
- [x] population
- [x] sampling strategies
- [x] estimators
- [x] estimands
[x] Need a transformation function for meta_switch step
[x] Need to double-check and enhance Stan code for meta-analyses and

gerasy1987 commented 3 years ago

@macartan , maybe alternatively we can

Declare what is now multi_design, i.e. a design for multiple studies with a variety of populations, sampling strategies and estimators
Declare a meta_population step that will call draw_estimands(multi_design) and draw_estimates(multi_design) and combine the results into a single data frame, so that we have everything ready for the meta-analysis
Then go ahead and declare
- meta_estimands: still not clear how and if we want to get at those or just run estimation, and
- meta_estimators: based on Stan code draft above
Finally run the meta-analyses

This won't do everything in one run, but will allow us to cleanly draw study-level data and pass it to the meta declaration

gerasy1987 commented 3 years ago

From discussion today

[ ] Might want to think about moving meta-analysis to sub-study units (e.g. areas or districts within study)

gerasy1987 / hiddenmeta

Meta-analysis proposal #13

Draft structure

Meta declaration

Implementation To-Do