Closed jzhu20 closed 2 months ago
Confidence interval is obtained from variations in simulations. Therefore, different simulations need to be kept before taking mean, minimum or maximum values. A new data-process R script is as follows.
# Renaming variables if necessary, to remove prefix ---- names(data) <- sub("^mean_", "", names(data)) # Mean of simulations ---- ## List in summarise() needs to be in line with data ## ## Excluding missing values ## gen_data_weighted <- function(data) { data_weighted <- data |> group_by(source,time,simID) |> mutate(prevalence_stroke = prevalence_intracerebralhemorrhage+ prevalence_ischemicstroke + prevalence_subarachnoidhemorrhage, incidence_stroke = incidence_intracerebralhemorrhage + incidence_ischemicstroke + incidence_subarachnoidhemorrhage) |> summarise(weighted_income = weighted.mean(income,count), weighted_sector = weighted.mean(sector,count), weighted_sodium = weighted.mean(sodium,count,na.rm=TRUE), weighted_carbohydarte = weighted.mean(carbohydrate,count,na.rm=TRUE), weighted_fat = weighted.mean(fat,count,na.rm=TRUE), weighted_protein = weighted.mean(protein,count,na.rm=TRUE), weighted_energyintake = weighted.mean(energyintake,count,na.rm=TRUE), weighted_physicalactivity = weighted.mean(physicalactivity,count), weighted_bmi = weighted.mean(bmi,count,na.rm=TRUE), weighted_height = weighted.mean(height,count), weighted_weight = weighted.mean(weight,count,na.rm=TRUE), weighted_overweight = weighted.mean(over_weight,count), weighted_obesity = weighted.mean(obese_weight,count), wprev_ihd = weighted.mean(prevalence_ischemicheartdisease,count,na.rm=TRUE), wprev_diabetes = weighted.mean(prevalence_diabetes,count,na.rm=TRUE), wprev_stroke = weighted.mean(prevalence_stroke,count,na.rm=TRUE), wprev_asthma = weighted.mean(prevalence_asthma,count,na.rm=TRUE), wprev_ckd = weighted.mean(prevalence_chronickidneydisease , count,na.rm=TRUE), prevcase_ihd = sum(prevalence_ischemicheartdisease * count,na.rm=TRUE), prevcase_diabetes = sum(prevalence_diabetes * count,na.rm=TRUE), prevcase_stroke = sum(prevalence_stroke * count,na.rm=TRUE), prevcase_asthma = sum(prevalence_asthma * count,na.rm=TRUE), prevcase_ckd = sum(prevalence_chronickidneydisease * count,na.rm=TRUE), totalcase_ihd = sum(incidence_ischemicheartdisease * count,na.rm=TRUE), totalcase_diabetes = sum(incidence_diabetes * count,na.rm=TRUE), totalcase_stroke = sum(incidence_stroke * count,na.rm=TRUE), totalcase_asthma = sum(incidence_asthma * count,na.rm=TRUE), totalcase_ckd = sum(incidence_chronickidneydisease * count,na.rm=TRUE), weighted_disabilityweight = weighted.mean(disability_weight,count), weighted_death = weighted.mean(deaths,count), weighted_migrations = weighted.mean(migrations,count), total_yll = sum(yll*count,na.rm=TRUE), total_yld = sum(yld*count,na.rm=TRUE), total_daly = sum(daly*count,na.rm=TRUE)) return(data_weighted) } ## Diff ---- gen_data_weighted_rf <- function(data_weighted) { data_weighted_rf <- select(data_weighted, source, time, simID, weighted_sodium, weighted_energyintake, weighted_bmi,weighted_obesity) data_weighted_rf_wide <- pivot_wider(data_weighted_rf, names_from=source, id_cols = c(time,simID), values_from = c(weighted_sodium, weighted_energyintake, weighted_bmi,weighted_obesity)) data_weighted_rf_wide <- data_weighted_rf_wide |> mutate(diff_sodium = weighted_sodium_intervention - weighted_sodium_baseline, diff_ei = weighted_energyintake_intervention - weighted_energyintake_baseline, diff_bmi = weighted_bmi_intervention - weighted_bmi_baseline, diff_obesity = weighted_obesity_intervention - weighted_obesity_baseline) data_weighted_rf_wide_collapse <- data_weighted_rf_wide |> group_by(time) |> summarise(diff_sodium_mean = mean(diff_sodium), diff_sodium_min = min(diff_sodium), diff_sodium_max = max(diff_sodium), diff_ei_mean = mean(diff_ei), diff_ei_min = min(diff_ei), diff_ei_max = max(diff_ei), diff_bmi_mean = mean(diff_bmi), diff_bmi_min = min(diff_bmi), diff_bmi_max = max(diff_bmi), diff_obesity_mean = mean(diff_obesity), diff_obesity_min = min(diff_obesity), diff_obesity_max = max(diff_obesity)) return(data_weighted_rf_wide_collapse) } # Incidence diff ---- gen_data_weighted_ds <- function(data_weighted) { data_weighted_ds <- select(data_weighted, source, time, simID, totalcase_ihd, totalcase_diabetes,totalcase_stroke,totalcase_asthma, totalcase_ckd) data_weighted_ds_wide <- pivot_wider(data_weighted_ds, names_from=source, id_cols = c(time,simID), values_from = c(totalcase_ihd, totalcase_diabetes, totalcase_stroke, totalcase_asthma, totalcase_ckd)) data_weighted_ds_wide <- data_weighted_ds_wide |> mutate(diff_inc_ihd = 100*(totalcase_ihd_intervention - totalcase_ihd_baseline), diff_inc_db = 100*(totalcase_diabetes_intervention - totalcase_diabetes_baseline), diff_inc_stroke = 100*(totalcase_stroke_intervention - totalcase_stroke_baseline), diff_inc_asthma = 100*(totalcase_asthma_intervention - totalcase_asthma_baseline), diff_inc_ckd = 100*(totalcase_ckd_intervention - totalcase_ckd_baseline)) data_weighted_ds_wide <- data_weighted_ds_wide |> group_by(simID) |> mutate(cumdiff_inc_ihd = cumsum(diff_inc_ihd), cumdiff_inc_db = cumsum(diff_inc_db), cumdiff_inc_stroke = cumsum(diff_inc_stroke), cumdiff_inc_asthma = cumsum(diff_inc_asthma), cumdiff_inc_ckd = cumsum(diff_inc_ckd)) data_weighted_ds_wide_collapse <- data_weighted_ds_wide |> group_by(time) |> summarise(diff_inc_ihd_mean = mean(cumdiff_inc_ihd), diff_inc_ihd_min = min(cumdiff_inc_ihd), diff_inc_ihd_max = max(cumdiff_inc_ihd), diff_inc_db_mean = mean(cumdiff_inc_db), diff_inc_db_min = min(cumdiff_inc_db), diff_inc_db_max = max(cumdiff_inc_db), diff_inc_stroke_mean = mean(cumdiff_inc_stroke), diff_inc_stroke_min = min(cumdiff_inc_stroke), diff_inc_stroke_max = max(cumdiff_inc_stroke), diff_inc_asthma_mean = mean(cumdiff_inc_asthma), diff_inc_asthma_min = min(cumdiff_inc_asthma), diff_inc_asthma_max = max(cumdiff_inc_asthma), diff_inc_ckd_mean = mean(cumdiff_inc_ckd), diff_inc_ckd_min = min(cumdiff_inc_ckd), diff_inc_ckd_max = max(cumdiff_inc_ckd) ) return(data_weighted_ds_wide_collapse) } # Burden of disease ---- gen_data_weighted_burden <- function(data_weighted) { data_weighted_burden <- select(data_weighted, source, time, simID, total_yll, total_yld,total_daly) data_weighted_burden_wide <- pivot_wider(data_weighted_burden, names_from=source, id_cols = c(time,simID), values_from = c(total_yll, total_yld, total_daly)) data_weighted_burden_wide <- data_weighted_burden_wide |> mutate(diff_yll = (total_yll_intervention - total_yll_baseline)/1000, diff_yld = (total_yld_intervention - total_yld_baseline)/1000, diff_daly = (total_daly_intervention - total_daly_baseline)/1000) data_weighted_burden_wide <- data_weighted_burden_wide |> group_by(simID) |> mutate(cumdiff_daly = cumsum(diff_daly), cumdiff_yll = cumsum(diff_yll), cumdiff_yld = cumsum(diff_yld)) data_weighted_burden_wide_collapse <- data_weighted_burden_wide |> group_by(time) |> summarise(diff_daly_mean = mean(diff_daly), diff_daly_min = min(diff_daly), diff_daly_max = max(diff_daly), diff_yll_mean = mean(diff_yll), diff_yll_min = min(diff_yll), diff_yll_max = max(diff_yll), diff_yld_mean = mean(diff_yld), diff_yld_min = min(diff_yld), diff_yld_max = max(diff_yld), cumdiff_daly_mean = mean(cumdiff_daly), cumdiff_daly_min = min(cumdiff_daly), cumdiff_daly_max = max(cumdiff_daly), cumdiff_yll_mean = mean(cumdiff_yll), cumdiff_yll_min = min(cumdiff_yll), cumdiff_yll_max = max(cumdiff_yll), cumdiff_yld_mean = mean(cumdiff_yld), cumdiff_yld_min = min(cumdiff_yld), cumdiff_yld_max = max(cumdiff_yld)) return(data_weighted_burden_wide_collapse) } ############ ## This last function is data smoothing ## It is applied manually now in India project due to abnormal positive values in diff_daly or cumdiff_daly gen_data_weighted_burden_spline <- function(data_weighted_burden) { ## Only keep those 0 or negative values ## Notes: Delete years 27,30,32-33 for ps3-low; Delete years 2028 for ps4-low data_weighted_burden_mean <- data_weighted_burden |> filter(cumdiff_daly_mean <= 0) data_weighted_burden_min <- data_weighted_burden |> filter(cumdiff_daly_min <= 0) ## Notes: Delete years 29, 31 for ps2-high; Delete 37-38 for ps3-low; Delete 33-34 for ps4-middle; Delete 36-38 for ps4-low data_weighted_burden_max <- data_weighted_burden |> filter(cumdiff_daly_max <= 0) ## New data frame data_weighted_burden_spline <- data.frame(time = seq(min(data_weighted_burden$time), max(data_weighted_burden$time), length.out = 34)) ## Fit spline and predict spline_fit <- interpSpline(data_weighted_burden_mean$time, data_weighted_burden_mean$cumdiff_daly_mean) data_weighted_burden_spline$cumdiff_daly_mean <- predict(spline_fit, data_weighted_burden_spline$time)$y spline_fit_min <- interpSpline(data_weighted_burden_min$time, data_weighted_burden_min$cumdiff_daly_min) data_weighted_burden_spline$cumdiff_daly_min <- predict(spline_fit_min, data_weighted_burden_spline$time)$y ## Use smooth.spline for ps4-low spline_fit_max <- interpSpline(data_weighted_burden_max$time, data_weighted_burden_max$cumdiff_daly_max) data_weighted_burden_spline$cumdiff_daly_max <- predict(spline_fit_max, data_weighted_burden_spline$time)$y ## Keep 0 values in the first two years data_weighted_burden_spline$cumdiff_daly_mean <- ifelse(data_weighted_burden_spline$time<2024, 0, data_weighted_burden_spline$cumdiff_daly_mean) data_weighted_burden_spline$cumdiff_daly_min <- ifelse(data_weighted_burden_spline$time<2024, 0, data_weighted_burden_spline$cumdiff_daly_min) data_weighted_burden_spline$cumdiff_daly_max <- ifelse(data_weighted_burden_spline$time<2024, 0, data_weighted_burden_spline$cumdiff_daly_max) return(data_weighted_burden_spline) }
Confidence interval is obtained from variations in simulations. Therefore, different simulations need to be kept before taking mean, minimum or maximum values. A new data-process R script is as follows.