r-causal / causal-inference-in-R

Causal Inference in R: A book!
https://www.r-causal.org/
181 stars 48 forks source link

16.02: Calculating estimates with G-Computation #96

Open malcolmbarrett opened 2 years ago

malcolmbarrett commented 2 years ago
LucyMcGowan commented 11 months ago

Here is an interesting post comparing IPCW and g-computation for a specific DAG: https://stats.stackexchange.com/questions/628315/correcting-for-selection-bias-with-standardisation-g-computation

library(tidyverse)

#' Simulate one data set with treatment, covariate, censoring and outcome
#'
#' Seeds the global RNG with `seed`, then draws `n` rows. The draws are made
#' in a fixed order (A, U, L, C, outcome noise) so a given `seed` always
#' reproduces the same data.
#'
#' @param n Number of rows to simulate.
#' @param seed Value passed to `set.seed()` before any draws are made.
#' @return A tibble with columns `A`, `L`, `C` and `Y`. The intermediate
#'   variable `U` (which feeds both `L` and `Y`) is not returned.
sim_data <- function(n, seed) {
  set.seed(seed)

  # A: binary, probability 0.5, independent of everything else.
  treatment <- rbinom(n, size = 1, prob = 0.5)

  # U: standard normal; used below for both L and Y but dropped from output.
  unmeasured <- rnorm(n)

  # L: binary, log-odds -0.5 + A + U.
  covariate <- rbinom(n, size = 1, prob = plogis(-0.5 + treatment + unmeasured))

  # C: binary, log-odds -1 + 3 * L (depends on L only).
  censoring <- rbinom(n, size = 1, prob = plogis(-1 + 3 * covariate))

  # Y: U plus standard normal noise; A does not enter.
  outcome <- unmeasured + rnorm(n)

  tibble(A = treatment, L = covariate, C = censoring, Y = outcome)
}

# Simulate the full data set (rows with C == 1 are still present here).
df <- sim_data(n = 100000, seed = 123)

# IPC weighting ----
# Logistic model for the probability of C == 0 given L.
C_model <- glm(C == 0 ~ L, family = binomial, data = df)

# Weight for each row: inverse of its fitted probability of C == 0.
prob_uncensored <- predict(C_model, newdata = df, type = "response")
df$ipc_weights <- 1 / prob_uncensored

# Weighted regression of Y on A using only the rows with C == 0.
uncensored <- filter(df, C == 0)
Y_model <- lm(Y ~ A, data = uncensored, weights = uncensored$ipc_weights)

coef(Y_model)["A"]
# Output recorded by the original author: 0.0001701217
# G-computation ----
# Standardize the stratum-specific outcome means among the rows with C == 0
# to the distribution of L *within each treatment arm*.
#
# In sim_data() L depends on A and on U, and U also drives Y. Averaging the
# within-L contrasts over the marginal Pr(L) therefore conditions on a
# collider; that version returned -0.1629754 even though Y does not depend
# on A. Weighting each arm's stratum means by Pr(L | A = a) recovers
# E[Y | A = a] for that arm instead.
df_c <- filter(df, C == 0)

# Mean of Y among rows of df_c with A == a and L == l.
stratum_mean <- function(a, l) {
  mean(df_c$Y[df_c$A == a & df_c$L == l])
}

treated_L0 <- stratum_mean(1, 0)
treated_L1 <- stratum_mean(1, 1)
untreated_L0 <- stratum_mean(0, 0)
untreated_L1 <- stratum_mean(0, 1)

# Pr(L = 1 | A = a), estimated from all rows of df: L is available for every
# row, whatever its value of C.
Pr_L1_treated <- mean(df$L[df$A == 1] == 1)
Pr_L1_untreated <- mean(df$L[df$A == 0] == 1)

# Standardized mean of Y under each treatment level.
mean_Y_treated <-
  treated_L0 * (1 - Pr_L1_treated) + treated_L1 * Pr_L1_treated
mean_Y_untreated <-
  untreated_L0 * (1 - Pr_L1_untreated) + untreated_L1 * Pr_L1_untreated

out <- mean_Y_treated - mean_Y_untreated

out
# NOTE(review): not re-run here; expected to be close to 0, in line with the
# IPC-weighted coefficient for A.