elong0527 / rerandom

Efficient calculation for re-randomization test
GNU General Public License v3.0
0 stars 0 forks source link

Example Code to Summarize Simulation Results #2

Open fb-elong opened 3 weeks ago

fb-elong commented 3 weeks ago
n_repeat <-1000
alpha <- 0.01 
L = seq(1000, 66000, 1000)
upper = (sqrt(1.353 + 1.1*alpha*L) + 1.163)^2
lower = (sqrt(1.353 + 0.9*alpha*L) - 1.163)^2

res_p <- foreach(i_res = 1:n_repeat) %dofuture% {
    x <- NA
    try({
        load(glue::glue(data_path))

        p_true = mean(res_rerandom$z > res_rerandom$res_z)
        p_para_1000 =  2* (1 -  pnorm( mean(abs(res_rerandom$res_z[1:1000]) / sd(res_rerandom$z[1:1000])) ))
        p_para_5000 =  2* (1 -  pnorm( mean(abs(res_rerandom$res_z[1:5000]) / sd(res_rerandom$z[1:5000])) ))
        p_para_10000 =  2* (1 -  pnorm( mean(abs(res_rerandom$res_z[1:10000]) / sd(res_rerandom$z[1:10000])) ))
        p_pr = mean(res_rerandom$z[1:65695] > res_rerandom$res_z[1:65695])

        m = cumsum(res_rerandom$z > res_rerandom$res_z)[L]
        L_m = L[m < lower | m > upper | c(rep(FALSE, 65), TRUE)][1]
        p_adaptive = mean(res_rerandom$z[1:L_m] > res_rerandom$res_z[1:L_m])

        x <- data.frame(p_true = p_true, p_para_1000 = p_para_1000, p_para_5000 = p_para_5000,  p_para_10000 = p_para_10000, p_pr = p_pr, p_adaptive = p_adaptive, L_adaptive = L_m, r2 = res$r2)
    })
    x
}
res_p <- bind_rows(res_p) 

# Using code below for the table
res_p %>%
    summarize(
        p = mean(p_true), 
        CP_1000 = mean( (p_para_1000 < alpha) == (p_true < alpha)) * 100,
        CP_5000 = mean((p_para_5000 < alpha) == (p_true < alpha)) * 100,
        CP_10000 = mean((p_para_10000 < alpha) == (p_true < alpha)) * 100,
        CP_PR = mean((p_pr < alpha)  == (p_true < alpha) ) * 100,
        P_adaptive = mean((p_adaptive < alpha) == (p_true < alpha) ) * 100, 
        L_min = min(L_adaptive), 
        L_max = max(L_adaptive), 
        L_mean = mean(L_adaptive), 
        P_max = mean(L_adaptive == 66000)
    )

# Using code below to understand R2 distribution 
summary(res_p$r2)
# also create a histogram of res_p$r2
wangben718 commented 2 weeks ago
library(doFuture)
library(dplyr)

plan(multisession)

data_path <- "./{i_res}.Rdata"
n_repeat <-1000
alpha <- 0.01 
L = seq(1000, 66000, 1000)
upper = (sqrt(1.353 + 1.1*alpha*L) + 1.163)^2
lower = (sqrt(1.353 + 0.9*alpha*L) - 1.163)^2

res_p <- foreach(i_res = 1:n_repeat) %dofuture% {
  x <- NA
  try({
    load(glue::glue(data_path))
    res_rerandom <- res_rerandom %>% 
      subset(method == "continous")
    print(dim(res_rerandom))

    res <- res %>% 
      subset(method == "continous")
    p_true = mean(res_rerandom$z > res_rerandom$res_z)
    p_para_1000 =  2* (1 -  pnorm( mean(abs(res_rerandom$res_z[1:1000]) / sd(res_rerandom$z[1:1000])) ))
    p_para_5000 =  2* (1 -  pnorm( mean(abs(res_rerandom$res_z[1:5000]) / sd(res_rerandom$z[1:5000])) ))
    p_para_10000 =  2* (1 -  pnorm( mean(abs(res_rerandom$res_z[1:10000]) / sd(res_rerandom$z[1:10000])) ))
    p_pr = mean(res_rerandom$z[1:65695] > res_rerandom$res_z[1:65695])

    m = cumsum(res_rerandom$z > res_rerandom$res_z)[L]
    L_m = L[m < lower | m > upper | c(rep(FALSE, 65), TRUE)][1]
    p_adaptive = mean(res_rerandom$z[1:L_m] > res_rerandom$res_z[1:L_m])

    x <- data.frame(p_true = p_true, p_para_1000 = p_para_1000, p_para_5000 = p_para_5000,  p_para_10000 = p_para_10000, p_pr = p_pr, p_adaptive = p_adaptive, L_adaptive = L_m, r2 = res$r2)
  })
  x
}
res_p <- bind_rows(res_p[! is.na(res_p)]) 

# Using code below for the table
res_p %>%
  summarize(
    p = mean(p_true), 
    CP_1000 = mean( (p_para_1000 < alpha) == (p_true < alpha)) * 100,
    CP_5000 = mean((p_para_5000 < alpha) == (p_true < alpha)) * 100,
    CP_10000 = mean((p_para_10000 < alpha) == (p_true < alpha)) * 100,
    CP_PR = mean((p_pr < alpha)  == (p_true < alpha) ) * 100,
    P_adaptive = mean((p_adaptive < alpha) == (p_true < alpha) ) * 100, 
    L_min = min(L_adaptive), 
    L_max = max(L_adaptive), 
    L_mean = mean(L_adaptive), 
    P_max = mean(L_adaptive == 66000) * 100
  )

# Using code below to understand R2 distribution 
summary(res_p$r2)
# also create a histogram of res_p$r2
wangben718 commented 2 weeks ago
# 1. Open the .Rproj file using Rstudio
# 2. Run the code devtools::install() in the Rstudio console

# Save all functions in the R folder with proper documentation using usethis::use_r() 
# Save all required dataset in the data folder using usethis::use_data()
print(list.files())
.libPaths("./pkg")
library(dplyr)
library(survival)
library(rerandom)

args <- commandArgs(trailingOnly = TRUE)

if (length(args) == 0) {
  stop("No arguments supplied")
} else {
  for (i in 1:length(args)) {
    eval(parse(text = args[[i]]))
  }
}

# Get task id for each simulation job (not run in Rstudio Serve)
task_id <- SGE_TASK_ID
#print(Sys.getenv("SGE_TASK_ID"))
#task_id <- as.integer(Sys.getenv("SGE_TASK_ID"))

# Set up Simulation Environment

#task_id <- 1  # used for debugging purpose
print(task_id)
set.seed(task_id)

#---------- Simulation Setup --------------------#

n <- 200
n_rerandom <- 5e+5
# n_rerandom <- 10

stratum <- define_stratum(
  site = rep(1, 50),
  ecog = c("0" = 1, "1" = 1),
  tmb = c("<=6" = 1, ">6 and <= 12" = 1, ">12" = 1) )

treatment <- c("drug", "placebo")

df <- n %>% 
  simu_stratum(stratum = stratum) %>%
  simu_treatment_minimization(treatment = treatment, 
                              prob = define_prob(0.9, treatment), 
                              ratio = c(1, 1),
                              imbalance_fun = imbalance_fun_range)

beta <- list(
  site = rnorm(n = 50, 0.2),
  ecog = c(0.3, 0.5), 
  tmb = c(-0.3, 0, 0.3),
  treatment = c(0.2, 1.175)
)

l <- beta$site[as.numeric(df$site)] + 
  beta$ecog[as.numeric(df$ecog)] + 
  beta$tmb[as.numeric(df$tmb)] + 
  beta$treatment[as.numeric(df$treatment)]

p <- exp(l) / (1 + exp(l)) 

c_min <- 0
c_max <- 5

time <- exp(l + log(- log(runif(n)))) # PH model based on transformation model
cen <- runif(n, min = c_min, max = c_max) # Uniform censoring  

df$l <- l
df$p <- p
df$binary <- rbinom(n = n, size = 1, prob = p)
df$continous <- l + rnorm(n, sd = 2)
df$surv_time <- pmin(time, cen)
df$surv_status <- time < cen

# Analysis of single data

fit_rerandom <- function(ana){

  fit_binary <- glm(binary ~ treatment + ecog + tmb, family = "binomial", data = ana)
  fit_continous <- lm(continous ~ treatment + ecog + tmb, data = ana)
  fit_survival <- survdiff(Surv(surv_time, surv_status) ~ treatment + strata(ecog) + strata(tmb), data=ana)

  binary <- list(est = summary(fit_binary)$coeff[2, 1], 
                 sd = summary(fit_binary)$coeff[2, 2], 
                 z = summary(fit_binary)$coeff[2, 3], 
                 wald_p = summary(fit_binary)$coeff[2, 4])

  continous <- list(est = summary(fit_continous)$coeff[2, 1], 
                    sd = summary(fit_continous)$coeff[2, 2], 
                    z = summary(fit_continous)$coeff[2, 3], 
                    wald_p = summary(fit_continous)$coeff[2, 4], 
                    l_mean = mean(l),
                    r2 = (lm(continous ~ l, data = ana) %>% summary())$r.squared)

  survival <- list(est = NULL,
                   sd = NULL, 
                   z = sqrt(fit_survival$chisq), 
                   wald_p = 1 - pchisq(fit_survival$chisq, df = 1) )

  dplyr::bind_rows(binary = binary, 
                   continous = continous, 
                   survival = survival, .id = "method")

}

# observed data

res <- fit_rerandom(df)
res

# re-randomization 

res_rerandom <- replicate(n_rerandom, {

  df_rerandom_trt <- df[, c("site", "ecog", "tmb")] %>%
    simu_treatment_minimization(treatment = treatment, 
                                prob = define_prob(0.9, treatment), 
                                ratio = c(1, 1),
                                imbalance_fun = imbalance_fun_range)
  df_rerandom <- df
  df_rerandom$treatment <- df_rerandom_trt$treatment

  tmp <- fit_rerandom(df_rerandom)
  tmp$res_z <- res$z
  tmp$res_p <- res$wald_p
  tmp
}, simplify = FALSE)

res_rerandom <- dplyr::bind_rows(res_rerandom, .id = "id")

res1 <- res_rerandom %>% 
  group_by(method) %>% 
  summarise(sd_200 = sd(z[1:200]), 
            full_p = mean(abs(z) > abs(res_z)) ) 

res2 <- res %>% 
  left_join(res1) %>% 
  mutate(parat_p = 2*(1 - pt(q = z / sd_200, df = 200 - 1)), 
         paran_p = 2*(1 - pnorm(z, sd = sd_200)) )

res2

save.image(file = paste0("./result1.175/", task_id, ".Rdata"))