Open fb-elong opened 3 weeks ago
library(doFuture)
library(dplyr)
plan(multisession)
data_path <- "./{i_res}.Rdata"
n_repeat <-1000
alpha <- 0.01
L = seq(1000, 66000, 1000)
upper = (sqrt(1.353 + 1.1*alpha*L) + 1.163)^2
lower = (sqrt(1.353 + 0.9*alpha*L) - 1.163)^2
res_p <- foreach(i_res = 1:n_repeat) %dofuture% {
x <- NA
try({
load(glue::glue(data_path))
res_rerandom <- res_rerandom %>%
subset(method == "continous")
print(dim(res_rerandom))
res <- res %>%
subset(method == "continous")
p_true = mean(res_rerandom$z > res_rerandom$res_z)
p_para_1000 = 2* (1 - pnorm( mean(abs(res_rerandom$res_z[1:1000]) / sd(res_rerandom$z[1:1000])) ))
p_para_5000 = 2* (1 - pnorm( mean(abs(res_rerandom$res_z[1:5000]) / sd(res_rerandom$z[1:5000])) ))
p_para_10000 = 2* (1 - pnorm( mean(abs(res_rerandom$res_z[1:10000]) / sd(res_rerandom$z[1:10000])) ))
p_pr = mean(res_rerandom$z[1:65695] > res_rerandom$res_z[1:65695])
m = cumsum(res_rerandom$z > res_rerandom$res_z)[L]
L_m = L[m < lower | m > upper | c(rep(FALSE, 65), TRUE)][1]
p_adaptive = mean(res_rerandom$z[1:L_m] > res_rerandom$res_z[1:L_m])
x <- data.frame(p_true = p_true, p_para_1000 = p_para_1000, p_para_5000 = p_para_5000, p_para_10000 = p_para_10000, p_pr = p_pr, p_adaptive = p_adaptive, L_adaptive = L_m, r2 = res$r2)
})
x
}
res_p <- bind_rows(res_p[! is.na(res_p)])
# Using code below for the table
res_p %>%
summarize(
p = mean(p_true),
CP_1000 = mean( (p_para_1000 < alpha) == (p_true < alpha)) * 100,
CP_5000 = mean((p_para_5000 < alpha) == (p_true < alpha)) * 100,
CP_10000 = mean((p_para_10000 < alpha) == (p_true < alpha)) * 100,
CP_PR = mean((p_pr < alpha) == (p_true < alpha) ) * 100,
P_adaptive = mean((p_adaptive < alpha) == (p_true < alpha) ) * 100,
L_min = min(L_adaptive),
L_max = max(L_adaptive),
L_mean = mean(L_adaptive),
P_max = mean(L_adaptive == 66000) * 100
)
# Using code below to understand R2 distribution
summary(res_p$r2)
# also create a histogram of res_p$r2
# 1. Open the .Rproj file using Rstudio
# 2. Run the code devtools::install() in the Rstudio console
# Save all functions in the R folder with proper documentation using usethis::use_r()
# Save all required dataset in the data folder using usethis::use_data()
print(list.files())
.libPaths("./pkg")
library(dplyr)
library(survival)
library(rerandom)
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
stop("No arguments supplied")
} else {
for (i in 1:length(args)) {
eval(parse(text = args[[i]]))
}
}
# Get task id for each simulation job (not run in Rstudio Serve)
task_id <- SGE_TASK_ID
#print(Sys.getenv("SGE_TASK_ID"))
#task_id <- as.integer(Sys.getenv("SGE_TASK_ID"))
# Set up Simulation Environment
#task_id <- 1 # used for debugging purpose
print(task_id)
set.seed(task_id)
#---------- Simulation Setup --------------------#
n <- 200
n_rerandom <- 5e+5
# n_rerandom <- 10
stratum <- define_stratum(
site = rep(1, 50),
ecog = c("0" = 1, "1" = 1),
tmb = c("<=6" = 1, ">6 and <= 12" = 1, ">12" = 1) )
treatment <- c("drug", "placebo")
df <- n %>%
simu_stratum(stratum = stratum) %>%
simu_treatment_minimization(treatment = treatment,
prob = define_prob(0.9, treatment),
ratio = c(1, 1),
imbalance_fun = imbalance_fun_range)
beta <- list(
site = rnorm(n = 50, 0.2),
ecog = c(0.3, 0.5),
tmb = c(-0.3, 0, 0.3),
treatment = c(0.2, 1.175)
)
l <- beta$site[as.numeric(df$site)] +
beta$ecog[as.numeric(df$ecog)] +
beta$tmb[as.numeric(df$tmb)] +
beta$treatment[as.numeric(df$treatment)]
p <- exp(l) / (1 + exp(l))
c_min <- 0
c_max <- 5
time <- exp(l + log(- log(runif(n)))) # PH model based on transformation model
cen <- runif(n, min = c_min, max = c_max) # Uniform censoring
df$l <- l
df$p <- p
df$binary <- rbinom(n = n, size = 1, prob = p)
df$continous <- l + rnorm(n, sd = 2)
df$surv_time <- pmin(time, cen)
df$surv_status <- time < cen
# Analysis of single data
fit_rerandom <- function(ana){
fit_binary <- glm(binary ~ treatment + ecog + tmb, family = "binomial", data = ana)
fit_continous <- lm(continous ~ treatment + ecog + tmb, data = ana)
fit_survival <- survdiff(Surv(surv_time, surv_status) ~ treatment + strata(ecog) + strata(tmb), data=ana)
binary <- list(est = summary(fit_binary)$coeff[2, 1],
sd = summary(fit_binary)$coeff[2, 2],
z = summary(fit_binary)$coeff[2, 3],
wald_p = summary(fit_binary)$coeff[2, 4])
continous <- list(est = summary(fit_continous)$coeff[2, 1],
sd = summary(fit_continous)$coeff[2, 2],
z = summary(fit_continous)$coeff[2, 3],
wald_p = summary(fit_continous)$coeff[2, 4],
l_mean = mean(l),
r2 = (lm(continous ~ l, data = ana) %>% summary())$r.squared)
survival <- list(est = NULL,
sd = NULL,
z = sqrt(fit_survival$chisq),
wald_p = 1 - pchisq(fit_survival$chisq, df = 1) )
dplyr::bind_rows(binary = binary,
continous = continous,
survival = survival, .id = "method")
}
# observed data
res <- fit_rerandom(df)
res
# re-randomization
res_rerandom <- replicate(n_rerandom, {
df_rerandom_trt <- df[, c("site", "ecog", "tmb")] %>%
simu_treatment_minimization(treatment = treatment,
prob = define_prob(0.9, treatment),
ratio = c(1, 1),
imbalance_fun = imbalance_fun_range)
df_rerandom <- df
df_rerandom$treatment <- df_rerandom_trt$treatment
tmp <- fit_rerandom(df_rerandom)
tmp$res_z <- res$z
tmp$res_p <- res$wald_p
tmp
}, simplify = FALSE)
res_rerandom <- dplyr::bind_rows(res_rerandom, .id = "id")
res1 <- res_rerandom %>%
group_by(method) %>%
summarise(sd_200 = sd(z[1:200]),
full_p = mean(abs(z) > abs(res_z)) )
res2 <- res %>%
left_join(res1) %>%
mutate(parat_p = 2*(1 - pt(q = z / sd_200, df = 200 - 1)),
paran_p = 2*(1 - pnorm(z, sd = sd_200)) )
res2
save.image(file = paste0("./result1.175/", task_id, ".Rdata"))