M.Sc. Project ,The University of Burdwan
library(ggplot2) library(ggpubr) library(broom) library(dplyr)
cirrhosis_prediction<-read.csv("D:/DataSets/Cirrohis prediction/cirrhosis prediction.csv") nrow(cirrhosis_prediction) ncol(cirrhosis_prediction) summary(cirrhosis_prediction) str(cirrhosis_prediction)
cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Bilirubin) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Cholesterol) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Albumin) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Copper) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Alk_Phos) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$SGOT) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Prothrombin) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Platelets) cor(cirrhosis_prediction$Albumin,cirrhosis_prediction$Tryglicerides)
cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Bilirubin) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Cholesterol) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Albumin) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Copper) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Alk_Phos) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$SGOT) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Prothrombin) cor(cirrhosis_prediction$N_Days,cirrhosis_prediction$Platelets) cor(cirrhosis_prediction$Albumin,cirrhosis_prediction$Tryglicerides)
sf<-table(cirrhosis_prediction$Status) sf Drugf<-table(cirrhosis_prediction$Drug) Drugf Sexf<-table(cirrhosis_prediction$Sex) Sexf Ascitesy<-table(cirrhosis_prediction$Ascites) Ascitesy Hepatof<-table(cirrhosis_prediction$Hepatomegaly) Hepatof Spiders<-table(cirrhosis_prediction$Spiders) Spiders Edm<-table(cirrhosis_prediction$Edema) Edm
cirrhosis_prediction$Status=factor(cirrhosis_prediction$Status,levels<-c('D','C','CL'),levels<-c(1,0,0)) #Dummy 'Death' to'1','Censored' to '0','CL_censored due to liver tx' to '0'
cirrhosis_prediction$Drug=factor(cirrhosis_prediction$Drug,levels<-c('D-penicillamine','Placebo'),levels<-c(1,0)) #dummy 'Penicilliamine' to '1',placebo to '0' cirrhosis_prediction$Sex=factor(cirrhosis_prediction$Sex,levels<-c('F','M'),levels<-c(1,2)) #male to '1', female to '2' cirrhosis_prediction$Ascites=factor(cirrhosis_prediction$Ascites,levels<-c('Y','N'),levels<-c(1,0)) # 'Y' to '1','N' to '0' cirrhosis_prediction$Hepatomegaly=factor(cirrhosis_prediction$Hepatomegaly,levels<-c('Y','N'),levels<-c(1,0)) cirrhosis_prediction$Spiders=factor(cirrhosis_prediction$Spiders,levels<-c('Y','N'),levels<-c(1,0)) cirrhosis_prediction$Edema=factor(cirrhosis_prediction$Edema,levels<-c('Y','N','S'),levels<-c(1,0,3)) #N to no edima & diuretic,Y present both,S only edima
View(cirrhosis_prediction)
cirrhosis_prediction$Cholesterol[is.na(cirrhosis_prediction$Cholesterol)]<-median(cirrhosis_prediction$Cholesterol,na.rm = TRUE) cirrhosis_prediction$Copper[is.na(cirrhosis_prediction$Copper)]<-median(cirrhosis_prediction$Copper,na.rm = TRUE)
cirrhosis_prediction$Alk_Phos[is.na(cirrhosis_prediction$Alk_Phos)]<-median(cirrhosis_prediction$Alk_Phos,na.rm = TRUE) cirrhosis_prediction$SGOT[is.na(cirrhosis_prediction$SGOT)]<-median(cirrhosis_prediction$SGOT,na.rm = TRUE)
cirrhosis_prediction$Cholesterol[is.na(cirrhosis_prediction$Cholesterol)]<-median(cirrhosis_prediction$Cholesterol,na.rm = TRUE) cirrhosis_prediction$Tryglicerides[is.na(cirrhosis_prediction$Tryglicerides)]<-median(cirrhosis_prediction$Tryglicerides,na.rm = TRUE)
cirrhosis_prediction$Platelets[is.na(cirrhosis_prediction$Platelets)]<-median(cirrhosis_prediction$Platelets,na.rm = TRUE) cirrhosis_prediction$Prothrombin[is.na(cirrhosis_prediction$Prothrombin)]<-median(cirrhosis_prediction$Prothrombin,na.rm = TRUE)
cirrhosis_prediction$Stage[is.na(cirrhosis_prediction$Stage)]<-median(cirrhosis_prediction$Stage,na.rm = TRUE)
library(survival) library(survminer)
library("survival")
sfit<-survfit(Surv(cirrhosis_prediction$N_Days,cirrhosis_prediction$Status==1 )~1,data = cirrhosis_prediction) summary(sfit)
sfit<-survfit(Surv(cirrhosis_prediction$N_Days,cirrhosis_prediction$Status==0 )~1,data = cirrhosis_prediction) summary(sfit)
sfit<-survfit(Surv(cirrhosis_prediction$N_Days,cirrhosis_prediction$Status==1 )~Sex,data = cirrhosis_prediction) summary(sfit)
sfit<-survfit(Surv(cirrhosis_prediction$N_Days,cirrhosis_prediction$Status==1 )~Sex,data = cirrhosis_prediction) plot(sfit)
library(survminer) ggsurvplot(sfit)
#legend.labs=c("Male", "Female"), legend.title="Sex",
# palette=c("dodgerblue2", "orchid2"),
#title="Kaplan-Meier Curve for Cirrhosis Prediction Survival",
#risk.table.height=.15)
ggsurvplot(sfit, data = cirrhosis_prediction, conf.int = TRUE, pval = TRUE, fun = "pct", risk.table = TRUE, size = 1, linetype = "strata", palette = c("dodgerblue2", "orchid2"),title="Kaplan-Meier Curve for Cirrhosis Prediction Survival", legend = "bottom", legend.title = "Sex", legend.labs = c("Male", "Female"))
library(survival) coxm<-coxph(Surv(cirrhosis_prediction$N_Days,cirrhosis_prediction$Status)~Drug+Age+Sex+Ascites+Hepatomegaly+Spiders+Edema+Bilirubin+Cholesterol+Albumin+Copper+Alk_Phos+SGOT+Tryglicerides+Platelets+Prothrombin+Stage,id=ID,data = cirrhosis_prediction)
library(car) vif(coxm)
summary(coxm) ggforest(coxm,data = cirrhosis_prediction)
ftestcox<-cox.zph(coxm) ftestcox
library(survminer) ggcoxzph(ftestcox)
# "Male", "Female")
library(dplyr) library(ggplot2) library(knitr) library(ciTools) library(here) set.seed(20180925) kable(head(cirrhosis_prediction)) ggplot(cirrhosis_prediction,aes(x=Bilirubin+Albumin+Copper+Alk_Phos+Prothrombin,N_Days))+geom_point(aes(color = factor(Status)))+ggtitle("censored obs. in red")+theme_bw() ggplot(cirrhosis_prediction,aes(x=Drug,N_Days))+geom_point(aes(color = factor(Status)))+ggtitle("censored obs. in red")+theme_bw() cirrho01<-na.omit(cirrhosis_prediction) f11<-survreg(Surv(N_Days,Status==1)~.,data = cirrho01)
summary(f11) f11
with_ints <- ciTools::add_ci(cirrho01,f11, names = c("lcb", "ucb")) %>% ciTools::add_pi(f11, names = c("lpb", "upb")) kable(head(with_ints))
ggplot(with_ints, aes(x = Bilirubin+Albumin+Copper+Alk_Phos+Prothrombin, y = N_Days)) + geom_point(aes(color = Status)) + facet_wrap(~Status)+ theme_bw() + ggtitle("Model fit with 95% CIs and PIs", "solid line = mean, dotted line = median") + geom_line(aes(y = mean_pred), linetype = 1) + geom_line(aes(y = median_pred), linetype = 2) + geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.5) + geom_ribbon(aes(ymin = lpb, ymax = upb), alpha = 0.1)
probs <- ciTools::add_probs(cirrho01, f11, q = 4000, name = c("prob", "lcb", "ucb"), comparison = ">")
ggplot(probs, aes(x = Bilirubin+Albumin+Copper+Alk_Phos+Prothrombin, y = prob)) + ggtitle("Estimated prob. of avg. time lasting longer than 1000 days") + ylim(c(0,1)) + facet_wrap(~Status)+ theme_bw() + geom_line(aes(y = prob)) + geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.5)
quants <- ciTools::add_quantile(cirrho01, f11, p = 0.90, name = c("quant", "lcb", "ucb"))
ggplot(quants, aes(x =Bilirubin+Albumin+Copper+Alk_Phos+Prothrombin , y = N_Days)) + geom_point(aes(color = Status)) + ggtitle("Estimated 90th percentile of condtional failure distribution, with CI") + facet_wrap(~Status)+ theme_bw() + geom_line(aes(y = quant)) + geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.5)
library(flexsurv)
sim_run <- function() {
cov <- data.frame(ID = 1:2000, trt = rbinom(2000, 1, 0.5))
dat <- simsurv(lambdas = 0.1, gammas = 1.5, betas = c(trt = -0.5), x = cov, maxt = 5)
dat <- merge(cov, dat)
mod <- flexsurv::flexsurvspline(Surv(eventtime, status) ~ trt, data = dat)
est <- mod$coefficients[["trt"]] ses <- sqrt(diag(mod$cov))[["trt"]] cil <- est + qnorm(.025) ses ciu <- est + qnorm(.975) ses
c(bias = est - (-0.5), coverage = ((-0.5 > cil) && (-0.5 < ciu))) }
set.seed(908070)
rowMeans(replicate(100, sim_run()))
data.class(cirrhosis_prediction)
mod_weib <- flexsurvspline(Surv(N_Days,Status==1) ~Drug+Bilirubin+Prothrombin+Copper ,data = cirrhosis_prediction, k = 0)
mod_flex <- flexsurv::flexsurvspline(Surv(N_Days,Status==1) ~ Drug+Bilirubin+Prothrombin+Copper, data = cirrhosis_prediction, k = 3)
par(mfrow = c(1,2), cex = 0.85) # graphics parameters plot(mod_weib, main = "Weibull model", ylab = "Survival probability", xlab = "Time") plot(mod_flex, main = "Flexible parametric model", ylab = "Survival probability", xlab = "Time")
true_mod <- flexsurv::flexsurvspline(Surv(N_Days,Status==1) ~ Stage, data = cirrhosis_prediction, k = 3)
sim_run <- function(true_mod) {
cov <- data.frame(ID = 1:2000, Stage = rbinom(2000, 1, 0.5))
dat <- simsurv(betas = true_mod$coefficients, # "true" parameter values x = cov, # covariate data for 200 individuals knots = true_mod$knots, # knot locations for splines logcumhazard = logcumhazard, # definition of log cum hazard maxt = NULL, # no right-censoring interval = c(1E-8,100000)) # interval for root finding
dat <- merge(cov, cirrhosis_prediction)
weib_mod <- flexsurv::flexsurvspline(Surv(N_Days,Status==1) ~ Stage, data = cirrhosis_prediction, k = 0)
flex_mod <- flexsurv::flexsurvspline(Surv(N_Days,Status==1) ~ Stage, data = cirrhosis_prediction, k = 3)
true_loghr <- true_mod$coefficients[["Stage"]] weib_loghr <- weib_mod$coefficients[["Stage"]] flex_loghr <- flex_mod$coefficients[["Stage"]]
c(weib_bias = weib_loghr - true_loghr, flex_bias = flex_loghr - true_loghr) }
set.seed(543543)
rowMeans(replicate(100, sim_run(true_mod = true_mod)))
survfitw<-survreg(Surv(N_Days,Status==0)~.,dist = "weibull",data = cirrhosis_prediction) summary(survfitw)
survfitw survfite<-survreg(Surv(N_Days,Status==1)~.,dist = "exponential",data = cirrhosis_prediction) summary(survfite) survfitln<-survreg(Surv(N_Days,Status==1)~.,dist = "lognormal",data = cirrhosis_prediction) summary(survfitln)
survfitlg<-survreg(Surv(N_Days,Status==1)~.,dist = "loglogistic",data = cirrhosis_prediction) summary(survfitlg)