Closed pivadoc closed 6 years ago
#######File Usato: C:\Users\Simone\Dropbox\RFile\hmgb1\JournalCriticalCare\HMGB1_Dataset_Final
library(survival)
library(survivalROC)
library (ROCR)
library(pROC)
library(mice, pos=17)
library (lme4)
data <- read.table("C:/Users/Simone/Dropbox/RFile/hmgb1/JournalCriticalCare/HMGB1_Dataset_Final.csv", header=TRUE, sep=";", na.strings=c("", "NA"), dec=".", fill=TRUE, quote="\"", comment.char="", strip.white=TRUE) data <- read.table("C:/Users/Simone/Dropbox/RFile/hmgb1/JournalCriticalCare/HMGB1_Dataset_Final.csv", header=TRUE, sep=";", na.strings=c("", "NA"), dec=".", fill=TRUE, quote="\"", comment.char="", strip.white=TRUE) ######################################Box Plot for the variables tmp <- melt(data[, c("VM", "CSF_HMGB1", "CSF_Lactate", "CSF_Glucose", "CSF_Protein", "CSF_WBC")], id.vars="VM") ggplot(tmp, aes(factor(VM), y = value, fill=factor(VM))) + geom_boxplot() + facet_wrap(~variable, scales="free_y")
outlierKD <- function(dt, var) { var_name <- eval(substitute(var),eval(dt)) na1 <- sum(is.na(var_name)) m1 <- mean(var_name, na.rm = T) par(mfrow=c(2, 2), oma=c(0,0,3,0)) boxplot(var_name, main="With outliers") hist(var_name, main="With outliers", xlab=NA, ylab=NA) outlier <- boxplot.stats(var_name)$out mo <- mean(outlier) var_name <- ifelse(var_name %in% outlier, NA, var_name) boxplot(var_name, main="Without outliers") hist(var_name, main="Without outliers", xlab=NA, ylab=NA) title("Outlier Check", outer=TRUE) na2 <- sum(is.na(var_name)) cat("Outliers identified:", na2 - na1, "n") cat("Propotion (%) of outliers:", round((na2 - na1) / sum(!is.na(var_name))100, 1), "n") cat("Mean of the outliers:", round(mo, 2), "n") m2 <- mean(var_name, na.rm = T) cat("Mean without removing outliers:", round(m1, 2), "n") cat("Mean if we remove outliers:", round(m2, 2), "n") response <- readline(prompt="Do you want to remove outliers and to replace with NA? [yes/no]: ") if(response == "y" | response == "yes"){ dt[as.character(substitute(var))] <- invisible(var_name) assign(as.character(as.list(match.call())$dt), dt, envir = .GlobalEnv) cat("Outliers successfully removed", "n") return(invisible(dt)) } else{ cat("Nothing changed", "n") return(invisible(var_name)) } } ##################################Replacing outlier with NA outlierKD(data,CSF_HMGB1) outlierKD(data,CSF_Lactate) outlierKD(data,CSF_WBC) outlierKD(data,CSF_Protein) ##################################Create DatasaetwithNA tempData <- mice(data,m=5,maxit=50,meth='pmm',seed=500) completedData <- complete(tempData,1) editDataset(completedData) #################################Box Plot After Imputation tmp <- melt(completedData[, c("VM", "CSF_HMGB1", "CSF_Lactate", "CSF_Glucose", "CSF_Protein", "CSF_WBC")], id.vars="VM") ggplot(tmp, aes(factor(VM), y = value, fill=factor(VM))) + geom_boxplot() + facet_wrap(~variable, scales="free_y") #################################Running Mixed Effect Model m1 <- glmer(VM ~ CSF_HMGB1 + CSF_Glucose + CSF_Protein + CSF_WBC + CSF_Lactate + (1 | Cognome), data = completedData, family = binomial) m2 <- glmer(VM ~ CSF_HMGB1 + CSF_Glucose + CSF_Protein + CSF_Lactate+ (1 | Cognome), data = completedData, family = binomial) m3 <- glmer(VM ~ CSF_HMGB1 + CSF_Glucose + CSF_Lactate+ (1 | Cognome), data = completedData, family = binomial) m4 <- glmer(VM ~ CSF_HMGB1 + CSF_Lactate+ (1 | Cognome), data = completedData, family = binomial) m5 <- glmer(VM ~ CSF_HMGB1 +(1 | Cognome), data = completedData, family = binomial) anova (m1, m2) anova (m1, m3) anova (m3, m4) anova (m3, m5) anova (m2,m3) coef (m2) summary(m2) se <- sqrt(diag(vcov(m2))) (tab <- cbind(Est = fixef(m2), LL = fixef(m2) - 1.96 se, UL = fixef(m2) + 1.96 * se)) exp(tab) ###################COX REGRESSION ###################https://www-ncbi-nlm-nih-gov.proxy.unibs.it/pmc/articles/PMC6015946/
Cox <- readXL("C:/Users/Simone/Dropbox/RFile/hmgb1/JournalCriticalCare/CoxRegression.xlsx", rownames=FALSE, header=TRUE, na="", sheet="Foglio1", stringsAsFactors=TRUE) fit<- coxph (Surv(Time1,Time2,VM) ~ CSF_HMGB1 + CSF_Lactate + CSF_Glucose + CSF_WBC + CSF_Protein + Age + Sex + cluster (Patients), data = Cox) fit summary (fit)
(residual <- cox.zph(fit)) plot(residual[1]) abline(0,0,col=1,lty=3,lwd=2) abline(h=fit$coef[1],col=3,lwd=2,lty=2) legend ("bottomright",legend=c('Reference line for null effect', "Average Hazard over Time", "Time-varying hazard"), lty=c(3,2,1), col=c(1,3,1),lwd=2) plot(residual[3]) abline(0,0,col=1,lty=3,lwd=2) abline(h=fit$coef[3],col=3,lwd=2,lty=2) legend ("bottomright",legend=c('Reference line for null effect', "Average Hazard over Time", "Time-varying hazard"), lty=c(3,2,1), col=c(1,3,1),lwd=2) ####################ROC Curve for each variables (repeat the command for each variables of interest rocHMGB1<-roc(Cox$VM, Cox$CSF_HMGB1, plot=TRUE,percent=roc1$percent,col="red") coords(rocHMGB1, "best", ret=c("threshold", "specificity", "1-npv")) sens.ciHMGB1 <- ci.se(rocHMGB1, specificities=seq(0, 100, 5)) plot(sens.ciHMGB1, type="shape", col="lightblue") legend ("bottomright",legend=c('AUC=0.8205 (95% C.I.=0.66-0.84)', "Treshold=9.04"), bg='lightblue') legend("topleft", legend=c('CSF_HMGB1')) auc (Cox$VM, Cox$CSF_HMGB1,boot.n=100) ci.auc (Cox$VM, Cox$CSF_HMGB1, boot.n=100, conf.level=0.95, stratified=FALSE, partial.auc=c(1, .8),partial.auc.focus="se", partial.auc.correct=TRUE) rocLactate<- roc(Cox$VM, Cox$CSF_Lactate, plot=TRUE,percent=roc1$percent,col="lightblue") coords(rocLactate, "best", ret=c("threshold", "specificity", "1-npv")) sens.ciLactate <- ci.se(rocLactate, specificities=seq(0, 100, 5)) plot(sens.ciLactate, type="shape", col="indianred1") legend ("bottomright",legend=c('AUC=0.52 (95% C.I.=0.53-0.68)', "Treshold=2.35"), bg='indianred1') legend("topleft", legend=c('CSF_Lactate')) auc (Cox$VM, Cox$CSF_Lactate,boot.n=100) ci.auc (Cox$VM, Cox$CSF_Lactate, boot.n=100, conf.level=0.95, stratified=FALSE, partial.auc=c(1, .8),partial.auc.focus="se", partial.auc.correct=TRUE) rocWBC<- roc(Cox$VM, Cox$CSF_WBC, plot=TRUE,percent=roc1$percent,col="darkred") coords(rocWBC, "best", ret=c("threshold", "specificity", "1-npv")) sens.ciWBC <- ci.se(rocWBC, specificities=seq(0, 100, 5)) plot(sens.ciWBC, type="shape", col="darkred") legend ("bottomright",legend=c('AUC=0.8 (95% C.I.=0.57-0.85)', "Treshold=161"), bg='darkred') legend("topleft", legend=c('CSF_WBC')) auc (Cox$VM, Cox$CSF_WBC,boot.n=100) ci.auc (Cox$VM, Cox$CSF_WBC, boot.n=100, conf.level=0.95, stratified=FALSE, partial.auc=c(1, .8),partial.auc.focus="se", partial.auc.correct=TRUE) rocProtein<- roc(Cox$VM, Cox$CSF_Protein, plot=TRUE,percent=roc1$percent,col='yellowgreen') coords(rocProtein, "best", ret=c("threshold", "specificity", "1-npv")) sens.ciProtein <- ci.se(rocProtein, specificities=seq(0, 100, 5)) plot(sens.ciProtein, type="shape", col="yellowgreen") legend ("bottomright",legend=c('AUC=0.52 (95% C.I.=0.47-0.60)', "Treshold=91"), bg='yellowgreen') legend("topleft", legend=c('CSF_Protein')) auc (Cox$VM, Cox$CSF_Protein,boot.n=100) ci.auc (Cox$VM, Cox$CSF_Protein, boot.n=100, conf.level=0.95, stratified=FALSE, partial.auc=c(1, .8),partial.auc.focus="se", partial.auc.correct=TRUE) rocGlucose<- roc(Cox$VM, Cox$CSF_Glucose, plot=TRUE,percent=roc1$percent,col="slategray2") coords(rocGlucose, "best", ret=c("threshold", "specificity", "1-npv")) sens.ciGlucose <- ci.se(rocGlucose, specificities=seq(0, 100, 5)) plot(sens.ciGlucose, type="shape", col="lightskyblue3") legend ("bottomright",legend=c('AUC=0.76 (95% C.I.=0.51-0.83)', "Treshold=54"), bg='slategray2') legend("topleft", legend=c('CSF_Glucose')) auc (Cox$VM, Cox$CSF_Glucose,boot.n=100) ci.auc (Cox$VM, Cox$CSF_Glucose, boot.n=100, conf.level=0.95, stratified=FALSE, partial.auc=c(1, .8),partial.auc.focus="se", partial.auc.correct=TRUE)
roc.test(rocGlucose, rocWBC, reuse.auc=FALSE)
HMGB1_Dataset_Final.zip