pivadoc / Minerva-Anestesiologica_HMGB1Dataset

Dataset and R code for Paper Analysis
0 stars 0 forks source link

Dataset #2

Open pivadoc opened 5 years ago

pivadoc commented 5 years ago

HMGB1_Dataset_Final.xlsx CoxRegression.xlsx RCode.txt HMGB1_LOS.xlsx

pivadoc commented 5 years ago
Read Data From Text File

library(survival) library(survivalROC) library (ROCR) library(pROC) library(mice, pos=17) library (lme4)

Read Data From Text File

data <- read.table("C:/Users/Simone/Dropbox/RFile/hmgb1/JournalCriticalCare/HMGB1_Dataset_Final.csv", header=TRUE, sep=";", na.strings=c("", "NA"), dec=".", fill=TRUE, quote=""", comment.char="", strip.white=TRUE) data <- read.table("C:/Users/Simone/Dropbox/RFile/hmgb1/JournalCriticalCare/HMGB1_Dataset_Final.csv", header=TRUE, sep=";", na.strings=c("", "NA"), dec=".", fill=TRUE, quote=""", comment.char="", strip.white=TRUE) ######################################Box Plot for the variables tmp <- melt(data[, c("VM", "CSF_HMGB1", "CSF_Lactate", "CSF_Glucose", "CSF_Protein", "CSF_WBC")], id.vars="VM") ggplot(tmp, aes(factor(VM), y = value, fill=factor(VM))) + geom_boxplot() + facet_wrap(~variable, scales="free_y")

Outlier identification and removal

outlierKD <- function(dt, var) { var_name <- eval(substitute(var),eval(dt)) na1 <- sum(is.na(var_name)) m1 <- mean(var_name, na.rm = T) par(mfrow=c(2, 2), oma=c(0,0,3,0)) boxplot(var_name, main="With outliers") hist(var_name, main="With outliers", xlab=NA, ylab=NA) outlier <- boxplot.stats(var_name)$out mo <- mean(outlier) var_name <- ifelse(var_name %in% outlier, NA, var_name) boxplot(var_name, main="Without outliers") hist(var_name, main="Without outliers", xlab=NA, ylab=NA) title("Outlier Check", outer=TRUE) na2 <- sum(is.na(var_name)) cat("Outliers identified:", na2 - na1, "n") cat("Propotion (%) of outliers:", round((na2 - na1) / sum(!is.na(var_name))100, 1), "n") cat("Mean of the outliers:", round(mo, 2), "n") m2 <- mean(var_name, na.rm = T) cat("Mean without removing outliers:", round(m1, 2), "n") cat("Mean if we remove outliers:", round(m2, 2), "n") response <- readline(prompt="Do you want to remove outliers and to replace with NA? [yes/no]: ") if(response == "y" | response == "yes"){ dt[as.character(substitute(var))] <- invisible(var_name) assign(as.character(as.list(match.call())$dt), dt, envir = .GlobalEnv) cat("Outliers successfully removed", "n") return(invisible(dt)) } else{ cat("Nothing changed", "n") return(invisible(var_name)) } } ##################################Replacing outlier with NA outlierKD(data,CSF_HMGB1) outlierKD(data,CSF_Lactate) outlierKD(data,CSF_WBC) outlierKD(data,CSF_Protein) ##################################Create DatasaetwithNA tempData <- mice(data,m=5,maxit=50,meth='pmm',seed=500) completedData <- complete(tempData,1) editDataset(completedData) #################################Box Plot After Imputation tmp <- melt(completedData[, c("VM", "CSF_HMGB1", "CSF_Lactate", "CSF_Glucose", "CSF_Protein", "CSF_WBC")], id.vars="VM") ggplot(tmp, aes(factor(VM), y = value, fill=factor(VM))) + geom_boxplot() + facet_wrap(~variable, scales="free_y") ############################resulting dataset stored in FinalDaset library (cvAUC) library(ROCR) library(pROC) library(survival) library(survivalROC) library (ROCR) library(pROC) library(mice, pos=17) library (lme4) require(ggplot2) require(GGally) require(reshape2) library(lme4) require(compiler) require(parallel) require(boot) require(lattice) require(glmer) data <- readXL("C:/Users/SIMONE/Dropbox/RFile/hmgb1/JournalCriticalCare/FinalDataset.xlsx", rownames=FALSE, header=TRUE, na="", sheet="DATAsetIMPUTATO", stringsAsFactors=TRUE) data$ICU_Mortality <- as.factor(data$ICU_Mortality) data$Patient <- as.factor(data$Patient) data$VM <- as.factor(data$VM) ############################ ############################COntrol for correlation between variables################################ ggpairs(data[, c("CSF_HMGB1", "CSF_Glucose", "CSF_Protein", "CSF_WBC", "CSF_Lactate")]) ##########################run mixed model mnull <-glmer(VM ~ 1 + (1 | Patient), data = completedData, family = binomial,control = glmerControl(optimizer = "bobyqa"),nAGQ = 1) m1<- glmer(VM ~ CSF_HMGB1 + (1 | Patient), data = completedData, family = binomial,control = glmerControl(optimizer = "bobyqa"),nAGQ = 1) anova(mnull,m1) m2<- glmer(VM ~ CSF_HMGB1 + CSF_Glucose + (1 | Patient), data = completedData, family = binomial,control = glmerControl(optimizer = "bobyqa"),nAGQ = 1) anova(m1,m2) m3<- glmer(VM ~ CSF_HMGB1 + CSF_WBC + (1 | Patient), data = completedData, family = binomial,control = glmerControl(optimizer = "bobyqa"),nAGQ = 1) anova(m2,m3) m4<- glmer(VM ~ CSF_HMGB1 + CSF_WBC + CSF_Glucose+ (1 | Patient), data = completedData, family = binomial,control = glmerControl(optimizer = "bobyqa"),nAGQ = 9) anova(m4,m2) m5<- glmer(VM ~ CSF_HMGB1 + CSF_Lactate + (1 | Patient), data = completedData, family = binomial,control = glmerControl(optimizer = "bobyqa"),nAGQ = 1) anova(m2,m5) m6<- glmer(VM ~ CSF_HMGB1 + CSF_Glucose +CSF_Lactate + (1 | Patient), data = completedData, family = binomial,control = glmerControl(optimizer = "bobyqa"),nAGQ = 9) anova(m2,m6) m7<- glmer(VM ~ CSF_HMGB1 + CSF_Protein + (1 | Patient), data = completedData, family = binomial,control = glmerControl(optimizer = "bobyqa"),nAGQ = 1) anova(m2,m7) coef (m2) summary(m2) se <- sqrt(diag(vcov(m2))) (tab <- cbind(Est = fixef(m2), LL = fixef(m2) - 1.96 se, UL = fixef(m2) + 1.96 * se)) exp(tab) ################################################Performance of different parameters in detectin VM ###################################################### ################################################AUC was made with ripetute measures with raw data############################## predictionsHMGB1<-as.numeric (data$CSF_HMGB1) labels<-as.numeric(data$VM) predictionsGlucose<-as.numeric (data$CSF_Glucose) predictionsProtein<-as.numeric (data$CSF_Protein) predictionsWBC<-as.numeric (data$CSF_WBC) predictionsLactate<-as.numeric (data$CSF_Lactate) ci.pooled.cvAUC(predictionsHMGB1, labels, label.ordering = NULL,folds = NULL, data$Cognome, confidence = 0.95) ci.pooled.cvAUC(predictionsGlucose, labels, label.ordering = NULL,folds = NULL, data$Cognome, confidence = 0.95) ci.pooled.cvAUC(predictionsProtein, labels, label.ordering = NULL,folds = NULL, data$Cognome, confidence = 0.95) ci.pooled.cvAUC(predictionsWBC, labels, label.ordering = NULL,folds = NULL, data$Cognome, confidence = 0.95) ci.pooled.cvAUC(predictionsLactate, labels, label.ordering = NULL,folds = NULL, data$Cognome, confidence = 0.95) outHMGB1 <- cvAUC(predictionsHMGB1, labels) outLactate <- cvAUC(predictionsLactate, labels) outGlucose <- cvAUC(predictionsGlucose, labels) outProtein <- cvAUC(predictionsProtein, labels) outWBC <- cvAUC(predictionsWBC, labels) plot(outHMGB1$perf, col="firebrick4", lty=3, lwd=4) plot(outLactate$perf, col="peru", lty=1, lwd=2, add=TRUE) plot(outGlucose$perf, col="midnightblue", lwd=2,lty=1, add=TRUE) plot(outProtein$perf, col="gold4", lty=1,lwd=2, add=TRUE) plot(outWBC$perf, col="green4", lty=1,lwd=2, add=TRUE) legend(0.4,0.2,box.col="white",cex=0.8,border= "white", title="CSF Variables AUC", c("HMGB1=0.83 (95% C.I. = 0.72 - 0.94)", "Lactate =0.52 (95% C.I. =0.35 - 0.69)","Protein =0.49 (95% C.I. =0.35 - 0.63)","Glucose = 0.24 (95% C.I. =0.09-0.39)","WBC = 0.79 (95% C.I. = 0.69 - 0.91)"), col=c("firebrick4","peru","midnightblue","gold4","green4"), lty=3, lwd=4)

#############################################################################Cox Regression Cox <- readXL("C:/Users/Simone/Dropbox/RFile/hmgb1/JournalCriticalCare/CoxRegression.xlsx", rownames=FALSE, header=TRUE, na="", sheet="Foglio1", stringsAsFactors=TRUE) fit<- coxph (Surv(Time1,Time2,VM) ~ CSF_HMGB1 + CSF_Lactate + CSF_Glucose + CSF_WBC + CSF_Protein + Age + Sex + cluster (Patients), data = Cox) fit summary (fit)

check for violation of proportional hazard: if statistically significant there is violation

(residual <- cox.zph(fit)) plot(residual[1]) abline(0,0,col=1,lty=3,lwd=2) abline(h=fit$coef[1],col=3,lwd=2,lty=2) legend ("bottomright",legend=c('Reference line for null effect', "Average Hazard over Time", "Time-varying hazard"), lty=c(3,2,1), col=c(1,3,1),lwd=2) plot(residual[3]) abline(0,0,col=1,lty=3,lwd=2) abline(h=fit$coef[3],col=3,lwd=2,lty=2) legend ("bottomright",legend=c('Ref erence line for null effect', "Average Hazard over Time", "Time-varying hazard"), lty=c(3,2,1), col=c(1,3,1),lwd=2) ###################################CSF-HMGB1 compared to Plasma HMGB1###############################################à RCODE: Dataset <- read.table("C:/Users/pivadoc/Downloads/Datasets/HMGB1_Dataset_Final.csv", header = TRUE, sep = ";", na.strings = c("", "NA"), dec = ".", fill = TRUE, quote = "\"", comment.char = "", strip.white = TRUE) Dataset <- read.table("C:/Users/pivadoc/Downloads/Datasets/HMGB1_Dataset_Final.csv", header = TRUE, sep = ";", na.strings = c("", "NA"), dec = ".", fill = TRUE, quote = "\"", comment.char = "", strip.white = TRUE)

Convert factors or character variables of numeric data to numeric variables

Dataset$CSF_Glucose <- as.numeric(as.character(Dataset$CSF_Glucose)) Dataset$CSF_HMGB1 <- as.numeric(as.character(Dataset$CSF_HMGB1)) Dataset$CSF_Lactate <- as.numeric(as.character(Dataset$CSF_Lactate)) Dataset$CSF_Protein <- as.numeric(as.character(Dataset$CSF_Protein)) Dataset$CSF_WBC <- as.numeric(as.character(Dataset$CSF_WBC)) Dataset$H_LOS <- as.numeric(as.character(Dataset$H_LOS)) Dataset$ICU_LOS <- as.numeric(as.character(Dataset$ICU_LOS)) Dataset$Serum_CSF <- as.numeric(as.character(Dataset$Serum_CSF)) library(nlme, pos = 19)

Remove rows with missing data in specified variables

Dataset <- Dataset[complete.cases(Dataset$CSF_HMGB1, Dataset$Serum_CSF), ] model = lme(CSF_HMGB1 ~ Serum_CSF , random = ~1|Cognome, data=Dataset) summary (model)