# Issue (open): opened by Marshhhhh 8 years ago
# ---- German credit data: correlation screening + logistic scoring model ----
# Reads "German Data.csv" (no header row, so columns are V1..V21), writes
# thresholded rank-correlation tables, fits a full and a stepwise-reduced
# logistic regression on V21, and compares their ROC curves.
library(pROC)

setwd("C:/Users/qwerty/Desktop/Скоринг/1")
Data <- read.csv2("German Data.csv", header = FALSE)
str(Data)

# Rank correlations are computed while every column is still numeric.
Cor1 <- cor(Data, method = "spearman")
Cor2 <- cor(Data, method = "kendall")

# Keep only coefficients with |r| > 0.5 and replace the rest with "-".
# Each matrix is masked with its OWN values (the Kendall table used to be
# filled with values taken from the Spearman table).
Cor1 <- ifelse(abs(Cor1) > 0.5, Cor1, "-")
Cor2 <- ifelse(abs(Cor2) > 0.5, Cor2, "-")

write.table(Cor1, file = "C:/Users/qwerty/Desktop/Скоринг/1/Cor1_Sp.csv")
write.table(Cor2, file = "C:/Users/qwerty/Desktop/Скоринг/1/Cor2_Ken.csv")

# Columns treated as categorical predictors.
cc <- c(1, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 17, 19, 20)
Data[cc] <- lapply(Data[cc], as.factor)

# Drop the columns excluded from the model.
Data1 <- Data[, -c(2, 16, 17, 18)]

# Shift the response down by one before fitting the binomial model.
Data1$V21 <- Data1$V21 - 1
mylogit <- glm(V21 ~ ., data = Data1, family = "binomial")
mylogit_tab_1 <- summary(mylogit)
mylogit_tab_2 <- confint(mylogit)

# A summary.glm object cannot be written by write.table(); export its
# coefficient table instead.
write.table(coef(mylogit_tab_1),
            file = "C:/Users/qwerty/Desktop/Скоринг/1/tab_model_1.csv")
write.table(mylogit_tab_2,
            file = "C:/Users/qwerty/Desktop/Скоринг/1/tab_model_2.csv")

# Fitted probabilities of the full model.
result <- predict(mylogit, type = "response")
result_all <- round(result, digits = 4)
write.table(result_all, file = "C:/Users/qwerty/Desktop/S/a/probability.csv")

# Stepwise (AIC) reduction of the full model.
mylogit_f <- step(mylogit)

mylogit_tab_1_f <- summary(mylogit_f)
mylogit_tab_2_f <- confint(mylogit_f)
write.table(coef(mylogit_tab_1_f),
            file = "C:/Users/qwerty/Desktop/Скоринг/1/tab_model_1_f.csv")
write.table(mylogit_tab_2_f,
            file = "C:/Users/qwerty/Desktop/Скоринг/1/tab_model_2_f.csv")

result_f <- predict(mylogit_f, type = "response")
result_all_f <- round(result_f, digits = 4)
write.table(result_all_f,
            file = "C:/Users/qwerty/Desktop/S/a/probability_f.csv")

# ROC curves of both models against the modelled response. (The reduced
# model used to be scored against `data_base$F`, an object that does not
# exist in this part of the script.)
roc_model <- roc(Data1$V21, predict(mylogit, type = "response"), ci = TRUE)
roc_model_f <- roc(Data1$V21, predict(mylogit_f, type = "response"),
                   ci = TRUE)

# Draw on the active device, then copy the picture to a JPEG file.
# dev.off() closes the JPEG copy and hands control back to the screen device.
plot_roc <- plot.roc(roc_model)
summary(plot_roc)
dev.copy(jpeg, filename = "C:/Users/qwerty/Desktop/S/R/plot_roc.jpg")
dev.off()

# Overlay the reduced model on the same axes and save the combined plot.
plot_roc_f <- plot.roc(roc_model_f, add = TRUE, col = "blue")
dev.copy(jpeg, filename = "C:/Users/qwerty/Desktop/S/R/plot_roc_f.jpg")
dev.off()

# Compare the two AUCs.
roc.test(roc_model, roc_model_f, method = "delong")
roc.test(roc_model, roc_model_f, method = "bootstrap")
#########
# ---- German credit data, second pass: train/test split and ROC plot ----
# Fits a logistic regression for D on a random 700-row training sample and
# plots the ROC curves of the training and the hold-out rows on one chart.
library(pROC)

setwd("C:/Users/shirobokova_ma/Desktop/аспирин/конференции/ИЭиУ 2016/German")
Data0 <- read.csv2("German Data.csv", header = TRUE)
Data <- read.csv2("Data.csv", header = TRUE)
str(Data)

# Shift the response down by one before fitting the binomial model.
Data$D <- Data$D - 1

# Rank correlations of both data sets (all columns still numeric here).
Cor1_0 <- cor(Data0, method = "spearman")
Cor2_0 <- cor(Data0, method = "kendall")

Cor1 <- cor(Data, method = "spearman")
Cor2 <- cor(Data, method = "kendall")

# Keep only coefficients with |r| > 0.3 and replace the rest with "-".
Cor1 <- ifelse(abs(Cor1) > 0.3, Cor1, "-")
Cor2 <- ifelse(abs(Cor2) > 0.3, Cor2, "-")

write.table(Cor1, file = "C:/Users/shirobokova_ma/Desktop/аспирин/конференции/ИЭиУ 2016/German/Cor1_Sp.csv", sep = ";")
write.table(Cor2, file = "C:/Users/shirobokova_ma/Desktop/аспирин/конференции/ИЭиУ 2016/German/Cor2_Ken.csv", sep = ";")

# Columns treated as categorical predictors.
cc <- c(1, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 17, 19, 20)
Data[cc] <- lapply(Data[cc], as.factor)

# Drop the columns excluded from the model.
Data1 <- Data[, -c(2, 8, 16, 17, 18, 20)]

# Random split: 700 rows for training, the remainder for testing.
# NOTE(review): no seed is set, so the split differs between runs, and
# sample() fails if the data has fewer than 700 rows.
S <- sample(nrow(Data1), size = 700, replace = FALSE)
Data1$Sample <- as.numeric(seq_len(nrow(Data1)) %in% S)
model_cols <- names(Data1) != "Sample"
D1 <- Data1[Data1$Sample == 1, model_cols]  # train sample
D2 <- Data1[Data1$Sample == 0, model_cols]  # test sample

mylogit <- glm(D ~ ., data = D1, family = binomial(link = "logit"))
mylogit_tab <- summary(mylogit)

# Export only the coefficients with p-value below 0.1.
ss <- coef(summary(mylogit))
ss_sig <- subset(ss, ss[, "Pr(>|z|)"] < 0.1)

write.table(ss_sig, file = "C:/Users/shirobokova_ma/Desktop/аспирин/конференции/ИЭиУ 2016/German/tab_model_2.csv", sep = ";")

# ROC on the training rows and on the hold-out rows.
roc_model <- roc(D1$D, predict(mylogit, type = "response"), ci = TRUE)
plot_roc <- plot.roc(roc_model)

roc_model_test <- roc(D2$D, predict(mylogit, newdata = D2, type = "response"),
                      ci = TRUE)
plot_roc_test <- plot.roc(roc_model_test, add = TRUE, col = "blue")

# Presentation-quality chart: empty frame, light grid, diagonal, then both
# curves drawn on top of the grid.
par(mfrow = c(1, 1))
par(font = 6, font.lab = 6, font.main = 6, cex.lab = 1, cex.axis = 1)
# type = "n" draws the axes only; the original `type=1` is not a valid plot
# type, and the curves are drawn explicitly by lines() below anyway.
plot(plot_roc, ylim = c(0, 1), xlab = "Специфичность",
     ylab = "Чувствительность",
     main = list("ROC-кривая", cex = 1, font = 1), type = "n")
abline(h = seq(0, 1, 0.05), v = seq(0, 1, 0.05), col = "lightgray", lty = 3)
abline(a = 1, b = -1, col = "gray60")
lines(plot_roc, type = "l", lwd = 2, col = "darkblue")
lines(plot_roc_test, type = "l", lwd = 2, col = "red")

# Legend text: AUC rounded to two digits and Gini = (AUC - 0.5) * 2.
auc_train <- round(as.numeric(plot_roc$auc), digits = 2)
auc_test <- round(as.numeric(plot_roc_test$auc), digits = 2)
leg <- c(
  paste0("AUC_train = ", auc_train, "\n",
         "Gini_train = ", (auc_train - 0.5) * 2, "\n"),
  paste0("AUC_test = ", auc_test, "\n",
         "Gini_test = ", (auc_test - 0.5) * 2)
)

legend(0.5, 0.5, cex = 0.8, bty = "n", lwd = 2, lty = c(1, 1),
       col = c("darkblue", "red"), legend = leg)
#########
# ---- ROCR: ROC curves and AUC range for active vs. inactive samples ----
# Each column of the two input files is handled as one set of predictions;
# rows from "sample.active" are labelled 1, rows from "sample.inactive" 0.
library("ROCR")

active <- read.table("sample.active", sep = ",", header = FALSE)
inactive <- read.table("sample.inactive", sep = ",", header = FALSE)

target_pred <- as.matrix(rbind(active, inactive))

# Label matrices with the same shape as the prediction matrices. The column
# count is kept in `n_col` so that base::ncol() is not shadowed.
n_col <- ncol(inactive)
class.active <- matrix(1L, nrow = nrow(active), ncol = n_col)
class.inactive <- matrix(0L, nrow = nrow(inactive), ncol = n_col)

target_class <- rbind(class.active, class.inactive)

pred <- prediction(target_pred, target_class)
perf <- performance(pred, "tpr", "fpr")

par(mar = c(5, 5, 2, 2), xaxs = "i", yaxs = "i", cex.axis = 1.3, cex.lab = 1.4)
plot(perf, col = "black", lty = 3, lwd = 3)

# One AUC value per prediction column.
auc <- performance(pred, "auc")
auc <- unlist(slot(auc, "y.values"))

# Annotate the plot with the smallest and largest AUC, rounded to 2 digits.
minauc <- min(round(auc, digits = 2))
maxauc <- max(round(auc, digits = 2))
minauct <- paste0("min(AUC) = ", minauc)
maxauct <- paste0("max(AUC) = ", maxauc)
legend(0.3, 0.6, c(minauct, maxauct, "\n"), border = "white", cex = 1.7,
       box.col = "white")
# Load the helper script from the working directory
# (`sourse` was a typo for base::source).
source("reader.R")
#' Read a delimited text file, guessing the reader, decimal mark and encoding
#'
#' Tries `read.table`, `read.csv`, `read.csv2`, `read.delim` and
#' `read.delim2` in that order and uses the first one that parses the file
#' into more than one column. The decimal mark is whichever of "." / ","
#' turns more columns of the first data row into numbers. If at most 90% of
#' the distinct letters in the column names are Cyrillic/Latin letters, the
#' file is re-read with `encoding = "UTF-8"`.
#'
#' The file is read through its full path; the working directory is left
#' untouched, and a bare file name (no "/") is accepted.
#'
#' @param path Path to the file. Required (the former `path = path` default
#'   could never be evaluated).
#' @param h Logical; does the file have a header row?
#' @return A data frame, or `NULL` (invisibly) if no reader produced more
#'   than one column.
reader <- function(path, h = TRUE) {
  readers <- list(read.table, read.csv, read.csv2, read.delim, read.delim2)
  alphabet <- strsplit("йцукенгшщзхъэждлорпавыфячсмитьбюqwertyuioplkjhgfdsazxcvbnm", "")[[1]]

  # Number of numeric columns in the first data row for a given decimal mark.
  # The header is always skipped here so that the probe looks at data.
  count_numeric <- function(read_fun, dec) {
    first_data_row <- read_fun(path, header = TRUE, nrows = 1, dec = dec)
    sum(vapply(first_data_row, is.numeric, logical(1)))
  }

  for (read_fun in readers) {
    probe <- try(read_fun(path, header = h, nrows = 1), silent = TRUE)
    if (inherits(probe, "try-error")) {
      next
    }

    first_row <- read_fun(path, header = h, nrows = 1, dec = ".")
    if (ncol(first_row) <= 1) {
      # A single column means the separator was not recognised.
      next
    }

    # Distinct letters of the column names, ignoring digits and punctuation.
    # A logical mask is used because x[-grep(...)] drops everything when
    # there is no match.
    name_chars <- strsplit(paste(names(first_row), collapse = ""), "")[[1]]
    name_letters <- name_chars[!grepl("[.0-9,;:()]", name_chars)]
    known_letter <- unique(tolower(name_letters)) %in% alphabet
    # With no letters at all the share is undefined; treat that as unreadable.
    readable <- length(known_letter) > 0 && mean(known_letter) > 0.9

    if (count_numeric(read_fun, ".") > count_numeric(read_fun, ",")) {
      decimal <- "."
    } else {
      decimal <- ","
    }

    if (readable) {
      return(read_fun(path, header = h, dec = decimal))
    }
    return(read_fun(path, header = h, encoding = "UTF-8", dec = decimal))
  }

  invisible(NULL)
}
# ---- Logistic regression on data_R.txt ----
# Forward slashes are used in the path: "\U" inside an R string is an
# (invalid) escape sequence.
setwd("C:/Users/qwerty/Desktop")
tab <- read.table("data_R.txt", sep = ";", dec = ",", header = TRUE)

# Rank correlations must be computed while A1 is still numeric; cor() stops
# with "'x' must be numeric" once a column is a factor.
cor(tab, method = "spearman")
cor(tab, method = "kendall")

# A1 enters the model as a categorical predictor.
tab$A1 <- factor(tab$A1)

mylogit <- glm(
  F ~ A1 + A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 + A13 +
    A14 + A15 + A16 + A17 + A18 + A19 + A20 + A21 + A22,
  data = tab, family = "binomial"
)
summary(mylogit)
confint(mylogit)
# ---- Logistic scoring model on data.txt with hand-made dummy variables ----
# Install pROC only when it is missing instead of on every run.
if (!requireNamespace("pROC", quietly = TRUE)) {
  install.packages("pROC")
}
library(pROC)

# Forward slashes are used in all paths: "\U" inside an R string is an
# (invalid) escape sequence.
setwd("C:/Users/qwerty/Desktop/S")
data_base <- read.table("data.txt", sep = ";", dec = ",", header = TRUE)

# Dummy variables <var>.<k> are created in the current environment, one per
# level k of each categorical column. A dummy equals k where the column
# equals k and 0 elsewhere.
#   created: how many variables <var>.1 .. <var>.<created> are initialised
#   coded:   how many of them are actually filled (the rest stay all-zero)
dummy_spec <- list(
  A1  = c(created = 5,  coded = 4),
  A2  = c(created = 14, coded = 14),
  A3  = c(created = 18, coded = 18),
  A6  = c(created = 23, coded = 23),
  A7  = c(created = 3,  coded = 3),
  A10 = c(created = 4,  coded = 2),
  A11 = c(created = 3,  coded = 3),
  A13 = c(created = 8,  coded = 8),
  A14 = c(created = 3,  coded = 3),
  A15 = c(created = 2,  coded = 2),
  A16 = c(created = 2,  coded = 2),
  A17 = c(created = 2,  coded = 2),
  A20 = c(created = 2,  coded = 2),
  A21 = c(created = 2,  coded = 2)
)
for (var_name in names(dummy_spec)) {
  spec <- dummy_spec[[var_name]]
  for (k in seq_len(spec[["created"]])) {
    dummy <- rep(0, nrow(data_base))
    if (k <= spec[["coded"]]) {
      # which() skips NA comparisons instead of propagating them.
      dummy[which(data_base[[var_name]] == k)] <- k
    }
    assign(paste0(var_name, ".", k), dummy)
  }
}

corel <- cor(data_base, method = "spearman")
write.table(corel, file = "C:/Users/qwerty/Desktop/S/correlation.csv")

# The dummies are not columns of data_base; glm() finds them in the
# environment of the formula.
mylogit <- glm(
  F ~ A1.1 + A1.2 + A1.3 + A1.4 +
    A2.1 + A2.2 + A2.3 + A2.4 + A2.5 + A2.6 + A2.7 + A2.8 + A2.9 + A2.10 +
    A2.11 + A2.12 + A2.13 + A2.14 +
    A3.1 + A3.2 + A3.3 + A3.4 + A3.5 + A3.6 + A3.7 + A3.8 + A3.9 + A3.10 +
    A3.11 + A3.12 + A3.13 + A3.14 + A3.15 + A3.16 + A3.17 + A3.18 +
    A6.1 + A6.2 + A6.3 + A6.4 + A6.5 + A6.6 + A6.7 + A6.8 + A6.9 + A6.10 +
    A6.11 + A6.12 + A6.13 + A6.14 + A6.15 + A6.16 + A6.17 + A6.18 + A6.19 +
    A6.20 + A6.21 + A6.22 + A6.23 +
    A7.1 + A7.2 + A7.3 +
    A8 +
    A10.1 + A10.2 +
    A11.1 + A11.2 + A11.3 +
    A13.1 + A13.2 + A13.3 + A13.4 + A13.5 + A13.6 + A13.7 + A13.8 +
    A14.1 + A14.2 + A14.3 +
    A15.1 + A15.2 +
    A16.1 + A16.2 +
    A17.1 + A17.2 +
    A20.1 + A20.2 +
    A21.1 + A21.2,
  data = data_base, family = "binomial"
)

mylogit_tab_1 <- summary(mylogit)
mylogit_tab_2 <- confint(mylogit)
# A summary.glm object cannot be written by write.table(); export its
# coefficient table instead.
write.table(coef(mylogit_tab_1),
            file = "C:/Users/qwerty/Desktop/S/a/tab_model_1.csv")
write.table(mylogit_tab_2,
            file = "C:/Users/qwerty/Desktop/S/a/tab_model_2.csv")

# Fitted probabilities of the full model.
result <- predict(mylogit, type = "response")
result_all <- round(result, digits = 4)
write.table(result_all, file = "C:/Users/qwerty/Desktop/S/a/probability.csv")

# Stepwise (AIC) reduction of the full model.
mylogit_f <- step(mylogit)

mylogit_tab_1_f <- summary(mylogit_f)
mylogit_tab_2_f <- confint(mylogit_f)
write.table(coef(mylogit_tab_1_f),
            file = "C:/Users/qwerty/Desktop/S/a/tab_model_1_f.csv")
write.table(mylogit_tab_2_f,
            file = "C:/Users/qwerty/Desktop/S/a/tab_model_2_f.csv")

result_f <- predict(mylogit_f, type = "response")
result_all_f <- round(result_f, digits = 4)
write.table(result_all_f,
            file = "C:/Users/qwerty/Desktop/S/a/probability_f.csv")

roc_model <- roc(data_base$F, predict(mylogit, type = "response"), ci = TRUE)
roc_model_f <- roc(data_base$F, predict(mylogit_f, type = "response"),
                   ci = TRUE)

# Draw on the active device, then copy the picture to a JPEG file.
# dev.off() closes the JPEG copy and hands control back to the screen device.
plot_roc <- plot.roc(roc_model)
dev.copy(jpeg, filename = "C:/Users/qwerty/Desktop/S/R/plot_roc.jpg")
dev.off()

# Overlay the reduced model on the same axes and save the combined plot.
plot_roc_f <- plot.roc(roc_model_f, add = TRUE, col = "blue")
dev.copy(jpeg, filename = "C:/Users/qwerty/Desktop/S/R/plot_roc_f.jpg")
dev.off()

# Compare the two AUCs.
roc.test(roc_model, roc_model_f, method = "delong")
roc.test(roc_model, roc_model_f, method = "bootstrap")