Marshhhhh commented 7 years ago

Yesterday I tried to imagine an unreal situation: what would be different in my life if I were of the opposite sex? Hmm... I would be a strong blond man, because I would do sports training. I would also definitely have a driver's license, because I don't have one now. And I would want to know how a gun works, why a plane flies and how a rocket takes off. I think that men have a more constructive way of thinking. As for my profession, I would change my speciality a little and learn programming. An additional point: I would have to serve in the military. I would do it. What else? I don't think I would have any new interests in my life. I would still study mathematics and economics, as I do now. And I would still want to visit different countries and discover the differences between cultures, as I do now. Because I would have the same genes, parents and life as now.

Marshhhhh commented 7 years ago

# 1 Logit bd1 cat

# prediction()/performance() are first used here, but the only
# library("ROCR") call visible in this script sits in the plotting section
# further down, so load the package at first use.
library(ROCR)

# NOTE(review): `c` is used as a column selector and is not defined in the
# visible code -- presumably a vector of column names/indices; confirm.
d_t <- train.cat1[, c]

# Backward stepwise selection starting from the full logit model.
# Columns 1 and 14 of `d_t` are excluded from the predictors.
mylogit1 <- step(
  glm(bd1 ~ ., data = d_t[, -c(1, 14)], family = binomial(link = "logit")),
  direction = "backward",
  trace = FALSE
)

# Predicted probabilities on the held-out set, rounded to 6 decimals,
# then the ROC curve (TPR vs FPR) via ROCR.
testp1 <- round(predict(mylogit1, type = "response", newdata = test.cat1), 6)
pred1 <- prediction(testp1, test.cat1$bd1)
perf1 <- performance(pred1, "tpr", "fpr")

# 2 RandomForest bd1

# Fix the RNG so the subsample drawn inside RF() is reproducible.
set.seed(124)

# NOTE(review): `c` is a column selector that is not defined in the visible
# code; confirm what it contains.
d_t <- train[, c]

# Drop column 13 -- presumably the other target (bd3), since the model
# below is fit on bd1; verify against the column layout of `train`.
d_t <- d_t[, -c(13)]
d_t$bd1 <- as.factor(d_t$bd1)

# Fit a random forest for `bd1` on a random subsample of `d_t` and score
# new data with it.
#
# Args:
#   d_t:     training data frame; must contain `id_credit` and the target
#            column `bd1`.
#   size:    number of `id_credit` values to draw without replacement;
#            capped at the number of ids available.
#   ntree:   number of trees to grow (previously accepted but ignored --
#            the call hard-coded 300, which is still the default).
#   newdata: data to score. Defaults to the global `test`, which the
#            original body read directly.
#
# Returns:
#   The second column of the class-probability matrix produced by
#   predict(..., type = "prob"), one value per row of `newdata`.
RF <- function(d_t, size = 10000, ntree = 300, newdata = test) {
  d_t$bd1 <- as.factor(d_t$bd1)

  # sample() without replacement fails when asked for more draws than
  # there are ids, so never request more than are available.
  size <- min(size, length(d_t$id_credit))
  sampled_ids <- sample(d_t$id_credit, replace = FALSE, size = size)
  ds1 <- d_t[d_t$id_credit %in% sampled_ids, ]

  # NOTE(review): `bd1 ~ .` also uses `id_credit` as a predictor; the logit
  # and SVM fits in this script drop column 1 first -- confirm intent.
  # NOTE(review): the importance/proximity results are never read in this
  # function -- confirm they are needed.
  # NOTE(review): `type` is not among randomForest()'s documented
  # arguments -- confirm it has any effect.
  arf1 <- randomForest(
    bd1 ~ .,
    data = ds1,
    # replace = F,
    ntree = ntree,
    importance = TRUE,
    proximity = TRUE,
    mtry = 3,
    type = "classification"
  )

  predict(arf1, newdata, type = "prob")[, 2]
}

# Score the test set with the bd1 random forest.
A1 <- RF(d_t)

# Collect the scores next to the credit ids.
MA <- as.data.frame(test[, "id_credit"])
MA$A9 <- A1

# Average all score columns per row. `drop = FALSE` is required: with a
# single score column `MA[, -1]` collapses to a plain vector, and
# apply(<vector>, 1, mean) stops with "dim(X) must have a positive length".
testp5 <- rowMeans(MA[, -1, drop = FALSE])
pred5 <- prediction(testp5, test$bd1)
perf5 <- performance(pred5, "tpr", "fpr")

# 2 RandomForest bd3

# Fix the RNG so the subsample drawn inside RF() is reproducible.
set.seed(124)

# NOTE(review): `c` is a column selector that is not defined in the visible
# code; confirm what it contains.
d_t <- train[, c]

# Drop column 12 -- presumably the other target (bd1), since the model
# below is fit on bd3; verify against the column layout of `train`.
d_t <- d_t[, -c(12)]
d_t$bd3 <- as.factor(d_t$bd3)

# Interactive check of the remaining columns.
names(d_t)

# Fit a random forest for `bd3` on a random subsample of `d_t` and score
# new data with it. This redefines the earlier `RF` (which targets bd1).
#
# Args:
#   d_t:     training data frame; must contain `id_credit` and the target
#            column `bd3`.
#   size:    number of `id_credit` values to draw without replacement;
#            capped at the number of ids available.
#   ntree:   number of trees to grow (previously accepted but ignored --
#            the call hard-coded 300, which is still the default).
#   newdata: data to score. Defaults to the global `test`, which the
#            original body read directly.
#
# Returns:
#   The second column of the class-probability matrix produced by
#   predict(..., type = "prob"), one value per row of `newdata`.
RF <- function(d_t, size = 10000, ntree = 300, newdata = test) {
  d_t$bd3 <- as.factor(d_t$bd3)

  # sample() without replacement fails when asked for more draws than
  # there are ids, so never request more than are available.
  size <- min(size, length(d_t$id_credit))
  sampled_ids <- sample(d_t$id_credit, replace = FALSE, size = size)
  ds1 <- d_t[d_t$id_credit %in% sampled_ids, ]

  # NOTE(review): `bd3 ~ .` also uses `id_credit` as a predictor; the logit
  # and SVM fits in this script drop column 1 first -- confirm intent.
  # NOTE(review): the importance/proximity results are never read in this
  # function -- confirm they are needed.
  # NOTE(review): `type` is not among randomForest()'s documented
  # arguments -- confirm it has any effect.
  arf1 <- randomForest(
    bd3 ~ .,
    data = ds1,
    # replace = F,
    ntree = ntree,
    importance = TRUE,
    proximity = TRUE,
    mtry = 3,
    type = "classification"
  )

  predict(arf1, newdata, type = "prob")[, 2]
}

# Score the test set with the bd3 random forest.
A1 <- RF(d_t)

# Collect the scores next to the credit ids.
MAA <- as.data.frame(test[, "id_credit"])
MAA$A10 <- A1

# Average all score columns per row. `drop = FALSE` is required: with a
# single score column `MAA[, -1]` collapses to a plain vector, and
# apply(<vector>, 1, mean) stops with "dim(X) must have a positive length".
testp6 <- rowMeans(MAA[, -1, drop = FALSE])
pred6 <- prediction(testp6, test$bd3)
perf6 <- performance(pred6, "tpr", "fpr")

# CTree bd1

# Conditional inference trees correct for known biases in CHAID and CART.

library(party)
set.seed(124)

# NOTE(review): `c` is a column selector that is not defined in the visible
# code; confirm what it contains.
d_t <- train[, c]
# Drop column 13 -- presumably the other target (bd3); verify.
d_t <- d_t[, -c(13)]

# Disabled in the pasted script (the line appears in the page's heading
# outline, i.e. it started with `#`). It also targets bd3, while the tree
# below is fit on bd1.
# d_t$bd3 <- as.factor(d_t$bd3)

cfit1 <- ctree(bd1 ~ ., data = d_t)
plot(cfit1)

# treeresponse() returns one element per test row; stack them into a
# data frame with one row per observation.
resultdfr <- as.data.frame(do.call("rbind", treeresponse(cfit1, newdata = test)))

# The first column of the stacked responses is used as the score.
testp9 <- resultdfr[, 1]
pred9 <- prediction(testp9, test$bd1)
perf9 <- performance(pred9, "tpr", "fpr")

# CTree bd3

# Conditional inference trees correct for known biases in CHAID and CART.

library(party)
set.seed(124)

# NOTE(review): `c` is a column selector that is not defined in the visible
# code; confirm what it contains.
d_t <- train[, c]
# Drop column 12 -- presumably the other target (bd1); verify.
d_t <- d_t[, -c(12)]

# Disabled in the pasted script (the line appears in the page's heading
# outline, i.e. it started with `#`).
# d_t$bd3 <- as.factor(d_t$bd3)

cfit2 <- ctree(bd3 ~ ., data = d_t)
# Plot the tree that was just fitted (the original plotted cfit1 here,
# a leftover from the bd1 section).
plot(cfit2)

# treeresponse() returns one element per test row; stack them into a
# data frame with one row per observation.
resultdfr <- as.data.frame(do.call("rbind", treeresponse(cfit2, newdata = test)))

# The first column of the stacked responses is used as the score.
testp8 <- resultdfr[, 1]
pred8 <- prediction(testp8, test$bd3)
perf8 <- performance(pred8, "tpr", "fpr")

# Interactive inspection of the bd1 tree's predictions; the result is not
# stored. NOTE(review): this still refers to cfit1 -- confirm whether
# cfit2 was intended.
drop(Predict(cfit1, test))

# 11 Logit bd3 cat

# NOTE(review): `c` is a column selector that is not defined in the visible
# code; confirm what it contains.
d_t <- train.cat3[, c]

# Flip the training target: 1 becomes 0, everything else becomes 1.
# NOTE(review): the evaluation below uses test.cat3$bd3 as-is; confirm the
# test labels use the same (flipped) coding.
d_t$bd3 <- ifelse(d_t$bd3 == 1, 0, 1)

# Backward stepwise selection starting from the full logit model.
# Columns 1 and 13 of `d_t` are excluded from the predictors.
mylogit11 <- step(
  glm(bd3 ~ ., data = d_t[, -c(1, 13)], family = binomial(link = "logit")),
  direction = "backward",
  trace = FALSE
)

# Predicted probabilities on the held-out set, rounded to 6 decimals,
# then the ROC curve (TPR vs FPR) via ROCR.
testp11 <- round(predict(mylogit11, type = "response", newdata = test.cat3), 6)
pred11 <- prediction(testp11, test.cat3$bd3)
perf11 <- performance(pred11, "tpr", "fpr")

# SVM

library("e1071")
set.seed(124)

# NOTE(review): `c` is a column selector that is not defined in the visible
# code; confirm what it contains.
d_t <- train[, c]
# Drop column 13 -- presumably the other target (bd3); verify.
d_t <- d_t[, -c(13)]
d_t$bd1 <- as.factor(d_t$bd1)

# Linear-kernel SVM on everything except column 1, without feature scaling.
# The fitted model is not used anywhere else in the visible script.
sv <- svm(bd1 ~ ., d_t[, -1], kernel = "linear", cost = 10, scale = FALSE)

# Mean all

# Average of the bd3 model scores. The original assigned this to `testp0`
# and overwrote it on the very next statement, so the value was never used;
# it is kept under its own name instead.
testp0_bd3 <- rowMeans(cbind(testp11, testp6, testp8))

# Average of the bd1 model scores (logit, random forest, ctree).
testp0 <- rowMeans(cbind(testp1, testp5, testp9))

# All three averaged scores were predicted on the test set, so evaluate
# against the test labels. The original passed train.cat3$bd1, i.e. training
# labels from the bd3 data set.
# NOTE(review): assumes `test` and `test.cat1` hold the same rows in the
# same order -- confirm.
pred0 <- prediction(testp0, test$bd1)
perf0 <- performance(pred0, "tpr", "fpr")

# plotting logistic results vs. random forest ROC

library("ROCR")
par(font = 6, font.lab = 6, font.main = 6, cex.lab = 1, cex.axis = 1)

# Base plot: ROC curve of the bd1 logit model, with a light grid and the
# diagonal reference line.
plot(perf1, col = "darkorange", lty = 1, lwd = 3, type = "l",
     xlab = "1-Специфичность",
     ylab = "Чувствительность",
     main = list("ROC-кривая", cex = 1, font = 1))
abline(h = seq(0, 1, 0.05), v = seq(0, 1, 0.05), col = "lightgray", lty = 3)
abline(a = 0, b = 1, col = "gray60")

# Redraw the first curve so it sits on top of the grid lines.
plot(perf1, col = "darkorange", lty = 1, lwd = 2, type = "l", add = TRUE)

# Disabled in the pasted script (these lines appear in the page's heading
# outline, i.e. they started with `#`); perf2..perf4 are not created in the
# visible code.
# plot(perf2,col="tomato1",lty=1, lwd=2,type="l",add=TRUE)
# plot(perf3,col="dodgerblue",lty=1, lwd=2,type="l",add=TRUE)
# plot(perf4,col="turquoise2",lty=1, lwd=2,type="l",add=TRUE)

# Remaining models, in the same order as the legend colours.
plot(perf11, col = "dodgerblue", lty = 1, lwd = 2, type = "l", add = TRUE)
plot(perf5, col = "yellow", lty = 1, lwd = 2, type = "l", add = TRUE)
plot(perf6, col = "green", lty = 1, lwd = 2, type = "l", add = TRUE)
plot(perf9, col = "gray", lty = 1, lwd = 2, type = "l", add = TRUE)
plot(perf8, col = "pink", lty = 1, lwd = 2, type = "l", add = TRUE)
plot(perf0, col = "black", lty = 1, lwd = 2, type = "l", add = TRUE)

# Extract the AUC of a ROCR prediction object as a plain numeric value.
auc_value <- function(pred) {
  unlist(slot(performance(pred, "auc"), "y.values"))
}

auc1 <- auc_value(pred1)

# Disabled in the pasted script (these lines appear in the page's heading
# outline, i.e. they started with `#`).
# auc2 <- performance(pred2,"auc")
# auc2 <- unlist(slot(auc2, "y.values"))
# auc3 <- performance(pred3,"auc")
# auc3 <- unlist(slot(auc3, "y.values"))

# NOTE(review): pred4 is not created anywhere in the visible code, and auc4
# is only referenced from a commented-out legend entry -- confirm this line
# is still wanted.
auc4 <- auc_value(pred4)

auc11 <- auc_value(pred11)
auc5 <- auc_value(pred5)
auc6 <- auc_value(pred6)
auc9 <- auc_value(pred9)
auc8 <- auc_value(pred8)
auc0 <- auc_value(pred0)

# Legend labels: Gini = 2 * (AUC - 0.5) for every curve that is drawn.
# The Gini_L2 entry is disabled like its neighbours (the line appears in the
# page's heading outline, i.e. it started with `#`); with it active there
# would be 8 labels for the 7 colours passed to legend() below.
leg <- c(paste0("Gini_L1 = ", round((auc1 - 0.5) * 2, digits = 5)),
         # paste0("Gini_L2 = ",round((auc2-0.5)*2,digits=4),"\n"),
         # paste0("Gini_RF1 = ",round((auc3-0.5)*2,digits=4),"\n"),
         # paste0("Gini_RF2 = ",round((auc4-0.5)*2,digits=5),"\n"),
         paste0("Gini_L11 = ", round((auc11 - 0.5) * 2, digits = 5)),
         paste0("Gini_RF10 = ", round((auc5 - 0.5) * 2, digits = 5)),
         paste0("Gini_RF30 = ", round((auc6 - 0.5) * 2, digits = 5)),
         paste0("Gini_Ct = ", round((auc9 - 0.5) * 2, digits = 5)),
         paste0("Gini_Ct2 = ", round((auc8 - 0.5) * 2, digits = 5)),
         paste0("Gini_All = ", round((auc0 - 0.5) * 2, digits = 5)))

# Colours follow the order in which the curves were added to the plot.
legend(0.4, 0.5, cex = 0.8, bty = "n", lwd = 2, lty = c(1, 1),
       col = c("darkorange", "dodgerblue", "yellow", "green", "gray",
               "pink", "black"),
       legend = leg)

# confusion matrix

library(e1071)
library(caret)

# Classify the bd3 logit scores with a 0.3 cut-off.
predicted_class <- factor(ifelse(testp11 > 0.3, 1, 0), levels = c(0, 1))

# testp11 was predicted on test.cat3, so compare against the test labels
# (the original passed train.cat3$bd3).
# NOTE(review): assumes bd3 is coded 0/1 in test.cat3 -- confirm.
actual_class <- factor(test.cat3$bd3, levels = c(0, 1))

# caret expects `data` = predicted classes and `reference` = true classes,
# both as factors with the same levels; the original had them swapped and
# passed non-factors.
confusionMatrix(data = predicted_class, reference = actual_class,
                positive = "1")

Marshhhhh commented 7 years ago

http://futurebanking.ru/reglamentbank/article/1882

Marshhhhh commented 7 years ago

https://yihui.name/knitr/options/

Marshhhhh commented 7 years ago

http://course-bd-and-pa-bsuir.github.io/r-statistics/

Marshhhhh / first

й #15

1 Logit bd1 cat

2 RandomForest bd1

replace = F,

2 RandomForest bd3

CTree bd1

conditional inference trees corrects for known biases in chaid and cart

d_t$bd3 <- as.factor(d_t$bd3)

CTree bd3

conditional inference trees corrects for known biases in chaid and cart

d_t$bd3 <- as.factor(d_t$bd3)

11 Logit bd3 cat

SVM

Mean all

plotting logistic results vs. random forest ROC

plot(perf2,col="tomato1",lty=1, lwd=2,type="l",add=TRUE)

plot(perf3,col="dodgerblue",lty=1, lwd=2,type="l",add=TRUE)

plot(perf4,col="turquoise2",lty=1, lwd=2,type="l",add=TRUE)

auc2 <- performance(pred2,"auc")

auc2 <- unlist(slot(auc2, "y.values"))

auc3 <- performance(pred3,"auc")

auc3 <- unlist(slot(auc3, "y.values"))

paste0("Gini_L2 = ",round((auc2-0.5)*2,digits=4),"\n"),

connfusion matrix