When running a methodList<- c('qrnn','gamboost','rpart','ctree','xgbDART','earth','rlm','kknn','qrf') with dataset (356720 observations and 14 predictors), I noticed that my HPC basically takes more than 4 TB ram with consumes more times than a week. This codes, parallel core utilization is also not significant. It only takes 3 to 5 per cent core utilization in parallel computing.
When running a methodList<- c('qrnn','gamboost','rpart','ctree','xgbDART','earth','rlm','kknn','qrf') with dataset (356720 observations and 14 predictors), I noticed that my HPC basically takes more than 4 TB ram with consumes more times than a week. This codes, parallel core utilization is also not significant. It only takes 3 to 5 per cent core utilization in parallel computing.
My code is below
bakatu<-c("raster","caret","e1071","kernlab","randomForest","parallel", "doParallel","qrnn", "caretEnsemble", "monmlp","arm","mboost","plyr","import","party","partykit","rpart","xgboost","MASS","earth","kknn","quantregForest","future","doMC") lapply(bakatu, require, character.only = TRUE) aa<-readRDS("aa.rds") bb<-readRDS("bb.rds") af<-readRDS("af.rds") bf_train_r<-readRDS("bf_train_r.rds")
cl <-makeCluster(124,type="FORK")
registerDoParallel(cl)
myControl <- trainControl(method="repeatedcv", number=10, repeats=5, allowParallel = TRUE)
control <- trainControl(method="cv", number=10, repeats = 3, savePredictions=TRUE, classProbs=FALSE,search='random',allowParallel = TRUE)
methodList<- c('qrnn','gamboost','rpart','ctree','xgbDART','earth','rlm','kknn','qrf') registerDoSEQ() registerDoMC(cores = 123) models <- caretList(bf~.,data=na.omit(aa), trControl=myControl, methodList=methodList) results.all <- resamples(models) cv.models<-as.data.frame(results.all[2]) summary(results.all) stackControl <- trainControl(method="repeatedcv", number=10, repeats=5, savePredictions=TRUE, allowParallel = TRUE)
set.seed(1856)
stack.rf <- caretStack(models, method="rf", trControl=stackControl) stack.lm <- caretStack(models, method="lm", trControl=stackControl) stack.bayesglm <- caretStack(models, method="bayesglm", trControl=stackControl) stack.blassoAveraged <- caretStack(models, method="blassoAveraged", trControl=stackControl)