Issue opened by szilard, 3 years ago.
xgboost:
# Load dependencies quietly: data.table (fast CSV I/O), ROCR (AUC),
# xgboost (models), Matrix (sparse design matrices).
suppressMessages({
library(data.table)
library(ROCR)
library(xgboost)
library(Matrix)
})
set.seed(123)  # reproducibility of row/column subsampling
# Airline delay benchmark data (1M-row training set).
d_train <- fread("train-1m.csv", showProgress=FALSE)
d_test <- fread("test.csv", showProgress=FALSE)
# One-hot encode train and test TOGETHER so both get identical columns
# (same factor levels); "- 1" drops the intercept column.
X_train_test <- sparse.model.matrix(dep_delayed_15min ~ .-1, data = rbind(d_train, d_test))
n1 <- nrow(d_train)
n2 <- nrow(d_test)
# Split the combined matrix back into train / test by row position.
X_train <- X_train_test[1:n1,]
X_test <- X_train_test[(n1+1):(n1+n2),]
# xgboost's native container; label is 1 for "Y" (delayed >= 15 min), else 0.
dxgb_train <- xgb.DMatrix(data = X_train, label = ifelse(d_train$dep_delayed_15min=='Y',1,0))
# Print the test-set AUC for a fitted xgboost model.
#
# `model` defaults to the global `md`, keeping the original zero-argument
# `auc()` call sites working while removing the hard-wired dependency on a
# global; `X_test` and `d_test$dep_delayed_15min` are still read from the
# calling environment (they are fixed for the whole benchmark).
auc <- function(model = md) {
  phat <- predict(model, newdata = X_test)
  rocr_pred <- prediction(phat, d_test$dep_delayed_15min)
  cat(performance(rocr_pred, "auc")@y.values[[1]],"\n")
}
# Average number of non-zero entries per row of the sparse design matrix
# (the "effective" feature count); used below to set colsample_bytree
# to roughly the 1/sqrt(p) mtry rule.
length(X_train@x)/nrow(X_train)
# GBM: 100 boosting rounds, depth 10, learning rate 0.1, histogram algorithm.
system.time({
md <- xgb.train(data = dxgb_train,
objective = "binary:logistic",
nround = 100, max_depth = 10, eta = 0.1,
tree_method = "hist")
})
auc()
# Random forest emulated with xgboost: ONE boosting round that grows 100
# parallel trees, with bagging (subsample = 0.632, the expected fraction of
# distinct rows in a bootstrap sample) and per-tree column sampling set to
# ~1/sqrt(effective feature count).
system.time({
md <- xgb.train(data = dxgb_train,
objective = "binary:logistic",
nround = 1, num_parallel_tree = 100, max_depth = 10,
subsample = 0.632, colsample_bytree = 1/sqrt(length(X_train@x)/nrow(X_train)),
tree_method = "hist")
})
auc()
GBM:
> system.time({
+ md <- xgb.train(data = dxgb_train,
+ objective = "binary:logistic",
+ nround = 100, max_depth = 10, eta = 0.1,
+ verbosity = 2,
+ tree_method = "hist")
+ })
[07:25:50] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:25:50] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:25:50] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 1314 extra nodes, 0 pruned nodes, max_depth=10
[07:25:50] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:25:50] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 1324 extra nodes, 0 pruned nodes, max_depth=10
[07:25:50] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
...
[07:25:53] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:25:53] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 550 extra nodes, 0 pruned nodes, max_depth=10
[07:25:53] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:25:53] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 762 extra nodes, 0 pruned nodes, max_depth=10
user system elapsed
63.519 0.056 3.833
> auc()
0.7478858
RF:
> system.time({
+ md <- xgb.train(data = dxgb_train,
+ objective = "binary:logistic",
+ nround = 1, num_parallel_tree = 100, max_depth = 10,
+ subsample = 0.632, colsample_bytree = 1/sqrt(length(X_train@x)/nrow(X_train)),
+ verbosity = 2,
+ tree_method = "hist")
+ })
[07:28:02] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:28:02] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:28:02] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 572 extra nodes, 0 pruned nodes, max_depth=10
[07:28:02] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 1188 extra nodes, 0 pruned nodes, max_depth=10
[07:28:03] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 630 extra nodes, 0 pruned nodes,
...
[07:28:08] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 388 extra nodes, 0 pruned nodes, max_depth=10
[07:28:08] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 482 extra nodes, 0 pruned nodes, max_depth=10
[07:28:08] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 674 extra nodes, 0 pruned nodes, max_depth=10
[07:28:08] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 766 extra nodes, 0 pruned nodes, max_depth=10
user system elapsed
65.832 0.077 5.856
> auc()
0.730241
+ objective = "binary:logistic",
+ nround = 1, num_parallel_tree = 100, max_depth = 15,
+ subsample = 0.632, colsample_bytree = 1/sqrt(length(X_train@x)/nrow(X_train)),
+ verbosity = 2,
+ tree_method = "hist")
+ })
[07:29:39] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:29:39] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:29:39] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 2856 extra nodes, 0 pruned nodes, max_depth=15
[07:29:40] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 1232 extra nodes, 0 pruned nodes, max_depth=15
...
[07:29:47] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 3292 extra nodes, 0 pruned nodes, max_depth=15
[07:29:47] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 1312 extra nodes, 0 pruned nodes, max_depth=15
[07:29:47] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 2982 extra nodes, 0 pruned nodes, max_depth=15
[07:29:47] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 6452 extra nodes, 0 pruned nodes, max_depth=15
user system elapsed
104.609 0.241 8.579
> auc()
0.7410314
> system.time({
+ md <- xgb.train(data = dxgb_train,
+ objective = "binary:logistic",
+ nround = 1, num_parallel_tree = 100, max_depth = 20,
+ subsample = 0.632, colsample_bytree = 1/sqrt(length(X_train@x)/nrow(X_train)),
+ verbosity = 2,
+ tree_method = "hist")
+ })
[07:30:24] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:30:24] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:30:24] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 2326 extra nodes, 0 pruned nodes, max_depth=20
[07:30:24] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 2184 extra nodes, 0 pruned nodes, max_depth=20
[07:30:25] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 10138 extra nodes, 0 pruned nodes, max_depth=20
...
[07:30:35] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 13074 extra nodes, 0 pruned nodes, max_depth=20
[07:30:35] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 2148 extra nodes, 0 pruned nodes, max_depth=20
[07:30:35] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 2186 extra nodes, 0 pruned nodes, max_depth=20
[07:30:35] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 2664 extra nodes, 0 pruned nodes, max_depth=20
[07:30:35] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 3496 extra nodes, 0 pruned nodes, max_depth=20
user system elapsed
156.004 0.675 12.655
> auc()
0.7482527
xgboost with lambda=0
to better match lightgbm and build deeper trees (as per @laurae2)
> system.time({
+ md <- xgb.train(data = dxgb_train,
+ objective = "binary:logistic",
+ nround = 1, num_parallel_tree = 100, max_depth = 10,
+ subsample = 0.632, colsample_bytree = 1/sqrt(length(X_train@x)/nrow(X_train)),
+ lambda = 0,
+ verbosity = 2,
+ tree_method = "hist")
+ })
[07:33:50] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:33:50] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:33:50] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 478 extra nodes, 0 pruned nodes, max_depth=10
[07:33:50] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 778 extra nodes, 0 pruned nodes, max_depth=10
[07:33:50] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 828 extra nodes, 0 pruned nodes, max_depth=10
...
[07:33:56] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 1086 extra nodes, 0 pruned nodes, max_depth=10
[07:33:56] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 566 extra nodes, 0 pruned nodes, max_depth=10
[07:33:56] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 474 extra nodes, 0 pruned nodes, max_depth=10
user system elapsed
70.681 0.007 6.126
> auc()
0.7305753
> system.time({
+ md <- xgb.train(data = dxgb_train,
+ objective = "binary:logistic",
+ nround = 1, num_parallel_tree = 100, max_depth = 15,
+ subsample = 0.632, colsample_bytree = 1/sqrt(length(X_train@x)/nrow(X_train)),
+ lambda = 0,
+ verbosity = 2,
+ tree_method = "hist")
+ })
[07:35:11] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:35:11] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:35:11] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 1824 extra nodes, 0 pruned nodes, max_depth=15
[07:35:12] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 7730 extra nodes, 0 pruned nodes, max_depth=15
...
[07:35:20] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 5070 extra nodes, 0 pruned nodes, max_depth=15
[07:35:20] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 6534 extra nodes, 0 pruned nodes, max_depth=15
[07:35:20] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 3456 extra nodes, 0 pruned nodes, max_depth=15
[07:35:20] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 5598 extra nodes, 0 pruned nodes, max_depth=15
user system elapsed
126.601 0.104 9.989
> auc()
0.7406097
> system.time({
+ md <- xgb.train(data = dxgb_train,
+ objective = "binary:logistic",
+ nround = 1, num_parallel_tree = 100, max_depth = 20,
+ subsample = 0.632, colsample_bytree = 1/sqrt(length(X_train@x)/nrow(X_train)),
+ lambda = 0,
+ verbosity = 2,
+ tree_method = "hist")
+ })
[07:36:17] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:36:17] INFO: ../..//amalgamation/../src/gbm/gbtree.cc:177: Tree method is selected to be 'hist', which uses a single updater grow_quantile_histmaker.
[07:36:17] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 15550 extra nodes, 0 pruned nodes, max_depth=20
[07:36:18] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 4658 extra nodes, 0 pruned nodes, max_depth=20
[07:36:18] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 3644 extra nodes, 0 pruned nodes, max_depth=20
...
[07:36:31] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 5448 extra nodes, 0 pruned nodes, max_depth=20
[07:36:31] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 5550 extra nodes, 0 pruned nodes, max_depth=20
[07:36:31] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 9408 extra nodes, 0 pruned nodes, max_depth=20
[07:36:31] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 13178 extra nodes, 0 pruned nodes, max_depth=20
[07:36:31] INFO: ../..//amalgamation/../src/tree/updater_prune.cc:101: tree pruning end, 4156 extra nodes, 0 pruned nodes, max_depth=20
user system elapsed
215.592 0.725 16.913
> auc()
0.7503208
Summary:
1M:
Tool | Depth | Time [s] | AUC |
---|---|---|---|
lightgbm | 10 | 2.3 | 0.7315 |
lightgbm | 15 | 12.3 | 0.7392 |
lightgbm | 20 | 27 | 0.7416 |
xgboost | 10 | 5.8 | 0.7302 |
xgboost | 15 | 8.6 | 0.7410 |
xgboost | 20 | 12 | 0.7482 |
xgboost l=0 | 10 | 6.1 | 0.7306 |
xgboost l=0 | 15 | 10 | 0.7406 |
xgboost l=0 | 20 | 17 | 0.7503 |
h2o:
# h2o random forest on the same data. h2o handles categorical columns
# natively, so no explicit one-hot encoding is done here.
library(h2o)
h2o.init()
dx_train <- h2o.importFile("train-1m.csv")
dx_test <- h2o.importFile("test.csv")
# Predictor columns: every column except the target.
Xnames <- names(dx_train)[which(names(dx_train)!="dep_delayed_15min")]
system.time({
md <- h2o.randomForest(x = Xnames, y = "dep_delayed_15min", training_frame = dx_train,
ntrees = 100, max_depth = 10,
nbins = 100)
})
# Test-set AUC.
cat(h2o.auc(h2o.performance(md, dx_test)),"\n")
Results:
> system.time({
+ md <- h2o.randomForest(x = Xnames, y = "dep_delayed_15min", training_frame = dx_train,
+ ntrees = 100, max_depth = 10,
+ nbins = 100)
+ })
|======================================================================| 100%
user system elapsed
0.168 0.004 9.215
> cat(h2o.auc(h2o.performance(md, dx_test)),"\n")
0.7372074
> system.time({
+ md <- h2o.randomForest(x = Xnames, y = "dep_delayed_15min", training_frame = dx_train,
+ ntrees = 100, max_depth = 15,
+ nbins = 100)
+ })
|======================================================================| 100%
user system elapsed
0.279 0.007 33.379
> cat(h2o.auc(h2o.performance(md, dx_test)),"\n")
0.7499753
> system.time({
+ md <- h2o.randomForest(x = Xnames, y = "dep_delayed_15min", training_frame = dx_train,
+ ntrees = 100, max_depth = 20,
+ nbins = 100)
+ })
|======================================================================| 100%
user system elapsed
0.648 0.048 110.038
> cat(h2o.auc(h2o.performance(md, dx_test)),"\n")
0.7543568
Rborist:
# Rborist random forest on the same sparse one-hot design matrix.
library(data.table)
library(ROCR)
library(Matrix)
library(Rborist)
set.seed(123)
d_train <- fread("train-1m.csv")
d_test <- fread("test.csv")
# One-hot encode train and test together so both get identical columns.
X_train_test <- sparse.model.matrix(dep_delayed_15min ~ .-1, data = rbind(d_train, d_test))
X_train <- X_train_test[1:nrow(d_train),]
X_test <- X_train_test[(nrow(d_train)+1):(nrow(d_train)+nrow(d_test)),]
# Test-set AUC for the fitted model `md` (read from the global environment).
# ctgCensus="prob" requests class probabilities rather than raw votes.
auc <- function() {
phat <- predict(md, newdata = X_test, ctgCensus="prob")$prob[,"Y"]
rocr_pred <- prediction(phat, d_test$dep_delayed_15min == "Y")
performance(rocr_pred, "auc")@y.values[[1]]
}
# nLevel caps tree depth; predProb sets the per-split column-sampling
# probability (~1/sqrt of the average non-zeros per row, i.e. the sqrt
# mtry rule on the effective feature count); thinLeaves=TRUE presumably
# trims per-leaf data to save memory — see Rborist docs to confirm.
system.time({
md <- Rborist(X_train, as.factor(d_train$dep_delayed_15min), nLevel=10, nTree=100, predProb = 1/sqrt(length(X_train@x)/nrow(X_train)), thinLeaves=TRUE)
})
auc()
Results:
> system.time({
+ md <- Rborist(X_train, as.factor(d_train$dep_delayed_15min), nLevel=10, nTree=100, predProb = 1/sqrt(length(X_train@x)/nrow(X_train)), thinLeaves=TRUE)
+ })
user system elapsed
240.358 9.202 25.243
> auc()
[1] 0.7198579
> system.time({
+ md <- Rborist(X_train, as.factor(d_train$dep_delayed_15min), nLevel=15, nTree=100, predProb = 1/sqrt(length(X_train@x)/nrow(X_train)), thinLeaves=TRUE)
+ })
user system elapsed
417.049 8.086 35.107
> auc()
[1] 0.7309561
> system.time({
+ md <- Rborist(X_train, as.factor(d_train$dep_delayed_15min), nLevel=20, nTree=100, predProb = 1/sqrt(length(X_train@x)/nrow(X_train)), thinLeaves=TRUE)
+ })
user system elapsed
716.884 7.195 62.628
> auc()
[1] 0.7433575
ranger:
# ranger random forest; works directly on the data.frame (no one-hot
# encoding needed), with the target converted to a factor for
# classification.
library(data.table)
library(ranger)
library(ROCR)
d_train <- fread("train-1m.csv")
d_test <- fread("test.csv")
d_train$dep_delayed_15min <- as.factor(d_train$dep_delayed_15min)
d_test$dep_delayed_15min <- as.factor(d_test$dep_delayed_15min)
# Test-set AUC for the fitted model `md` (read from the global environment);
# probability=TRUE in the fit below makes predictions() return a matrix of
# class probabilities, from which the "Y" column is taken.
auc <- function() {
phat <- predictions(predict(md, data = d_test))[,"Y"]
rocr_pred <- prediction(phat, d_test$dep_delayed_15min)
performance(rocr_pred, "auc")@y.values[[1]]
}
system.time({
md <- ranger(dep_delayed_15min ~ ., d_train,
num.trees = 100, max.depth = 10, probability = TRUE, write.forest = TRUE)
})
auc()
Results:
> system.time({
+ md <- ranger(dep_delayed_15min ~ ., d_train,
+ num.trees = 100, max.depth = 10, probability = TRUE, write.forest = TRUE)
+ })
user system elapsed
143.398 0.024 10.850
> auc()
[1] 0.7116554
>
>
> system.time({
+ md <- ranger(dep_delayed_15min ~ ., d_train,
+ num.trees = 100, max.depth = 15, probability = TRUE, write.forest = TRUE)
+ })
user system elapsed
216.044 0.080 16.971
> auc()
[1] 0.7191445
>
> system.time({
+ md <- ranger(dep_delayed_15min ~ ., d_train,
+ num.trees = 100, max.depth = 20, probability = TRUE, write.forest = TRUE)
+ })
user system elapsed
295.522 0.516 24.133
> auc()
[1] 0.72058
So far (1M rows, c5.9xlarge, 18 cores, HT off):
Time [sec]:
Tool | depth=10 | depth=15 | depth=20 |
---|---|---|---|
xgboost | 5.8 | 8.6 | 12 |
xgboost lambda=0 | 6.1 | 10 | 17 |
ranger | 11 | 17 | 24 |
lightgbm | 2.3 | 12 | 27 |
Rborist | 25 | 35 | 62 |
h2o | 9.2 | 33 | 110 |
sklearn RF:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from scipy import sparse
from sklearn import metrics, ensemble
d_train = pd.read_csv("https://s3.amazonaws.com/benchm-ml--main/train-1m.csv")
d_test = pd.read_csv("https://s3.amazonaws.com/benchm-ml--main/test.csv")
d_all = pd.concat([d_train,d_test])
vars_cat = ["Month","DayofMonth","DayOfWeek","UniqueCarrier", "Origin", "Dest"]
vars_num = ["DepTime","Distance"]
for col in vars_cat:
d_all[col] = preprocessing.LabelEncoder().fit_transform(d_all[col])
X_all_cat = preprocessing.OneHotEncoder(categories="auto").fit_transform(d_all[vars_cat])
X_all = sparse.hstack((X_all_cat, d_all[vars_num])).tocsr()
y_all = np.where(d_all["dep_delayed_15min"]=="Y",1,0)
X_train = X_all[0:d_train.shape[0],]
y_train = y_all[0:d_train.shape[0]]
X_test = X_all[d_train.shape[0]:(d_train.shape[0]+d_test.shape[0]),]
y_test = y_all[d_train.shape[0]:(d_train.shape[0]+d_test.shape[0])]
md = ensemble.RandomForestClassifier(max_depth = 10, n_estimators = 100, n_jobs = -1)
%time md.fit(X_train, y_train)
y_pred = md.predict_proba(X_test)[:,1]
print(metrics.roc_auc_score(y_test, y_pred))
Results:
md = ensemble.RandomForestClassifier(max_depth = 10, n_estimators = 100, n_jobs = -1)
Wall time: 9.79 s
0.703149121562214
md = ensemble.RandomForestClassifier(max_depth = 15, n_estimators = 100, n_jobs = -1)
Wall time: 20.6 s
0.7085553315997604
md = ensemble.RandomForestClassifier(max_depth = 20, n_estimators = 100, n_jobs = -1)
Wall time: 41.8 s
0.7144237796242365
So far (1M rows, c5.9xlarge, 18 cores, HT off):
Time [sec]:
Tool | depth=10 | depth=15 | depth=20 |
---|---|---|---|
xgboost | 5.8 | 8.6 | 12 |
xgboost lambda=0 | 6.1 | 10 | 17 |
ranger | 11 | 17 | 24 |
lightgbm | 2.3 | 12 | 27 |
sklearn | 10 | 21 | 42 |
Rborist | 25 | 35 | 62 |
h2o | 9.2 | 33 | 110 |
You might be interested in cuml: https://medium.com/rapids-ai/accelerating-random-forests-up-to-45x-using-cuml-dfb782a31bea
Rforestry via @laurae2
# Rforestry (forestry) random forest. d_train is read with
# stringsAsFactors=TRUE so its categorical columns are factors.
library(Rforestry)
library(data.table)
library(ROCR)
d_train <- fread("https://s3.amazonaws.com/benchm-ml--main/train-1m.csv", stringsAsFactors=TRUE)
d_test_char <- fread("https://s3.amazonaws.com/benchm-ml--main/test.csv")
# Assumes the first 8 columns are the predictors — TODO confirm against the
# CSV schema (8 features + the dep_delayed_15min target).
p <- 8
# rbind-ing the character-typed test rows onto the factor-typed train rows
# coerces the test categoricals to factors with the SAME levels as train;
# the test rows are then sliced back out.
d_all <- rbind(d_train, d_test_char)
d_test <- d_all[(nrow(d_train)+1):(nrow(d_train)+nrow(d_test_char))]
system.time({
md <- forestry(x = d_train[,1:p], y = d_train$dep_delayed_15min, ntree = 100, maxDepth = 10)
})
# Test-set AUC via ROCR.
phat <- predict(md, d_test[,1:p])
rocr_pred <- prediction(phat, d_test$dep_delayed_15min)
cat(performance(rocr_pred, "auc")@y.values[[1]],"\n")
Run (1M rows, depth=10, c5.9xlarge, 18 cores, HT off):
user system elapsed
654.304 8.341 38.285
>
> cat(performance(rocr_pred, "auc")@y.values[[1]],"\n")
0.719672
c5.9xlarge (18 cores, HT off):
1M:
Lightgbm:
Results:
GBM:
RF:
GBM deep: