ymattu / MlBayesOpt

R package to tune parameters for machine learning (Support Vector Machine, Random Forest, and XGBoost) using Bayesian optimization with a Gaussian process

correct error with reg:linear #68

Open msmith01 opened 4 years ago

msmith01 commented 4 years ago

I discuss the error in a reply to one of the issues here:

https://github.com/ymattu/MlBayesOpt/issues/55

I still get a warning that appears to come from the following line:

    cv_folds <- KFold(datalabel, nfolds = n_folds,
                      stratified = TRUE, seed = seed)
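
For what it's worth, the warning text itself is the standard base R matrix() recycling message: the warnings below point at matrix(c(sample(index), rep(NA, NA_how_many)), ncol = nfolds) inside KFold, which fires whenever the padded index vector does not divide evenly into nfolds columns. A minimal sketch that reproduces just the message (the length 15 is only chosen to match the first warning below; the exact wording depends on the R version):

    # base R emits the same warning when the data length is not a
    # multiple (or sub-multiple) of the number of rows implied by ncol
    matrix(1:15, ncol = 2)
    #> Warning message:
    #> In matrix(1:15, ncol = 2) :
    #>   data length [15] is not a sub-multiple or multiple of the number of rows [8]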

Running the new xgb_cv_opt function with the following data should work:

library(MlBayesOpt)
library(dplyr)
library(Matrix)
library(xgboost)
library(rBayesianOptimization)
df <- iris
label_Species <- iris$Species
xgb_cv_opt(data = df,
           label = label_Species,
           objectfun = "reg:linear", evalmetric = "rmse", n_folds = 2, eta_range = c(0.1, 1L),
           max_depth_range = c(4L, 6L), nrounds_range = c(70, 160L),
           subsample_range = c(0.1, 1L), bytree_range = c(0.4, 1L),
           init_points = 4, n_iter = 10, acq = "ucb", kappa = 2.576, eps = 0,
           optkernel = list(type = "exponential", power = 2), classes = NULL,
           seed = 0)

Output:

> library(MlBayesOpt)
> library(dplyr)
> library(Matrix)
> library(xgboost)
> library(rBayesianOptimization)
> df <- iris
> label_Species <- iris$Species
> xgb_cv_opt(data = df,
+            label = label_Species,
+            objectfun = "reg:linear", evalmetric = "rmse", n_folds = 2, eta_range = c(0.1, 1L),
+            max_depth_range = c(4L, 6L), nrounds_range = c(70, 160L),
+            subsample_range = c(0.1, 1L), bytree_range = c(0.4, 1L),
+            init_points = 4, n_iter = 10, acq = "ucb", kappa = 2.576, eps = 0,
+            optkernel = list(type = "exponential", power = 2), classes = NULL,
+            seed = 0)
elapsed = 0.01  Round = 1   eta_opt = 0.2703    max_depth_opt = 5.0000  nrounds_opt = 153.4572  subsample_opt = 0.8565  bytree_opt = 0.5312 Value = -0.8253 
elapsed = 0.01  Round = 2   eta_opt = 0.7823    max_depth_opt = 5.0000  nrounds_opt = 95.4909   subsample_opt = 0.3862  bytree_opt = 0.7101 Value = -0.1033 
elapsed = 0.01  Round = 3   eta_opt = 0.7520    max_depth_opt = 5.0000  nrounds_opt = 123.1516  subsample_opt = 0.8046  bytree_opt = 0.5614 Value = -0.0852 
elapsed = 0.01  Round = 4   eta_opt = 0.9494    max_depth_opt = 4.0000  nrounds_opt = 79.9325   subsample_opt = 0.3408  bytree_opt = 0.5087 Value = -0.2351 
elapsed = 0.01  Round = 5   eta_opt = 0.5938    max_depth_opt = 5.0000  nrounds_opt = 104.2419  subsample_opt = 0.1563  bytree_opt = 0.9367 Value = -0.1526 
elapsed = 0.01  Round = 6   eta_opt = 0.6991    max_depth_opt = 4.0000  nrounds_opt = 99.4728   subsample_opt = 0.7752  bytree_opt = 0.9837 Value = -0.0516 
elapsed = 0.01  Round = 7   eta_opt = 0.7259    max_depth_opt = 4.0000  nrounds_opt = 157.5705  subsample_opt = 0.9099  bytree_opt = 0.4197 Value = -0.2175 
elapsed = 0.01  Round = 8   eta_opt = 1.0000    max_depth_opt = 4.0000  nrounds_opt = 160.0000  subsample_opt = 1.0000  bytree_opt = 0.9479 Value = -0.0791 
elapsed = 0.01  Round = 9   eta_opt = 0.8652    max_depth_opt = 4.0000  nrounds_opt = 128.5974  subsample_opt = 0.1000  bytree_opt = 1.0000 Value = -0.1177 
elapsed = 0.01  Round = 10  eta_opt = 0.1000    max_depth_opt = 4.0000  nrounds_opt = 70.0000   subsample_opt = 1.0000  bytree_opt = 1.0000 Value = -2.7314 
elapsed = 0.01  Round = 11  eta_opt = 1.0000    max_depth_opt = 4.0000  nrounds_opt = 124.4014  subsample_opt = 1.0000  bytree_opt = 0.9440 Value = -0.0508 
elapsed = 0.01  Round = 12  eta_opt = 0.6357    max_depth_opt = 5.0000  nrounds_opt = 70.0000   subsample_opt = 1.0000  bytree_opt = 0.4000 Value = -0.1727 
elapsed = 0.01  Round = 13  eta_opt = 1.0000    max_depth_opt = 6.0000  nrounds_opt = 138.0422  subsample_opt = 0.1000  bytree_opt = 0.9535 Value = -0.0985 
elapsed = 0.01  Round = 14  eta_opt = 0.7131    max_depth_opt = 4.0000  nrounds_opt = 105.8117  subsample_opt = 1.0000  bytree_opt = 0.6526 Value = -0.1199 

 Best Parameters Found: 
Round = 11  eta_opt = 1.0000    max_depth_opt = 4.0000  nrounds_opt = 124.4014  subsample_opt = 1.0000  bytree_opt = 0.9440 Value = -0.0508 
$Best_Par
      eta_opt max_depth_opt   nrounds_opt subsample_opt    bytree_opt 
    1.0000000     4.0000000   124.4014440     1.0000000     0.9440231 

$Best_Value
[1] -0.050821

$History
    Round   eta_opt max_depth_opt nrounds_opt subsample_opt bytree_opt      Value
 1:     1 0.2702743             5   153.45719     0.8564563  0.5311872 -0.8253185
 2:     2 0.7822927             5    95.49093     0.3861673  0.7100781 -0.1032525
 3:     3 0.7520490             5   123.15158     0.8045662  0.5613704 -0.0851570
 4:     4 0.9493523             4    79.93245     0.3407574  0.5087010 -0.2351015
 5:     5 0.5938150             5   104.24190     0.1562759  0.9367329 -0.1526330
 6:     6 0.6990630             4    99.47276     0.7751549  0.9836891 -0.0515565
 7:     7 0.7259076             4   157.57050     0.9098945  0.4197278 -0.2175365
 8:     8 1.0000000             4   160.00000     1.0000000  0.9479365 -0.0790800
 9:     9 0.8652452             4   128.59735     0.1000000  1.0000000 -0.1176970
10:    10 0.1000000             4    70.00000     1.0000000  1.0000000 -2.7313515
11:    11 1.0000000             4   124.40144     1.0000000  0.9440231 -0.0508210
12:    12 0.6356773             5    70.00000     1.0000000  0.4000000 -0.1727275
13:    13 1.0000000             6   138.04222     0.1000000  0.9535310 -0.0985300
14:    14 0.7130662             4   105.81170     1.0000000  0.6526395 -0.1198510

$Pred
           V1       V2       V3       V4       V5       V6       V7       V8       V9       V10      V11      V12
  1: 1.794260 3.997718 3.872365 4.787029 2.999421 3.817092 3.923461 5.298461 4.692506 0.9798461 5.298461 3.363726
  2: 1.601689 3.997718 3.872365 4.787029 2.999421 3.817092 3.923461 4.594231 4.692506 0.9094231 4.594231 3.363726
  3: 1.601689 3.997718 3.872365 4.787029 2.999421 3.265959 3.387272 4.594231 4.692506 0.9094231 4.594231 3.363726
  4: 1.601689 3.997718 3.872365 4.787029 2.999421 3.265959 3.387272 4.594231 4.692506 0.9094231 4.594231 3.363726
  5: 1.794260 3.997718 3.872365 4.787029 2.999421 3.817092 3.923461 5.298461 4.692506 0.9798461 5.298461 3.363726
 ---                                                                                                             
146: 2.090428 4.892015 4.817782 6.000310 3.703391 4.633369 4.723159 6.407742 4.692506 1.0907743 6.407742 4.142464
147: 2.032701 4.811997 4.760699 5.808878 3.754106 4.518176 4.613933 6.580435 4.735147 1.1080434 6.580435 3.558663
148: 2.090428 4.892015 4.817782 6.000310 3.703391 4.633369 4.723159 6.407742 4.692506 1.0907743 6.407742 4.142464
149: 2.090428 4.892015 4.817782 6.000310 3.703391 4.633369 4.723159 6.407742 4.692506 1.0907743 6.407742 4.142464
150: 2.090428 4.892015 4.817782 6.000310 3.703391 4.633369 4.723159 6.407742 4.692506 1.0907743 6.407742 4.142464
          V13      V14
  1: 5.446154 3.921621
  2: 5.446154 3.419458
  3: 5.446154 3.419458
  4: 5.446154 3.419458
  5: 5.446154 3.921621
 ---                  
146: 5.446154 4.712611
147: 5.379070 4.835752
148: 5.446154 4.712611
149: 5.446154 4.712611
150: 5.446154 4.712611

Warning messages:
1: In matrix(c(sample(index), rep(NA, NA_how_many)), ncol = nfolds) :
  data length [15] is not a sub-multiple or multiple of the number of rows [8]
2: In matrix(c(sample(index), rep(NA, NA_how_many)), ncol = nfolds) :
  data length [43] is not a sub-multiple or multiple of the number of rows [22]
3: In matrix(c(sample(index), rep(NA, NA_how_many)), ncol = nfolds) :
  data length [109] is not a sub-multiple or multiple of the number of rows [55]
4: In matrix(c(sample(index), rep(NA, NA_how_many)), ncol = nfolds) :
  data length [107] is not a sub-multiple or multiple of the number of rows [54]
5: In matrix(c(sample(index), rep(NA, NA_how_many)), ncol = nfolds) :
  data length [133] is not a sub-multiple or multiple of the number of rows [67]
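
Because matrix() recycles (or pads with NA) when a stratum does not split evenly into nfolds columns, it is probably worth checking that the folds KFold hands back are still a clean partition of the rows, i.e. that the warning did not duplicate or drop any indices. A small sanity-check sketch; the label passed to KFold here is only an assumption about what datalabel ends up being inside xgb_cv_opt, so whether the warning reappears depends on the strata sizes in your actual run:

    library(rBayesianOptimization)

    # assumption: a stand-in for the datalabel that xgb_cv_opt passes to KFold
    folds <- KFold(as.integer(iris$Species), nfolds = 2,
                   stratified = TRUE, seed = 0)

    idx <- unlist(folds)
    any(is.na(idx))                      # FALSE -> no padding NAs leaked into the folds
    any(duplicated(idx))                 # FALSE -> no row index was recycled twice
    setequal(idx, seq_len(nrow(iris)))   # TRUE  -> every row is covered

If any of those checks fails, the warning is more than cosmetic and the split itself is off; if they all pass, the split is intact for that run and the message is just the padding step being noisy.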