business-science / modeltime.ensemble

Time Series Ensemble Forecasting
https://business-science.github.io/modeltime.ensemble/
Other
73 stars 16 forks source link

Information about xgboost engine specific parameters in metalearner output (ensemble_model_spec, param_info) #21

Closed lg1000 closed 2 years ago

lg1000 commented 2 years ago

As I started tuning the engine-specific parameters in my models, I stumbled across an issue while tuning scale weights and L1 + L2 regularization. When I define additional parameters via param_info, I do not get any error or warning messages while tuning via ensemble_model_spec, but I do not see any results for them either. Where do I find information about the tuning results for engine-specific parameters? Maybe I misspecified something. Here is a small code example:

# META XGB
# scaling
# Custom tuning-parameter object labelled `scale_pos_weight`
# ("Balance of Events and Non-Events").
#
# Arguments:
#   range  Two-element numeric vector with the lower and upper bound;
#          both bounds are inclusive. Defaults to c(0.8, 1.2).
#   trans  Optional transformation object forwarded to
#          `new_quant_param()`; NULL (the default) means none is supplied.
#
# Returns whatever `new_quant_param()` builds for a "double" parameter.
# NOTE(review): the `default` argument may be deprecated in newer dials
# releases -- confirm against the installed version.
scale_pos_weight <- function(range = c(0.8, 1.2), trans = NULL) {
  closed_bounds <- c(TRUE, TRUE)
  new_quant_param(
    type      = "double",
    label     = c(scale_pos_weight = "Balance of Events and Non-Events"),
    range     = range,
    trans     = trans,
    inclusive = closed_bounds,
    default   = 1,
    finalize  = NULL
  )
}
# L1 and L2 regularization
# Custom tuning-parameter object labelled `penalty_L2`
# ("Amount of L2 Regularization").
#
# Arguments:
#   range  Two-element numeric vector with the lower and upper bound;
#          both bounds are inclusive. Defaults to c(-10, 1).
#          NOTE(review): presumably expressed on the transformed (log10)
#          scale, as `trans` defaults to `log10_trans()` -- confirm.
#   trans  Transformation object forwarded to `new_quant_param()`.
#
# Returns whatever `new_quant_param()` builds for a "double" parameter.
penalty_L2 <- function(range = c(-10, 1), trans = log10_trans()) {
  closed_bounds <- c(TRUE, TRUE)
  new_quant_param(
    type      = "double",
    label     = c(penalty_L2 = "Amount of L2 Regularization"),
    range     = range,
    trans     = trans,
    inclusive = closed_bounds,
    finalize  = NULL
  )
}
# Custom tuning-parameter object labelled `penalty_L1`
# ("Amount of L1 Regularization").
#
# Arguments:
#   range  Two-element numeric vector with the lower and upper bound;
#          both bounds are inclusive. Defaults to c(-10, 1).
#          NOTE(review): presumably expressed on the transformed (log10)
#          scale, as `trans` defaults to `log10_trans()` -- confirm.
#   trans  Transformation object forwarded to `new_quant_param()`.
#
# Returns whatever `new_quant_param()` builds for a "double" parameter.
penalty_L1 <- function(range = c(-10, 1), trans = log10_trans()) {
  closed_bounds <- c(TRUE, TRUE)
  new_quant_param(
    type      = "double",
    label     = c(penalty_L1 = "Amount of L1 Regularization"),
    range     = range,
    trans     = trans,
    inclusive = closed_bounds,
    finalize  = NULL
  )
}
# hypercube grid
# Seed the RNG so the Latin hypercube design is reproducible.
set.seed(23)

# Candidate grid (30 rows) for the main boost_tree() arguments that are
# marked with tune() in the meta-learner specification.
xgb_grid_stack <- grid_latin_hypercube(
  learn_rate(range = c(-6.0, -1.0)),
  finalize(
    mtry(range = c(3, ncol(training(splits)) - 4)),
    training(splits)
  ),
  min_n(range = c(2, 25)),
  tree_depth(range = c(2, 17)),
  # Named explicitly: left unnamed, the grid column would take the
  # parameter's own name (`sample_prop`), which does not match the
  # `sample_size = tune()` placeholder in the model specification.
  sample_size = sample_prop(range = c(0.75, 0.95)),
  loss_reduction(),
  size = 30
)
# Parameter set built from the three custom parameter objects; it is
# handed to ensemble_model_spec() through its `param_info` argument.
# NOTE(review): no tune() placeholder in the visible model specification
# refers to these parameters, so they are presumably never varied during
# tuning -- confirm how the engine arguments are meant to be declared.
param_set <- parameters(
  list(
    scale_pos_weight(),
    penalty_L1(),
    penalty_L2()
  )
)
set.seed(123)

# Parallel backend: one PSOCK worker per physical core.
cores <- parallel::detectCores(logical = FALSE)
if (is.na(cores)) {
  # detectCores() returns NA when the core count cannot be determined.
  cores <- 1L
}
cl <- makePSOCKcluster(cores)
registerDoParallel(cl)

# Fit the xgboost meta-learner on the submodel predictions, tuning the
# boost_tree() arguments marked with tune() over `xgb_grid_stack` with
# 10 folds. Wrapping the call in tryCatch(finally = ) guarantees that the
# worker processes are shut down even when tuning stops with an error.
#
# NOTE(review): set_engine("xgboost") carries no tune() placeholders, so
# the parameters in `param_set` are presumably not tuned; engine arguments
# likely need to be declared in set_engine() and given grid columns --
# confirm.
ensemble_fit_xgboost <- tryCatch(
  submodel_predictions %>%
    ensemble_model_spec(
      model_spec = boost_tree(
        trees          = 1000,
        tree_depth     = tune(),
        learn_rate     = tune(),
        min_n          = tune(),
        mtry           = tune(),
        sample_size    = tune(),
        loss_reduction = tune()
      ) %>%
        set_engine("xgboost"),
      kfolds     = 10,
      grid       = xgb_grid_stack,
      param_info = param_set,
      control    = control_grid(
        verbose   = TRUE,
        allow_par = TRUE
      )
    ),
  finally = stopCluster(cl)
)