koalaverse / vip

Variable Importance Plots (VIPs)
https://koalaverse.github.io/vip/
186 stars 24 forks source link

question: using vip with parsnip and xgboost #118

Open gutama opened 3 years ago

gutama commented 3 years ago

Hi

I can use vip_permute with glmnet

set.seed(123)
mtcars_split <- initial_split(mtcars, prop = 0.80)
mtcars_train <- training(mtcars_split)
mtcars_test <- testing(mtcars_split)

mtcars_folds <- vfold_cv(mtcars_train, v = 10)

# Use the training data (not the full data set) as the recipe template so the
# preprocessing spec is declared consistently with what the workflow is fit on.
glmnet_recipe <-
  recipe(formula = mpg ~ ., data = mtcars_train) %>%
  step_zv(all_predictors()) %>%
  step_normalize(all_predictors(), -all_nominal())

glmnet_spec <-
  linear_reg(penalty = tune(), mixture = tune()) %>%
  set_mode("regression") %>%
  set_engine("glmnet")

glmnet_workflow <-
  workflow() %>%
  add_recipe(glmnet_recipe) %>%
  add_model(glmnet_spec)

glmnet_grid <- tidyr::crossing(
  penalty = 10^seq(-6, -1, length.out = 20),
  mixture = c(0.05, 0.2, 0.4, 0.6, 0.8, 1)
)

glmnet_tune <-
  tune_grid(glmnet_workflow, resamples = mtcars_folds, grid = glmnet_grid)

final_glmnet_wflow <-
  glmnet_workflow %>%
  finalize_workflow(select_best(glmnet_tune, metric = "rmse"))

final_glmnet_fit <-
  final_glmnet_wflow %>%
  fit(data = mtcars_train)

# BUG FIX: the argument is `method`, not `methods`. A misspelled argument is
# silently absorbed by `...`, so vi() was falling back to the default
# method = "model" rather than computing FIRM importance.
vi(pull_workflow_fit(final_glmnet_fit), method = "firm")

but somehow it won't work with xgboost

set.seed(123)
mtcars_split <- initial_split(mtcars, prop = 0.80)
mtcars_train <- training(mtcars_split)
mtcars_test <- testing(mtcars_split)

mtcars_folds <- vfold_cv(mtcars_train, v = 10)

# Use the training data (not the full data set) as the recipe template, for
# consistency with the data the workflow is ultimately fit on.
xgboost_recipe <-
  recipe(formula = mpg ~ ., data = mtcars_train) %>%
  step_zv(all_predictors())

xgboost_spec <-
  boost_tree(
    trees = tune(), min_n = tune(), tree_depth = tune(),
    learn_rate = tune(), loss_reduction = tune()
  ) %>%
  set_mode("regression") %>%
  set_engine("xgboost")

xgboost_workflow <-
  workflow() %>%
  add_recipe(xgboost_recipe) %>%
  add_model(xgboost_spec)

xgboost_grid <- grid_latin_hypercube(
  trees(), min_n(), tree_depth(), learn_rate(), loss_reduction(),
  size = 10
)

xgboost_tune <-
  tune_grid(xgboost_workflow, resamples = mtcars_folds, grid = xgboost_grid)

final_xgboost_wflow <-
  xgboost_workflow %>%
  finalize_workflow(select_best(xgboost_tune, metric = "rmse"))

final_xgboost_fit <-
  final_xgboost_wflow %>%
  fit(data = mtcars_train)

# NOTE(review): method = "firm" builds feature-effect (partial dependence)
# curves, which for an xgboost engine needs the training data and a prediction
# wrapper passed through `...` -- calling it bare like this is what triggers
# the "newdata is missing" error reported below. See the permutation-based
# call with `pred_wrapper` later in this thread for a working alternative.
vi(pull_workflow_fit(final_xgboost_fit), method = "firm")

How do I write the `pred_wrapper` for xgboost?

# Prediction wrapper for permutation importance. The reported error
# ("argument 'newdata' is missing" inside predict.xgb.Booster) shows that
# predict() dispatches to the raw xgb.Booster here, not to parsnip's
# predict.model_fit -- so the argument must be `newdata` (xgboost's name),
# not `new_data` (parsnip's name). predict.xgb.Booster also returns a plain
# numeric vector, so there is no `$.pred` column to extract; indexing with
# `$.pred` on a vector would yield NULL.
pfun <- function(object, newdata) {
  predict(object, newdata = as.matrix(newdata))
}

vi_s <- vi(
  pull_workflow_fit(final_xgboost_fit),
  method = "permute",
  train = mtcars_train %>% select(-mpg),
  target = mtcars_train$mpg,
  metric = "rmse",
  pred_wrapper = pfun
)

Error in inherits(newdata, "xgb.DMatrix") : argument "newdata" is missing, with no default

thank you for the answer

topepo commented 3 years ago

I'm working on a PR for a workflow interface

topepo commented 3 years ago

Never mind... it is in #99 @Athospd beat me to it.