slowdown in `check_for_unknowns()`

While watching output with verbose_iter during the workshop I noticed that tune_bayes was spending just as much time generating candidates as it was fitting/predicting with the GP model. It turns out much of this time is spent in check_for_unknowns() via value_transform():

library(tidymodels)
library(modeldata)
library(bonsai)

# Model the sale price on the log scale.
# NOTE(review): `ames` is not defined in this script; presumably it is the
# data set provided by the modeldata package loaded above -- confirm.
ames$Sale_Price <- log(ames$Sale_Price)

# Seed the RNG so the resampling split is reproducible, then build the
# cross-validation folds using vfold_cv()'s default settings.
set.seed(472)
ames_rs <- vfold_cv(ames)

# Boosted-tree regression specification on the lightgbm engine, with
# `trees`, `learn_rate` and `min_n` marked for tuning.
lgbm_spec <- 
  boost_tree(trees = tune(), learn_rate = tune(), min_n = tune()) %>% 
  set_mode("regression") %>% 
  set_engine("lightgbm")

# Workflow: Sale_Price modelled on every other column using the spec above.
lgbm_wflow <- workflow(Sale_Price ~ ., lgbm_spec)

# Metrics computed during tuning.
reg_metrics <- metric_set(mae, rsq)

# Initial grid search over the resamples (default grid); its results are
# passed as `initial` to tune_bayes() further down.
set.seed(12)
init_res <-
  lgbm_wflow %>%
  tune_grid(
    resamples = ames_rs,
    metrics = reg_metrics
  )

#' Profile an expression and tabulate the time attributed to each function
#'
#' Possibly soon-to-come in rlang. Runs `expr` under the sampling profiler
#' and returns the `by.total` table of `summaryRprof()` as a tibble, i.e. the
#' share of elapsed time attributed to each function on the call stack.
#'
#' @param expr Expression to profile. It is evaluated once, lazily, after the
#'   profiler has been started.
#' @param ... Additional arguments passed on to `Rprof()`.
#' @param interval Sampling interval passed to `Rprof()`.
#' @return A tibble with one row per function: the function name in `fn`
#'   (double quotes stripped) followed by the columns of
#'   `summaryRprof()$by.total`, sorted by decreasing `self.pct`.
prof_tbl <- function(expr, ..., interval = 0.01) {
  file <- withr::local_tempfile()

  Rprof(file, ..., interval = interval, filter.callframes = TRUE)
  # Make sure the profiler is switched off even when `expr` throws;
  # otherwise it would keep sampling after this function has exited.
  # `after = FALSE` puts this handler at the front of the exit handlers so
  # that it is intended to run ahead of the temp-file cleanup.
  on.exit(Rprof(NULL), add = TRUE, after = FALSE)

  # `expr` is a promise: forcing it here is what actually runs the code
  # being profiled.
  force(expr)

  # Stop sampling before the profile file is read back.
  Rprof(NULL)

  out <- summaryRprof(file)

  out_tbl <- tibble::as_tibble(out$by.total, rownames = "fn")

  out_tbl |>
    dplyr::mutate(fn = gsub("\"", "", fn)) |>
    dplyr::arrange(dplyr::desc(self.pct))
}

# Profile a Bayesian search of up to 20 iterations, started from the grid
# search results. The assignment inside the call keeps the tuning result in
# `lgbm_bayes_res`, while `p` receives the profile table from prof_tbl().
p <-
  prof_tbl(
    lgbm_bayes_res <-
      lgbm_wflow %>%
      tune_bayes(
        resamples = ames_rs,
        initial = init_res,
        iter = 20,
        metrics = reg_metrics
      )
  )
#> ! No improvement for 10 iterations; returning current results.

# Total time attributed to check_for_unknowns() as a fraction of the total
# time attributed to tune_bayes_workflow() (about 12% in this run).
p$total.time[p$fn == "check_for_unknowns"] /
  p$total.time[p$fn == "tune_bayes_workflow"]
#> [1] 0.1214575

tidymodels / dials

slowdown in `check_for_unknowns()` #305