spsanderson / healthyR.ai

healthyR.ai - AI package for the healthyverse
http://www.spsanderson.com/healthyR.ai/

Error with step function inside tune_grid #81

Closed: spsanderson closed this issue 3 years ago

spsanderson commented 3 years ago

I use a custom step function that I built for a package of mine, and it works just fine on its own. The package is in a local copy (not yet submitted to CRAN). Link to the function here: step_hai_fourier
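For context, here is a minimal sketch of the step in isolation, using toy data invented purely for illustration (the arguments mirror the recipe further down):

library(recipes)
library(healthyR.ai)

# Toy weekly series, invented for illustration
toy_tbl <- tibble::tibble(
  date_col = seq.Date(as.Date("2021-01-03"), by = "week", length.out = 24),
  value    = rnorm(24)
)

recipe(value ~ ., data = toy_tbl) %>%
  step_hai_fourier(value, scale_type = "sincos", period = 12, order = 1) %>%
  prep() %>%
  juice()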

Here is the session info (we can see healthyR.ai 0.0.2.9000 is loaded):

> sessionInfo()
R version 4.1.0 (2021-05-18)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19042)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] timetk_2.6.1               tidyquant_1.0.3            quantmod_0.4.18           
 [4] TTR_0.24.2                 PerformanceAnalytics_2.0.4 xts_0.12.1                
 [7] zoo_1.8-9                  lubridate_1.8.0            forcats_0.5.1             
[10] stringr_1.4.0              readr_2.0.2                tidyverse_1.3.1           
[13] janitor_2.1.0              healthyR.ts_0.1.4          healthyR.ai_0.0.2.9000    
[16] yardstick_0.0.8            workflowsets_0.1.0         workflows_0.2.4           
[19] tune_0.1.6                 tidyr_1.1.4                tibble_3.1.6              
[22] rsample_0.1.1              recipes_0.1.17             purrr_0.3.4               
[25] parsnip_0.1.7              modeldata_0.1.1            infer_1.0.0               
[28] ggplot2_3.3.5              dplyr_1.0.7                dials_0.0.10              
[31] scales_1.1.1               broom_0.7.10               tidymodels_0.1.4          
[34] modeltime_1.1.0           

loaded via a namespace (and not attached):
  [1] readxl_1.3.1         backports_1.3.0      plyr_1.8.6           lazyeval_0.2.2      
  [5] splines_4.1.0        crosstalk_1.2.0      listenv_0.8.0        digest_0.6.28       
  [9] foreach_1.5.1        htmltools_0.5.2      fansi_0.5.0          checkmate_2.0.0     
 [13] magrittr_2.0.1       doParallel_1.0.16    tzdb_0.2.0           globals_0.14.0      
 [17] modelr_0.1.8         gower_0.2.2          RcppParallel_5.1.4   vroom_1.5.5         
 [21] hardhat_0.1.6        forecast_8.15        tseries_0.10-48      colorspace_2.0-2    
 [25] rvest_1.0.2          haven_2.4.3          crayon_1.4.2         jsonlite_1.7.2      
 [29] survival_3.2-11      iterators_1.0.13     glue_1.5.0           gtable_0.3.0        
 [33] ipred_0.9-12         Quandl_2.11.0        future.apply_1.8.1   DBI_1.1.1           
 [37] Rcpp_1.0.7           viridisLite_0.4.0    GPfit_1.0-8          bit_4.0.4           
 [41] lava_1.6.10          StanHeaders_2.21.0-7 prodlim_2019.11.13   htmlwidgets_1.5.4   
 [45] httr_1.4.2           ellipsis_0.3.2       pkgconfig_2.0.3      sass_0.4.0          
 [49] nnet_7.3-16          dbplyr_2.1.1         utf8_1.2.2           tidyselect_1.1.1    
 [53] labeling_0.4.2       rlang_0.4.12         DiceDesign_1.9       munsell_0.5.0       
 [57] cellranger_1.1.0     tools_4.1.0          xgboost_1.5.0.1      cli_3.1.0           
 [61] generics_0.1.1       fastmap_1.1.0        yaml_2.2.1           bit64_4.0.5         
 [65] fs_1.5.0             future_1.23.0        nlme_3.1-152         xml2_1.3.2          
 [69] compiler_4.1.0       rstudioapi_0.13      plotly_4.10.0        curl_4.3.2          
 [73] gt_0.3.1             reprex_2.0.1         lhs_1.1.3            stringi_1.7.5       
 [77] lattice_0.20-44      Matrix_1.3-4         urca_1.3-0           vctrs_0.3.8         
 [81] pillar_1.6.4         lifecycle_1.0.1      furrr_0.2.3          lmtest_0.9-39       
 [85] data.table_1.14.2    R6_2.5.1             parallelly_1.28.1    codetools_0.2-18    
 [89] MASS_7.3-54          assertthat_0.2.1     withr_2.4.2          fracdiff_1.5-1      
 [93] hms_1.1.1            quadprog_1.5-8       grid_4.1.0           rpart_4.1-15        
 [97] timeDate_3043.102    class_7.3-19         snakecase_0.11.0     pROC_1.18.0   

Here is my script showing that prep() and juice() work just fine:

# Libraries ----
library(modeltime)
library(tidymodels)
library(healthyR.ai)
library(healthyR.ts)
library(parallel)
library(janitor)
library(tidyverse)
library(tidyquant)
library(timetk)

# Data ----
url      <- "https://cci30.com/ajax/getIndexHistory.php"
destfile <- "00_data/cci30_OHLCV.csv"
download.file(url, destfile = destfile)

cci_index_tbl <- read_csv("00_data/cci30_OHLCV.csv") %>%
  clean_names()

# * Daily Log Returns ----
time_param <- "weekly"
log_returns_tbl <- cci_index_tbl %>%
  tq_transmute(
    select = close
    , mutate_fun = periodReturn
    , period = time_param
    , type = "log"
    , col_rename = "value"
  ) %>%
  set_names("date_col", "value")

# * Train/Test ----
splits <- log_returns_tbl %>%
  time_series_split(
    date_var = date_col
    , assess = "12 weeks"
    , cumulative = TRUE
  )

splits %>%
  tk_time_series_cv_plan() %>%
  plot_time_series_cv_plan(
    .date_var = date_col
    , .value  = value
    , .title  = paste0(
      "CCI30 ", stringr::str_to_title(time_param), " Log Returns"
    )
  )

n_cores <- detectCores() - 1

# Recipe ----
recipe_base <- recipe(value ~ ., data = training(splits)) %>%
  step_hai_fourier(value, scale_type = "sincos", period = 12, order = 1)

recipe_base %>% prep() %>% juice()

And the console says:

> recipe_base %>% prep() %>% juice()
# A tibble: 347 x 3
   date_col      value value_sincos
   <date>        <dbl>        <dbl>
 1 2015-01-04 -0.196       -0.102  
 2 2015-01-11 -0.00811     -0.00425
 3 2015-01-18 -0.209       -0.108  
 4 2015-01-25  0.185        0.0961 
 5 2015-02-01 -0.141       -0.0736 
 6 2015-02-08 -0.0231      -0.0121 
 7 2015-02-15  0.0208       0.0109 
 8 2015-02-22 -0.0180      -0.00941
 9 2015-03-01  0.0841       0.0440 
10 2015-03-08 -0.0182      -0.00955
# ... with 337 more rows

OK, perfect so far. Here is the rest of the script:

# Model ----
# Boosted Auto ARIMA
model_spec_arima_boosted <- arima_boost(
  min_n = 2
  , learn_rate = 0.015
) %>%
  set_engine(engine = "auto_arima_xgboost")

# Workflowset ----
wfsets <- workflow_set(
  preproc = list(
    base          = recipe_base
  ),
  models = list(
    model_spec_arima_boosted
  ),
  cross = TRUE
)

parallel_start(n_cores)
wf_fits <- wfsets %>% 
  modeltime_fit_workflowset(
    data = training(splits)
    , control = control_fit_workflowset(
      allow_par = TRUE
      , verbose = TRUE
    )
  )
parallel_stop()

# Drop any workflows that failed to fit
wf_fits <- wf_fits %>%
  filter(.model != "NULL")

# Model Table -------------------------------------------------------------

models_tbl <- wf_fits

# Calibrate Model Testing -------------------------------------------------

parallel_start(n_cores)

calibration_tbl <- models_tbl %>%
  modeltime_calibrate(new_data = testing(splits))

parallel_stop()

calibration_tbl

# Testing Accuracy --------------------------------------------------------

parallel_start(n_cores)

calibration_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = log_returns_tbl
  ) %>%
  plot_modeltime_forecast(
    .legend_max_width   = 25,
    .interactive        = TRUE,
    .conf_interval_show = FALSE
  )

parallel_stop()

calibration_tbl %>%
  modeltime_accuracy() %>%
  drop_na() %>%
  arrange(desc(rsq)) %>%
  table_modeltime_accuracy(.interactive = FALSE)

# Model Tuning ----
# Get Model
plucked_model <- calibration_tbl %>%
  modeltime::pluck_modeltime_model(1)

training_data <- rsample::training(splits)

tscv <- timetk::time_series_cv(
  data        = training_data,
  date_var    = date_col,
  cumulative  = TRUE,
  assess      = "26 weeks",
  skip        = "4 weeks",
  slice_limit = 6
)

# * Tune Spec ----
# Model Spec
model_spec <- plucked_model %>% parsnip::extract_spec_parsnip()
model_spec_engine <- model_spec[["engine"]]
model_spec_tuner <- healthyR.ts::ts_model_spec_tune_template(model_spec_engine)
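
# For reference, a hedged sketch of what the tune template for the
# "auto_arima_xgboost" engine presumably looks like: an arima_boost() spec
# with tune() placeholders for the boosting parameters. The actual object
# returned by ts_model_spec_tune_template() may differ.
#
# model_spec_tuner <- modeltime::arima_boost(
#   trees      = tune::tune(),
#   min_n      = tune::tune(),
#   tree_depth = tune::tune(),
#   learn_rate = tune::tune()
# ) %>%
#   parsnip::set_engine("auto_arima_xgboost")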

# * Grid Spec ----
grid_spec <- dials::grid_latin_hypercube(
  tune::parameters(model_spec_tuner),
  size = 30
)

# * Tune Model ----
wflw_tune_spec <- plucked_model %>%
  workflows::update_model(model_spec_tuner)

# * Run Tuning Grid ----
modeltime::parallel_start(n_cores)

# THIS FAILS ----
tune_results <- wflw_tune_spec %>%
  tune::tune_grid(
    resamples = tscv,
    grid = grid_spec,
    metrics = modeltime::default_forecast_accuracy_metric_set(),
    control = tune::control_grid(
      verbose = TRUE,
      save_pred = TRUE
    )
  )

> tune_results$.notes[[1]]
# A tibble: 1 x 1
  .notes                                                                                            
  <chr>                                                                                             
1 "preprocessor 1/1: Error in UseMethod(\"prep\"): no applicable method for 'prep' applied to an ob~

I am not sure why this is happening, and I am also not sure whether this belongs here or in recipes; I placed it here because it fails inside of tune_grid().
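Two quick checks that can help narrow this down (hedged, since the exact behavior depends on the tune and workflows versions in use):

# 1. Does tune know that healthyR.ai must be loaded on the workers? If the
#    package has not registered a required_pkgs() method for the custom
#    step, "healthyR.ai" will be missing from this list.
tune::required_pkgs(wflw_tune_spec)

# 2. Does the error persist when the grid runs sequentially? If it only
#    appears with parallel workers, they are likely missing the package.
tune_results_seq <- wflw_tune_spec %>%
  tune::tune_grid(
    resamples = tscv,
    grid      = grid_spec,
    metrics   = modeltime::default_forecast_accuracy_metric_set(),
    control   = tune::control_grid(allow_par = FALSE, verbose = TRUE)
  )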

spsanderson commented 3 years ago

The package may need to add the following registration to its .onLoad() (this is the pattern the tidymodels packages use to register required_pkgs() methods with tune):

.onLoad <- function(libname, pkgname) {
  # The original snippet assumed `names` holds the names of the package
  # namespace; inside .onLoad() that is:
  names <- names(asNamespace(pkgname))

  if (rlang::is_installed("tune") && utils::packageVersion("tune") >= "0.1.1.9000") {
    # Find every required_pkgs.<class> method this package defines
    req_pkgs_names   <- grep("^required_pkgs\\.", names, value = TRUE)
    req_pkgs_classes <- gsub("^required_pkgs\\.", "", req_pkgs_names)

    for (i in seq_along(req_pkgs_names)) {
      class <- req_pkgs_classes[[i]]
      # s3_register() is vctrs::s3_register(), or a vendored copy of it
      vctrs::s3_register("tune::required_pkgs", class)
    }
  }
}
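
For completeness, a hedged sketch of the kind of method that registration would pick up. The S3 class name "step_hai_fourier" is an assumption based on the step's constructor name; the actual class must match whatever the step constructor assigns.

# Hedged sketch: tells tune that this step needs recipes and healthyR.ai
# loaded on the parallel workers. The S3 class name is assumed.
required_pkgs.step_hai_fourier <- function(x, ...) {
  c("recipes", "healthyR.ai")
}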