Open spsanderson opened 3 years ago
data_tbl.xlsx
I am running a script where I have created a cross validation that is being passed to modeltime_refit I do believe this may be an underlying issue with tune but am posting here because I am using the modeltime_refit function. Data is attached.
modeltime_refit
tune
> sessionInfo() R version 4.0.3 (2020-10-10) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 19041) Matrix products: default Random number generation: RNG: L'Ecuyer-CMRG Normal: Inversion Sample: Rejection locale: [1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 [3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C [5] LC_TIME=English_United States.1252 attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] forecast_8.14 xgboost_1.4.1.1 vctrs_0.3.8 [4] rlang_0.4.11 modeltime.h2o_0.1.1 h2o_3.32.1.3 [7] modeltime.ensemble_0.4.1 modeltime.resample_0.2.0 tidyquant_1.0.3 [10] quantmod_0.4.18 TTR_0.24.2 PerformanceAnalytics_2.0.4 [13] xts_0.12.1 zoo_1.8-9 janitor_2.1.0 [16] DBI_1.1.1 odbc_1.3.2 timetk_2.6.1 [19] lubridate_1.7.10 forcats_0.5.1 stringr_1.4.0 [22] readr_1.4.0 tidyverse_1.3.1 modeltime_0.6.0 [25] yardstick_0.0.8 workflowsets_0.0.2 workflows_0.2.2 [28] tune_0.1.5 tidyr_1.1.3 tibble_3.1.2 [31] rsample_0.1.0 recipes_0.1.16 purrr_0.3.4 [34] parsnip_0.1.6 modeldata_0.1.0 infer_0.5.4 [37] ggplot2_3.3.3 dplyr_1.0.6 dials_0.0.9 [40] scales_1.1.1 broom_0.7.6 tidymodels_0.1.3 [43] pacman_0.5.1 loaded via a namespace (and not attached): [1] readxl_1.3.1 backports_1.2.1 plyr_1.8.6 lazyeval_0.2.2 [5] splines_4.0.3 crosstalk_1.1.1 listenv_0.8.0 inline_0.3.19 [9] digest_0.6.27 foreach_1.5.1 htmltools_0.5.1.1 earth_5.3.0 [13] fansi_0.5.0 magrittr_2.0.1 xlsx_0.6.5 globals_0.14.0 [17] modelr_0.1.8 gower_0.2.2 matrixStats_0.59.0 RcppParallel_5.1.4 [21] hardhat_0.1.5 prettyunits_1.1.1 tseries_0.10-48 colorspace_2.0-1 [25] blob_1.2.1 rvest_1.0.0 haven_2.4.1 callr_3.7.0 [29] crayon_1.4.1 RCurl_1.98-1.3 jsonlite_1.7.2 progressr_0.7.0 [33] survival_3.2-11 iterators_1.0.13 glue_1.4.2 gtable_0.3.0 [37] ipred_0.9-11 V8_3.4.2 pkgbuild_1.2.0 rstan_2.21.2 [41] Quandl_2.10.0 Rcpp_1.0.6 plotrix_3.8-1 viridisLite_0.4.0 [45] GPfit_1.0-8 bit_4.0.4 Formula_1.2-4 stats4_4.0.3 [49] lava_1.6.9 StanHeaders_2.21.0-7 prodlim_2019.11.13 htmlwidgets_1.5.3 [53] httr_1.4.2 ellipsis_0.3.2 rJava_1.0-4 loo_2.4.1 [57] pkgconfig_2.0.3 farver_2.1.0 nnet_7.3-16 dbplyr_2.1.1 [61] utf8_1.2.1 tidyselect_1.1.1 labeling_0.4.2 DiceDesign_1.9 [65] reactR_0.4.4 TeachingDemos_2.12 munsell_0.5.0 cellranger_1.1.0 [69] tools_4.0.3 cli_2.5.0 generics_0.1.0 yaml_2.2.1 [73] processx_3.5.2 bit64_4.0.5 fs_1.5.0 nlme_3.1-152 [77] future_1.21.0 reactable_0.2.3 tictoc_1.0.1 xml2_1.3.2 [81] LICHospitalR_0.2.0 compiler_4.0.3 rstudioapi_0.13 plotly_4.9.3 [85] curl_4.3.1 reprex_2.0.0 lhs_1.1.1 stringi_1.6.2 [89] plotmo_3.6.0 ps_1.6.0 lattice_0.20-44 Matrix_1.3-4 [93] urca_1.3-0 pillar_1.6.1 lifecycle_1.0.0 furrr_0.2.2 [97] lmtest_0.9-38 data.table_1.14.0 bitops_1.0-7 R6_2.5.0 [101] gridExtra_2.3 parallelly_1.25.0 codetools_0.2-18 MASS_7.3-54 [105] assertthat_0.2.1 xlsxjars_0.6.1 withr_2.4.2 fracdiff_1.5-1 [109] parallel_4.0.3 hms_1.1.0 quadprog_1.5-8 grid_4.0.3 [113] rpart_4.1-15 timeDate_3043.102 class_7.3-19 snakecase_0.11.0 [117] prophet_1.0 pROC_1.17.0.1
Script Fails at this:
resample_tscv <- training(splits) %>% time_series_cv( date_var = date_col , assess = "12 months" , initial = "24 months" , skip = "3 months" , slice_limit = 1 ) refit_tbl <- calibration_tbl %>% modeltime_refit( data = data_tbl , resamples = resample_tscv , control = control_resamples(verbose = TRUE) ) # Fail
The error message produced:
> refit_tbl <- calibration_tbl %>% + modeltime_refit( + data = data_tbl + , resamples = resample_tscv + , control = control_resamples(verbose = TRUE) + ) Error in if ((control$cores > 1) && control$allow_par) { : missing value where TRUE/FALSE needed
Full script:
# Lib Load ---------------------------------------------------------------- if(!require(pacman)) install.packages("pacman") pacman::p_load( "tidymodels", "modeltime", "tidyverse", "lubridate", "timetk", "odbc", "DBI", "janitor", "timetk", "tidyquant", "modeltime.ensemble", "modeltime.resample", "modeltime.h2o" ) interactive <- TRUE # Read Data ---- data_final_tbl <- # read in the excel file here # Data Split -------------------------------------------------------------- data_final_tbl <- data_tbl %>% select(date_col, excess_days) splits <- initial_time_split( data_final_tbl , prop = 0.8 , cumulative = TRUE ) # Features ---------------------------------------------------------------- recipe_base <- recipe(excess_days ~ ., data = training(splits)) %>% step_timeseries_signature(date_col) recipe_final <- recipe_base %>% step_rm(matches("(iso$)|(xts$)|(hour)|(min)|(sec)|(am.pm)")) %>% step_normalize(contains("index.num"), date_col_year) %>% step_dummy(contains("lbl"), one_hot = TRUE) %>% step_fourier(date_col, period = 365/12, K = 2) %>% step_holiday_signature(date_col) %>% step_YeoJohnson(excess_days) # Models ------------------------------------------------------------------ # Auto ARIMA -------------------------------------------------------------- model_spec_arima_no_boost <- arima_reg() %>% set_engine(engine = "auto_arima") wflw_fit_arima_no_boost <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_arima_no_boost) %>% fit(training(splits)) # Boosted Auto ARIMA ------------------------------------------------------ model_spec_arima_boosted <- arima_boost( min_n = 2 , learn_rate = 0.015 ) %>% set_engine(engine = "auto_arima_xgboost") wflw_fit_arima_boosted <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_arima_boosted) %>% fit(training(splits)) # ETS --------------------------------------------------------------------- model_spec_ets <- exp_smoothing() %>% set_engine(engine = "ets") wflw_fit_ets <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_ets) %>% fit(training(splits)) model_spec_croston <- exp_smoothing() %>% set_engine(engine = "croston") wflw_fit_croston <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_croston) %>% fit(training(splits)) model_spec_theta <- exp_smoothing() %>% set_engine(engine = "theta") wflw_fit_theta <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_theta) %>% fit(training(splits)) # STLM ETS ---------------------------------------------------------------- model_spec_stlm_ets <- seasonal_reg() %>% set_engine("stlm_ets") wflw_fit_stlm_ets <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_stlm_ets) %>% fit(training(splits)) model_spec_stlm_tbats <- seasonal_reg() %>% set_engine("tbats") wflw_fit_stlm_tbats <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_stlm_tbats) %>% fit(training(splits)) model_spec_stlm_arima <- seasonal_reg() %>% set_engine("stlm_arima") wflw_fit_stlm_arima <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_stlm_arima) %>% fit(training(splits)) # NNETAR ------------------------------------------------------------------ model_spec_nnetar <- nnetar_reg() %>% set_engine("nnetar") wflw_fit_nnetar <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_nnetar) %>% fit(training(splits)) # Prophet ----------------------------------------------------------------- model_spec_prophet <- prophet_reg() %>% set_engine(engine = "prophet") wflw_fit_prophet <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_prophet) %>% fit(training(splits)) model_spec_prophet_boost <- prophet_boost(learn_rate = 0.1) %>% set_engine("prophet_xgboost") wflw_fit_prophet_boost <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_prophet_boost) %>% fit(training(splits)) # TSLM -------------------------------------------------------------------- model_spec_lm <- linear_reg() %>% set_engine("lm") wflw_fit_lm <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_lm) %>% fit(training(splits)) # MARS -------------------------------------------------------------------- model_spec_mars <- mars(mode = "regression") %>% set_engine("earth") wflw_fit_mars <- workflow() %>% add_recipe(recipe = recipe_final) %>% add_model(model_spec_mars) %>% fit(training(splits)) # Model Table ------------------------------------------------------------- models_tbl <- modeltime_table( #wflw_fit_arima_no_boost, wflw_fit_arima_boosted, wflw_fit_ets, wflw_fit_theta, wflw_fit_stlm_ets, wflw_fit_stlm_tbats, wflw_fit_nnetar, wflw_fit_prophet, wflw_fit_prophet_boost, wflw_fit_lm, wflw_fit_mars ) # Model Ensemble Table ---------------------------------------------------- resample_tscv <- training(splits) %>% time_series_cv( date_var = date_col , assess = "12 months" , initial = "24 months" , skip = "3 months" , slice_limit = 1 ) # This fails from # submodel_predictions <- models_tbl %>% modeltime_fit_resamples( resamples = resample_tscv , control = control_resamples(verbose = TRUE) ) ensemble_fit <- submodel_predictions %>% ensemble_model_spec( model_spec = linear_reg( penalty = tune() , mixture = tune() ) %>% set_engine("glmnet") , kfold = 5 , grid = 6 , control = control_grid(verbose = TRUE) ) fit_mean_ensemble <- models_tbl %>% ensemble_average(type = "mean") fit_median_ensemble <- models_tbl %>% ensemble_average(type = "median") # Model Table ------------------------------------------------------------- models_tbl <- modeltime_table( #wflw_fit_arima_no_boost, wflw_fit_arima_boosted, wflw_fit_ets, wflw_fit_theta, wflw_fit_stlm_ets, wflw_fit_stlm_tbats, wflw_fit_nnetar, wflw_fit_prophet, wflw_fit_prophet_boost, wflw_fit_lm, wflw_fit_mars, fit_mean_ensemble, fit_median_ensemble ) models_tbl # Calibrate Model Testing ------------------------------------------------- calibration_tbl <- models_tbl %>% modeltime_calibrate(new_data = testing(splits)) calibration_tbl # Testing Accuracy -------------------------------------------------------- calibration_tbl %>% modeltime_forecast( new_data = testing(splits), actual_data = data_tbl ) %>% plot_modeltime_forecast( .legend_max_width = 25, .interactive = interactive, .conf_interval_show = FALSE ) calibration_tbl %>% modeltime_accuracy() %>% arrange(mae) %>% table_modeltime_accuracy(resizable = TRUE, bordered = TRUE) # Refit to all Data ------------------------------------------------------- # **** Failure **** ---- refit_tbl <- calibration_tbl %>% modeltime_refit( data = data_tbl , resamples = resample_tscv , control = control_resamples(verbose = TRUE) ) top_two_models <- refit_tbl %>% modeltime_accuracy() %>% arrange(mae) %>% head(2) ensemble_models <- refit_tbl %>% filter( .model_desc %>% str_to_lower() %>% str_detect("ensemble") ) %>% modeltime_accuracy() model_choices <- rbind(top_two_models, ensemble_models) refit_tbl %>% filter(.model_id %in% model_choices$.model_id) %>% modeltime_forecast(h = "1 year", actual_data = data_tbl) %>% plot_modeltime_forecast( .legend_max_width = 25 , .interactive = FALSE , .conf_interval_show = FALSE ) # Misc -------------------------------------------------------------------- models_tbl %>% modeltime_calibrate(new_data = testing(splits)) %>% modeltime_residuals() %>% plot_modeltime_residuals()
data_tbl.xlsx
I am running a script where I have created a cross validation that is being passed to
modeltime_refit
I do believe this may be an underlying issue withtune
but am posting here because I am using themodeltime_refit
function. Data is attached.Script Fails at this:
The error message produced:
Full script: