Open mdancho84 opened 2 years ago
I've added support for nested forecasting with ensembles. There are 2 new algorithms, `ensemble_nested_average()` and `ensemble_nested_weighted()`, both demonstrated below:
# Setup
library(tidymodels)
library(modeltime)
library(modeltime.ensemble)
library(tidyverse)
library(timetk)
# Keep only the three columns used below, renaming them to the id / date /
# value names that the rest of the script refers to.
data_tbl <- walmart_sales_weekly %>%
  select(id, date = Date, value = Weekly_Sales)

# Build the nested table in one pipeline:
#   1. extend every series by 52 future rows,
#   2. nest by id (52 future rows, 104 actual rows per series),
#   3. attach a split with a 52-row test set,
#   4. keep the first two rows (one row per series id).
nested_data_tbl <- data_tbl %>%
  extend_timeseries(.id_var = id, .date_var = date, .length_future = 52) %>%
  nest_timeseries(
    .id_var        = id,
    .length_future = 52,
    .length_actual = 52 * 2
  ) %>%
  split_nested_timeseries(.length_test = 52) %>%
  slice(1:2)
# Recipes ----
# Both recipes are templated on the training split extracted from the nested
# data.

# Prophet: the outcome is modelled on the date column only.
rec_prophet <- recipe(
  value ~ date,
  data = extract_nested_train_split(nested_data_tbl)
)

# XGBoost: derive time series signature columns from `date`, drop the raw
# date, remove zero-variance predictors, and one-hot encode nominal predictors.
rec_xgb <- recipe(
  value ~ .,
  data = extract_nested_train_split(nested_data_tbl)
) %>%
  step_timeseries_signature(date) %>%
  step_rm(date) %>%
  step_zv(all_predictors()) %>%
  step_dummy(all_nominal_predictors(), one_hot = TRUE)

# Workflows ----
# Each workflow pairs one recipe with one regression model specification.
wflw_prophet <- workflow() %>%
  add_recipe(rec_prophet) %>%
  add_model(
    set_engine(
      prophet_reg(mode = "regression", seasonality_yearly = TRUE),
      "prophet"
    )
  )

wflw_xgb <- workflow() %>%
  add_recipe(rec_xgb) %>%
  add_model(set_engine(boost_tree(mode = "regression"), "xgboost"))
# Fit both workflows on the nested data. The nested table goes in by name;
# the workflows to fit follow as unnamed arguments.
nested_modeltime_tbl <- modeltime_nested_fit(
  nested_data = nested_data_tbl,
  wflw_prophet, wflw_xgb
)
#> Fitting models on training data... ===============>--------------- 50% | ET...
# ENSEMBLE AVERAGE TESTING ----
# Adds a mean ensemble and then a median ensemble to every nested modeltime
# table, keeping the original submodels alongside them.
#
# NOTE(review): a 6-worker backend is started here, but both calls below pass
# `allow_par = FALSE`, so they presumably run sequentially -- confirm whether
# parallel fitting was intended.
parallel_start(6)
ensem <- nested_modeltime_tbl %>%
  ensemble_nested_average(
    type = "mean",
    keep_submodels = TRUE,
    control = control_nested_fit(allow_par = FALSE, verbose = TRUE)
  ) %>%
  # `model_ids = c(1, 2)` limits the median ensemble to the two original
  # submodels; presumably this keeps the mean ensemble added above (model 3)
  # from being used as a submodel -- confirm.
  ensemble_nested_average(
    type = "median",
    keep_submodels = TRUE,
    model_ids = c(1, 2),
    control = control_nested_fit(allow_par = FALSE, verbose = TRUE)
  )
#> i [1/2] Starting Modeltime Table: ID 1_1...
#> v [1/2] Finished Modeltime Table: ID 1_1
#> i [2/2] Starting Modeltime Table: ID 1_3...
#> v [2/2] Finished Modeltime Table: ID 1_3
#> Finished in: 1.125003 secs.
#> i [1/2] Starting Modeltime Table: ID 1_1...
#> v [1/2] Finished Modeltime Table: ID 1_1
#> i [2/2] Starting Modeltime Table: ID 1_3...
#> v [2/2] Finished Modeltime Table: ID 1_3
#> Finished in: 1.133003 secs.
#>
# Release the workers started above so they are not left running when the
# script starts another backend later or finishes.
parallel_stop()
# Inspect the result of the average ensembles ----

# Print the nested table: each series now holds a 4-row modeltime table
# (2 submodels + mean ensemble + median ensemble).
print(ensem)
#> # Nested Modeltime Table
#>
#> Trained on: .splits | Model Errors: [0]
#> # A tibble: 2 x 5
#> id .actual_data .future_data .splits .modeltime_tables
#> <fct> <list> <list> <list> <list>
#> 1 1_1 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_time_tbl [4 x~
#> 2 1_3 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_time_tbl [4 x~

# Modeltime table extracted with the default arguments (series 1_1 below).
extract_nested_modeltime_table(ensem)
#> # A tibble: 4 x 6
#> id .model_id .model .model_desc .type .calibration_da~
#> <fct> <dbl> <list> <chr> <chr> <list>
#> 1 1_1 1 <workflow> PROPHET Test <tibble [52 x 4~
#> 2 1_1 2 <workflow> XGBOOST Test <tibble [52 x 4~
#> 3 1_1 3 <ensemble [2]> ENSEMBLE (MEAN): 2 MODELS Test <tibble [52 x 4~
#> 4 1_1 4 <ensemble [2]> ENSEMBLE (MEDIAN): 2 MODELS Test <tibble [52 x 4~

# Test-set accuracy for every series / model pair. The mean and median rows
# match because each ensemble averages the same two submodels.
extract_nested_test_accuracy(ensem)
#> # A tibble: 8 x 10
#> id .model_id .model_desc .type mae mape mase smape rmse rsq
#> <fct> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1_1 1 PROPHET Test 10071. 45.9 1.99 60.0 11777. 0.383
#> 2 1_1 2 XGBOOST Test 6237. 25.3 1.23 24.6 9017. 0.191
#> 3 1_1 3 ENSEMBLE (MEAN): ~ Test 5419. 20.2 1.07 22.2 8655. 0.415
#> 4 1_1 4 ENSEMBLE (MEDIAN)~ Test 5419. 20.2 1.07 22.2 8655. 0.415
#> 5 1_3 1 PROPHET Test 3540. 29.9 1.37 25.5 4708. 0.796
#> 6 1_3 2 XGBOOST Test 3086. 18.8 1.20 20.4 5086. 0.787
#> 7 1_3 3 ENSEMBLE (MEAN): ~ Test 2662. 19.0 1.03 17.7 4038. 0.819
#> 8 1_3 4 ENSEMBLE (MEDIAN)~ Test 2662. 19.0 1.03 17.7 4038. 0.819
# ENSEMBLE WEIGHTED TESTING ----
# Adds a weighted ensemble to every nested modeltime table. Going by the
# printed ensemble further down, loadings c(2, 1) are handed out by RMSE rank
# (the lower-RMSE submodel gets 2) and scaled to sum to 1 (0.667 / 0.333).
#
# NOTE(review): a 6-worker backend is started here, but the call below passes
# `allow_par = FALSE`, so it presumably runs sequentially -- confirm whether
# parallel fitting was intended.
# NOTE(review): `loading_method` is the argument name used when this reprex
# was recorded; released modeltime.ensemble versions may name it differently
# (e.g. `metric`) -- confirm against the installed version.
parallel_start(6)
ensem <- nested_modeltime_tbl %>%
  ensemble_nested_weighted(
    loadings = c(2, 1),
    loading_method = "lowest_rmse",
    control = control_nested_fit(allow_par = FALSE, verbose = TRUE)
  )
#> i [1/2] Starting Modeltime Table: ID 1_1...
#> v [1/2] Finished Modeltime Table: ID 1_1
#> i [2/2] Starting Modeltime Table: ID 1_3...
#> v [2/2] Finished Modeltime Table: ID 1_3
#> Finished in: 1.278002 secs.
#>
# Release the workers started above so they are not left running after the
# script finishes.
parallel_stop()
# Inspect the result of the weighted ensemble ----

# Print the nested table: each series now holds a 3-row modeltime table
# (2 submodels + weighted ensemble).
print(ensem)
#> # Nested Modeltime Table
#>
#> Trained on: .splits | Model Errors: [0]
#> # A tibble: 2 x 5
#> id .actual_data .future_data .splits .modeltime_tables
#> <fct> <list> <list> <list> <list>
#> 1 1_1 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_time_tbl [3 x~
#> 2 1_3 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_time_tbl [3 x~

# Pull the ensemble object itself: third row of the first series' modeltime
# table, first element of its `.model` list column.
extract_nested_modeltime_table(ensem, 1) %>%
  slice(3) %>%
  pluck(".model", 1)
#> -- Modeltime Ensemble -------------------------------------------
#> Ensemble of 2 Models (WEIGHTED)
#>
#> # Modeltime Table
#> # A tibble: 2 x 6
#> .model_id .model .model_desc .type .calibration_data .loadings
#> <int> <list> <chr> <chr> <list> <dbl>
#> 1 1 <workflow> PROPHET Test <tibble [52 x 4]> 0.333
#> 2 2 <workflow> XGBOOST Test <tibble [52 x 4]> 0.667

# Test-set accuracy for every series / model pair.
extract_nested_test_accuracy(ensem)
#> # A tibble: 6 x 10
#> id .model_id .model_desc .type mae mape mase smape rmse rsq
#> <fct> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1_1 1 PROPHET Test 10071. 45.9 1.99 60.0 11777. 0.383
#> 2 1_1 2 XGBOOST Test 6237. 25.3 1.23 24.6 9017. 0.191
#> 3 1_1 3 ENSEMBLE (WEIGHTE~ Test 4414. 14.8 0.870 16.0 8321. 0.410
#> 4 1_3 1 PROPHET Test 3540. 29.9 1.37 25.5 4708. 0.796
#> 5 1_3 2 XGBOOST Test 3086. 18.8 1.20 20.4 5086. 0.787
#> 6 1_3 3 ENSEMBLE (WEIGHTE~ Test 2772. 21.2 1.08 19.1 4068. 0.820
# Plot the test-set forecasts, grouped by series id, as a static
# (non-interactive) plot.
ensem %>%
  extract_nested_test_forecast() %>%
  group_by(id) %>%
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  plot_modeltime_forecast(.interactive = FALSE)
Created on 2021-10-12 by the reprex package (v2.0.1)
Add method for working with Local ID and Nested Forecast Ensembles.