business-science / modeltime.ensemble

Time Series Ensemble Forecasting
https://business-science.github.io/modeltime.ensemble/
Other
72 stars 19 forks source link

Error with modeltime_fit_resamples #9

Open spsanderson opened 3 years ago

spsanderson commented 3 years ago

data_tbl.xlsx

I am getting the following error when using modeltime_fit_resamples

* Model ID: 3 SEASONAL DECOMP: ETS(A,AD,N)
i Slice1: preprocessor 1/1
v Slice1: preprocessor 1/1
i Slice1: preprocessor 1/1, model 1/1
frequency = 3 observations per 1 quarter
External regressors (xregs) detected. STLM + ETS is a univariate method. Ignoring xregs.
v Slice1: preprocessor 1/1, model 1/1
i Slice1: preprocessor 1/1, model 1/1 (predictions)
Error: Problem with `mutate()` column `.resample_results`.
i `.resample_results = purrr::pmap(...)`.
x <text>:1:2: unexpected ','
1: 0,
     ^
> rlang::last_error()
<error/dplyr:::mutate_error>
Problem with `mutate()` column `.resample_results`.
i `.resample_results = purrr::pmap(...)`.
x <text>:1:2: unexpected ','
1: 0,
     ^
Backtrace:
Run `rlang::last_trace()` to see the full context.
> rlang::last_trace()
<error/dplyr:::mutate_error>
Problem with `mutate()` column `.resample_results`.
i `.resample_results = purrr::pmap(...)`.
x <text>:1:2: unexpected ','
1: 0,
     ^
Backtrace:
     x
  1. +-`%>%`(...)
  2. +-modeltime.resample::modeltime_fit_resamples(...)
  3. +-modeltime.resample:::modeltime_fit_resamples.mdl_time_tbl(...)
  4. | \-modeltime.resample:::map_fit_resamples(data, resamples, control)
  5. |   \-`%>%`(...)
  6. +-dplyr::mutate(...)
  7. +-dplyr:::mutate.data.frame(...)
  8. | \-dplyr:::mutate_cols(.data, ..., caller_env = caller_env())
  9. |   +-base::withCallingHandlers(...)
 10. |   \-mask$eval_all_mutate(quo)
 11. +-purrr::pmap(...)
 12. | \-modeltime.resample:::.f(...)
 13. |   \-cli::cli_li(stringr::str_glue("Model ID: {cli::col_blue(as.character(id))} {cli::col_blue(desc)}"))
 14. |     +-cli:::cli__message(...)
 15. |     | \-"id" %in% names(args)
 16. |     \-base::lapply(items, glue_cmd, .envir = .envir)
 17. |       \-cli:::FUN(X[[i]], ...)
 18. |         \-glue::glue(...)
 19. |           \-glue::glue_data(...)
 20. +-(function (expr) ...
 21. | \-cli:::.transformer(expr, env)
 22. |   \-base::stop(res)
 23. \-(function (e) ...
<error/simpleError>
<text>:1:2: unexpected ','
1: 0,
     ^

Here is the full script:


# Lib Load ----------------------------------------------------------------

if(!require(pacman)) install.packages("pacman")
pacman::p_load(
  "tidymodels",
  "modeltime",
  "tidyverse",
  "lubridate",
  "timetk",
  "odbc",
  "DBI",
  "janitor",
  "timetk",
  "tidyquant",
  "modeltime.ensemble",
  "modeltime.resample",
  "modeltime.h2o"
)

interactive <- TRUE

data_tbl <- xlsx::read.xlsx("data_tbl.xlsx",sheetIndex = 1)

# TS Plot -----------------------------------------------------------------

start_date <- min(data_tbl$date_col)
end_date   <- max(data_tbl$date_col)

plot_time_series(
  .data = data_tbl
  , .date_var = date_col
  , .value = excess_days
  , .title = paste0(
    "Excess Days for IP Discharges from: "
    , start_date
    , " to "
    , end_date
  )
  , .interactive = FALSE
)

plot_seasonal_diagnostics(
  .data = data_tbl
  , .date_var = date_col
  , .value = excess_days
)

plot_anomaly_diagnostics(
  .data = data_tbl
  , .date_var = date_col
  , .value = excess_days
)

# Data Split --------------------------------------------------------------
data_final_tbl <- data_tbl %>%
  select(date_col, excess_days)

splits <- initial_time_split(
  data_final_tbl
  , prop = 0.8
  , cumulative = TRUE
)

# Features ----------------------------------------------------------------

recipe_base <- recipe(excess_days ~ ., data = training(splits)) %>%
  step_timeseries_signature(date_col)

recipe_final <- recipe_base %>%
  step_rm(matches("(iso$)|(xts$)|(hour)|(min)|(sec)|(am.pm)")) %>%
  step_normalize(contains("index.num"), date_col_year) %>%
  step_dummy(contains("lbl"), one_hot = TRUE) %>%
  step_fourier(date_col, period = 365/12, K = 2) %>%
  step_holiday_signature(date_col) %>%
  step_YeoJohnson(excess_days)

# Models ------------------------------------------------------------------

# Auto ARIMA --------------------------------------------------------------

model_spec_arima_no_boost <- arima_reg() %>%
  set_engine(engine = "auto_arima")

wflw_fit_arima_no_boost <- workflow() %>%
  add_recipe(recipe = recipe_final) %>%
  add_model(model_spec_arima_no_boost) %>%
  fit(training(splits))

# Boosted Auto ARIMA ------------------------------------------------------

model_spec_arima_boosted <- arima_boost(
    min_n = 2
    , learn_rate = 0.015
  ) %>%
  set_engine(engine = "auto_arima_xgboost")

wflw_fit_arima_boosted <- workflow() %>%
  add_recipe(recipe = recipe_final) %>%
  add_model(model_spec_arima_boosted) %>%
  fit(training(splits))

# ETS ---------------------------------------------------------------------

model_spec_ets <- exp_smoothing() %>%
  set_engine(engine = "ets") 

wflw_fit_ets <- workflow() %>%
  add_recipe(recipe = recipe_final) %>%
  add_model(model_spec_ets) %>%
  fit(training(splits))

# model_spec_croston <- exp_smoothing() %>%
#   set_engine(engine = "croston")
# 
# wflw_fit_croston <- workflow() %>%
#   add_recipe(recipe = recipe_final) %>%
#   add_model(model_spec_croston) %>%
#   fit(training(splits))

# model_spec_theta <- exp_smoothing() %>%
#   set_engine(engine = "theta")
# 
# wflw_fit_theta <- workflow() %>%
#   add_recipe(recipe = recipe_final) %>%
#   add_model(model_spec_theta) %>%
#   fit(training(splits))

# STLM ETS ----------------------------------------------------------------

model_spec_stlm_ets <- seasonal_reg() %>%
  set_engine("stlm_ets")

wflw_fit_stlm_ets <- workflow() %>%
  add_recipe(recipe = recipe_final) %>%
  add_model(model_spec_stlm_ets) %>%
  fit(training(splits))

model_spec_stlm_tbats <- seasonal_reg() %>%
  set_engine("tbats")

wflw_fit_stlm_tbats <- workflow() %>%
  add_recipe(recipe = recipe_final) %>%
  add_model(model_spec_stlm_tbats) %>%
  fit(training(splits))

model_spec_stlm_arima <- seasonal_reg() %>%
  set_engine("stlm_arima")

wflw_fit_stlm_arima <- workflow() %>%
  add_recipe(recipe = recipe_final) %>%
  add_model(model_spec_stlm_arima) %>%
  fit(training(splits))

# NNETAR ------------------------------------------------------------------

model_spec_nnetar <- nnetar_reg() %>%
  set_engine("nnetar")

wflw_fit_nnetar <- workflow() %>%
  add_recipe(recipe = recipe_final) %>%
  add_model(model_spec_nnetar) %>%
  fit(training(splits))

# Prophet -----------------------------------------------------------------

model_spec_prophet <- prophet_reg() %>%
  set_engine(engine = "prophet")

wflw_fit_prophet <- workflow() %>%
  add_recipe(recipe = recipe_final) %>%
  add_model(model_spec_prophet) %>%
  fit(training(splits))

model_spec_prophet_boost <- prophet_boost(learn_rate = 0.1) %>% 
  set_engine("prophet_xgboost") 

wflw_fit_prophet_boost <- workflow() %>%
  add_recipe(recipe = recipe_final) %>%
  add_model(model_spec_prophet_boost) %>%
  fit(training(splits))

# TSLM --------------------------------------------------------------------

model_spec_lm <- linear_reg() %>%
  set_engine("lm")

wflw_fit_lm <- workflow() %>%
  add_recipe(recipe = recipe_final) %>%
  add_model(model_spec_lm) %>%
  fit(training(splits))

# MARS --------------------------------------------------------------------

model_spec_mars <- mars(mode = "regression") %>%
  set_engine("earth")

wflw_fit_mars <- workflow() %>%
  add_recipe(recipe = recipe_final) %>%
  add_model(model_spec_mars) %>%
  fit(training(splits))

# H2O AutoML --------------------------------------------------------------
# h2o.init(
#   nthreads = -1
#   , ip = 'localhost'
#   , port = 54321
# )
# 
# model_spec <- automl_reg(mode = 'regression') %>%
#   set_engine(
#     engine                     = 'h2o',
#     max_runtime_secs           = 5, 
#     max_runtime_secs_per_model = 3,
#     max_models                 = 3,
#     nfolds                     = 5,
#     exclude_algos              = c("DeepLearning"),
#     verbosity                  = NULL,
#     seed                       = 786
#   ) 
# 
# model_spec
# 
# model_fitted <- model_spec %>%
#   fit(excess_days ~ ., data = training(splits))
# 
# model_fitted
# 
# predict(model_fitted, testing(splits))

# Model Table -------------------------------------------------------------

models_tbl <- modeltime_table(
  #wflw_fit_arima_no_boost,
  wflw_fit_arima_boosted,
  wflw_fit_ets,
  wflw_fit_stlm_ets,
  wflw_fit_stlm_tbats,
  wflw_fit_nnetar,
  wflw_fit_prophet,
  wflw_fit_prophet_boost,
  wflw_fit_lm, 
  wflw_fit_mars
)

# Model Ensemble Table ----------------------------------------------------
resample_tscv <- training(splits) %>%
  time_series_cv(
    date_var      = date_col
    , assess      = "12 months"
    , initial     = "24 months"
    , skip        = "3 months"
    , slice_limit = 1
  )

submodel_predictions <- models_tbl %>% # Model Failure Here 
  modeltime_fit_resamples(
    resamples = resample_tscv
    , control = control_resamples(verbose = TRUE)
  )

ensemble_fit <- submodel_predictions %>%
  ensemble_model_spec(
    model_spec = linear_reg(
      penalty  = tune()
      , mixture = tune()
    ) %>%
      set_engine("glmnet")
    , kfold    = 5
    , grid     = 6
    , control  = control_grid(verbose = TRUE)
  )

fit_mean_ensemble <- models_tbl %>%
  ensemble_average(type = "mean")

fit_median_ensemble <- models_tbl %>%
  ensemble_average(type = "median")
AlbertoAlmuinha commented 1 year ago

Hi @spsanderson ,

This is related with https://github.com/business-science/modeltime.resample/issues/5

I think modeltime.resample is a better place to continue the discussion if you agree.

Regards,