business-science / modeltime.ensemble

Time Series Ensemble Forecasting
https://business-science.github.io/modeltime.ensemble/
Other
73 stars 16 forks source link

Vignette - Recursive Forecast Ensembles #6

Closed mdancho84 closed 3 years ago

mdancho84 commented 3 years ago

Create a vignette that uses the following examples.

Example 1 - Single Time Series

library(modeltime.ensemble)
library(modeltime)
library(tidymodels)
library(tidyverse)
library(lubridate)
library(timetk)

FORECAST_HORIZON <- 24

m750_extended <- m750 %>%
    group_by(id) %>%
    future_frame(
        .length_out = FORECAST_HORIZON,
        .bind_data  = TRUE
    ) %>%
    ungroup()
#> .date_var is missing. Using: date

recipe_lag <- recipe(value ~ date, m750_extended) %>%
    step_lag(value, lag = 1:FORECAST_HORIZON)

# Data Preparation
m750_lagged <- recipe_lag %>% prep() %>% juice()
m750_lagged
#> # A tibble: 330 x 26
#>    date       value lag_1_value lag_2_value lag_3_value lag_4_value lag_5_value
#>    <date>     <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
#>  1 1990-01-01  6370          NA          NA          NA          NA          NA
#>  2 1990-02-01  6430        6370          NA          NA          NA          NA
#>  3 1990-03-01  6520        6430        6370          NA          NA          NA
#>  4 1990-04-01  6580        6520        6430        6370          NA          NA
#>  5 1990-05-01  6620        6580        6520        6430        6370          NA
#>  6 1990-06-01  6690        6620        6580        6520        6430        6370
#>  7 1990-07-01  6000        6690        6620        6580        6520        6430
#>  8 1990-08-01  5450        6000        6690        6620        6580        6520
#>  9 1990-09-01  6480        5450        6000        6690        6620        6580
#> 10 1990-10-01  6820        6480        5450        6000        6690        6620
#> # … with 320 more rows, and 19 more variables: lag_6_value <dbl>,
#> #   lag_7_value <dbl>, lag_8_value <dbl>, lag_9_value <dbl>,
#> #   lag_10_value <dbl>, lag_11_value <dbl>, lag_12_value <dbl>,
#> #   lag_13_value <dbl>, lag_14_value <dbl>, lag_15_value <dbl>,
#> #   lag_16_value <dbl>, lag_17_value <dbl>, lag_18_value <dbl>,
#> #   lag_19_value <dbl>, lag_20_value <dbl>, lag_21_value <dbl>,
#> #   lag_22_value <dbl>, lag_23_value <dbl>, lag_24_value <dbl>

train_data <- m750_lagged %>%
    filter(!is.na(value)) %>%
    drop_na()

future_data <- m750_lagged %>%
    filter(is.na(value))

### Fitting models

model_fit_lm <- linear_reg() %>%
    set_engine("lm") %>%
    fit(value ~ ., data = train_data)

model_fit_mars <- mars("regression") %>%
    set_engine("earth", endspan = 24) %>%
    fit(value ~ ., data = train_data)

recursive_ensemble <- modeltime_table(
    model_fit_lm,
    model_fit_mars
) %>%
    ensemble_average(type = "mean") %>%
    recursive(
        transform  = recipe_lag,
        train_tail = tail(train_data, FORECAST_HORIZON)
    )

fcast <- modeltime_table(
    recursive_ensemble
) %>%
    modeltime_forecast(
        new_data = future_data,
        actual_data = m750
    )

fcast %>%
    plot_modeltime_forecast(
        .interactive = FALSE,
        .conf_interval_show = FALSE,
    )

Created on 2021-04-02 by the reprex package (v1.0.0)

Example 2 - Multiple Time Series Panel

library(modeltime.ensemble)
library(modeltime)
library(tidymodels)
library(tidyverse)
library(lubridate)
library(timetk)

m4_monthly
#> # A tibble: 1,574 x 3
#>    id    date       value
#>    <fct> <date>     <dbl>
#>  1 M1    1976-06-01  8000
#>  2 M1    1976-07-01  8350
#>  3 M1    1976-08-01  8570
#>  4 M1    1976-09-01  7700
#>  5 M1    1976-10-01  7080
#>  6 M1    1976-11-01  6520
#>  7 M1    1976-12-01  6070
#>  8 M1    1977-01-01  6650
#>  9 M1    1977-02-01  6830
#> 10 M1    1977-03-01  5710
#> # … with 1,564 more rows

FORECAST_HORIZON <- 24

m4_extended <- m4_monthly %>%
    group_by(id) %>%
    future_frame(
        .length_out = FORECAST_HORIZON,
        .bind_data  = TRUE
    ) %>%
    ungroup()
#> .date_var is missing. Using: date

# TRANSFORM FUNCTION ----
# - NOTE - We create lags by group
lag_transformer_grouped <- function(data){
    data %>%
        group_by(id) %>%
        tk_augment_lags(value, .lags = 1:FORECAST_HORIZON) %>%
        ungroup()
}

m4_lags <- m4_extended %>%
    lag_transformer_grouped()

train_data <- m4_lags %>%
    drop_na()

future_data <- m4_lags %>%
    filter(is.na(value))

model_fit_lm <- linear_reg() %>%
    set_engine("lm") %>%
    fit(value ~ ., data = train_data)

model_fit_mars <- mars("regression") %>%
    set_engine("earth") %>%
    fit(value ~ ., data = train_data)

recursive_ensemble_p <- modeltime_table(
    model_fit_mars,
    model_fit_lm
) %>%
    ensemble_average(type = "median") %>%
    recursive(
        transform  = lag_transformer_grouped,
        train_tail = panel_tail(train_data, id, FORECAST_HORIZON),
        id = "id"
    )

fcast <- modeltime_table(
    recursive_ensemble_p
) %>%
    modeltime_forecast(
        new_data = future_data,
        actual_data = m4_lags,
        keep_data = TRUE
    )

fcast %>%
    group_by(id) %>%
    plot_modeltime_forecast(
        .interactive = FALSE,
        .conf_interval_show = FALSE
    )

Created on 2021-04-02 by the reprex package (v1.0.0)

mdancho84 commented 3 years ago

Closed by this commit: https://github.com/business-science/modeltime.ensemble/commit/4c8604d90758a6d52df9ec8d8615e85bbdab2c54