business-science / modeltime

Modeltime unlocks time series forecast models and machine learning in one framework
https://business-science.github.io/modeltime/
Other
515 stars 78 forks source link

Recursive forecasting for non panel data ERROR object '.key' not found #141

Closed Shafi2016 closed 2 years ago

Shafi2016 commented 2 years ago

Hello @mdancho84, @AlbertoAlmuinha, I am using recursive forecasting on data where I have one target variable and a few independent variables. I have given reproducible codes in the end.

With this chunk of codes

model_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = data_prepared_tbl 
  ) 

  plot_modeltime_forecast(
    .interactive        = FALSE,
    .conf_interval_show = FALSE,
    .facet_ncol         = 2
  )

I get the following error image


library(Quandl)

# Tidymodeling
library(modeltime.ensemble)
library(modeltime)
library(tidymodels)

# Base Models

library(glmnet)
library(xgboost)

# Core Packages
library(tidyverse)
library(lubridate)
library(timetk)
df1 <- Quandl(code = "FRED/PINCOME",
              type = "raw",
              collapse = "monthly",
              order = "asc",
              end_date="2017-12-31")
df2 <- Quandl(code = "FRED/GDP",
              type = "raw",
              collapse = "monthly",
              order = "asc",
              end_date="2017-12-31")

per <- df1 %>% rename(PI = Value)%>% select(-Date)
gdp <- df2 %>% rename(GDP = Value) 

data <- cbind(gdp,per)

data1 <- tk_augment_differences(
  .data = data,
  .value = GDP:PI,
  .lags = 1,
  .differences = 1,
  .log = TRUE,
  .names = "auto") %>%
  select(-GDP,-PI) %>%

  rename(GDP = GDP_lag1_diff1,PI = PI_lag1_diff1) %>% 
  drop_na()

horizon    <- 15
lag_period <- 15

data_pre_full <- data1 %>%
  # Add future window----
bind_rows(
  future_frame(.data = .,.date_var = Date, .length_out = horizon)
) %>%      

  # add lags----
tk_augment_lags(
  .value =  GDP : PI   , 
  .lags = lag_period) 

# 2.0 STEP 2 - SEPARATE INTO MODELING & FORECAST DATA ----

data_prepared_tbl <-   data_pre_full %>%

  filter(!is.na(GDP)) %>% 
  dplyr::select(-GDP : -PI)  %>%  
  drop_na()

forecast_tbl <-   data_pre_full %>%
  filter(is.na(GDP))  %>%  
  dplyr::select(-GDP : -PI)  

splits <- time_series_split(data_prepared_tbl, assess = 8, cumulative = TRUE)

lag_transformer_grouped <- function(data){
  data %>%
  tk_augment_lags(value, .lags = 1: 8) 

}

model_fit_glmnet <- linear_reg(penalty = 1) %>%
  set_engine("glmnet") %>%
  fit( GDP_lag15 ~ ., data = training(splits))

model_fit_xgboost <- boost_tree("regression", learn_rate = 0.35) %>%
  set_engine("xgboost") %>%
  fit( GDP_lag15 ~ ., data = training(splits))

recursive_ensemble_panel <- modeltime_table(
  model_fit_glmnet,
  model_fit_xgboost
) %>%
  ensemble_weighted(loadings = c(4, 6)) %>%
  recursive(
    transform  = lag_transformer_grouped,
    train_tail = tail(training(splits),  15),

  )

recursive_ensemble_panel

model_tbl <- modeltime_table(
  recursive_ensemble_panel
)

model_tbl

# * Accuracy Test ----

#calibration_tbl %>% modeltime_accuracy()

model_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = data_prepared_tbl

  ) 

  plot_modeltime_forecast(
    .interactive        = FALSE,
    .conf_interval_show = FALSE,
    .facet_ncol         = 2
  )
AlbertoAlmuinha commented 2 years ago

Hi @Shafi2016 ,

Here is your code corrected. Your problem was in the lag_transformer_grouped() function. Hope it helps.

library(Quandl)

# Tidymodeling
library(modeltime.ensemble)
library(modeltime)
library(tidymodels)

# Base Models

library(glmnet)
library(xgboost)

# Core Packages
library(tidyverse)
library(lubridate)
library(timetk)
df1 <- Quandl(code = "FRED/PINCOME",
              type = "raw",
              collapse = "monthly",
              order = "asc",
              end_date="2017-12-31")
df2 <- Quandl(code = "FRED/GDP",
              type = "raw",
              collapse = "monthly",
              order = "asc",
              end_date="2017-12-31")

per <- df1 %>% rename(PI = Value)%>% select(-Date)
gdp <- df2 %>% rename(GDP = Value) 

data <- cbind(gdp,per)

data1 <- tk_augment_differences(
  .data = data,
  .value = GDP:PI,
  .lags = 1,
  .differences = 1,
  .log = TRUE,
  .names = "auto") %>%
  select(-GDP,-PI) %>%

  rename(GDP = GDP_lag1_diff1,PI = PI_lag1_diff1) %>% 
  drop_na()

horizon    <- 15
lag_period <- 15

data_pre_full <- data1 %>%
  # Add future window----
bind_rows(
  future_frame(.data = .,.date_var = Date, .length_out = horizon)
) %>%      

  # add lags----
tk_augment_lags(
  .value =  GDP : PI   , 
  .lags = lag_period) 

# 2.0 STEP 2 - SEPARATE INTO MODELING & FORECAST DATA ----

data_prepared_tbl <-   data_pre_full %>%

  filter(!is.na(GDP)) %>% 
  dplyr::select(-GDP : -PI)  %>%  
  drop_na()

forecast_tbl <-   data_pre_full %>%
  filter(is.na(GDP))  %>%  
  dplyr::select(-GDP : -PI)  

splits <- time_series_split(data_prepared_tbl, assess = 8, cumulative = TRUE)

lag_transformer_grouped <- function(data){
  data %>%
    tk_augment_lags(GDP_lag15, .lags = 1: 8) 

}

model_fit_glmnet <- linear_reg(penalty = 1) %>%
  set_engine("glmnet") %>%
  fit( GDP_lag15 ~ ., data = training(splits))

model_fit_xgboost <- boost_tree("regression", learn_rate = 0.35) %>%
  set_engine("xgboost") %>%
  fit( GDP_lag15 ~ ., data = training(splits))

recursive_ensemble_panel <- modeltime_table(
  model_fit_glmnet,
  model_fit_xgboost
) %>%
  ensemble_weighted(loadings = c(4, 6)) %>%
  recursive(
    transform  = lag_transformer_grouped,
    train_tail = tail(training(splits),  15),

  )

recursive_ensemble_panel

model_tbl <- modeltime_table(
  recursive_ensemble_panel
)

model_tbl

# * Accuracy Test ----

#calibration_tbl %>% modeltime_accuracy()

model_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = data_prepared_tbl

  ) %>%
plot_modeltime_forecast(
  .interactive        = FALSE,
  .conf_interval_show = FALSE,
  .facet_ncol         = 2
)

If it helps, please close the issue :)

Regards,

Shafi2016 commented 2 years ago

Great, Thank You @AlbertoAlmuinha