tidyverts / fabletools

General fable features useful for extension packages
http://fabletools.tidyverts.org/
90 stars 31 forks source link

Order matters when combining more than two individual models #321

Open xqnwang opened 3 years ago

xqnwang commented 3 years ago

When combining more than two individual models, changing the order of individual models in the combination_model() function will produce different combination results (variance). I give an example below for forecast combinations using the cafe.rds data in the NYCR talk by Rob.

library(fpp3)
library(distributional)
library(gganimate)
library(stringr)
library(ggdist)
library(purrr)

# data Import
cafe <- readRDS("quantile_ensemble_talk/cafe.rds")
auscafe <- cafe %>%
  summarise(turnover = sum(turnover))
train <- auscafe %>%
  filter(year(date) <= 2018)

# equally weighted combination
fc_s <- train %>%
  model(
    COMB_NAS = combination_model(NAIVE(turnover),
                                 ARIMA(turnover ~ pdq(d=1) + PDQ(D=1)),
                                 SNAIVE(turnover),
                                 cmbn_args = list(weights = "equal")),
    COMB_NSA = combination_model(NAIVE(turnover),
                                 SNAIVE(turnover),
                                 ARIMA(turnover ~ pdq(d=1) + PDQ(D=1)),
                                 cmbn_args = list(weights = "equal"))
  ) %>% forecast(h = 1)

# weighted combination using inv_var
fc_w <- train %>%
  model(
    COMB_NAS = combination_model(NAIVE(turnover),
                                 ARIMA(turnover ~ pdq(d=1) + PDQ(D=1)),
                                 SNAIVE(turnover),
                                 cmbn_args = list(weights = "inv_var")),
    COMB_NSA = combination_model(NAIVE(turnover),
                                 SNAIVE(turnover),
                                 ARIMA(turnover ~ pdq(d=1) + PDQ(D=1)),
                                 cmbn_args = list(weights = "inv_var"))
  ) %>% forecast(h = 1)

# outputs
list(fc_s = fc_s$turnover, fc_w = fc_w$turnover)
#> $fc_s
#> <distribution[2]>
#>   [1] N(3945, 10775) N(3945, 10765)
#> 
#> $fc_w
#> <distribution[2]>
#>   [1] N(3850, 5062) N(3850, 5230)

It can be observed that, whether working with an equally or unequally weighted combination, the variances of the combined normal distributions with different order of individual models are different. The reason is that the combination_model() function combines the individual models in an iterative manner.

I think this should be modified using the mean and variance of the weighted sum/average of correlated variables (please refer to the link). And I wrote the following code to achieve the combinations of multiple models, resulting in the same combined distributions even when the order of individual models was changed.

# model training and forecasting
ft <- train %>%
  model(
    NAIVE = NAIVE(turnover),
    ARIMA = ARIMA(turnover ~ pdq(d=1) + PDQ(D=1)),
    SNAIVE = SNAIVE(turnover),
    COMB = combination_model(NAIVE(turnover),
                             ARIMA(turnover ~ pdq(d=1) + PDQ(D=1)),
                             SNAIVE(turnover),
                             cmbn_args = list(weights = "equal")),
  ) 
fc <- ft %>% forecast(h = "1 year")

# combinations in fable
combination_fable <- fc %>%
  filter(.model=="COMB") %>%
  as_tibble() %>%
  select(date,.model,turnover)

# combinations using the weighted average of correlated variables
weights_type <- "equal" # "equal" or "inv_var"
if(weights_type == "equal"){
  weights <- rep(1/3, 3)
} else{
  # weights are obtained using (innovation) residuals
  inv_var <- c(1/var(residuals(ft$NAIVE[[1]])$.resid, na.rm = TRUE),
               1/var(residuals(ft$ARIMA[[1]])$.resid, na.rm = TRUE),
               1/var(residuals(ft$SNAIVE[[1]])$.resid, na.rm = TRUE))
  weights <- inv_var/sum(inv_var)
}

## correlation among individual (response) residuals
resid_naive <- residuals(ft$NAIVE[[1]], type = "response")[[".resid"]]
resid_arima <- residuals(ft$ARIMA[[1]], type = "response")[[".resid"]]
resid_snaive <- residuals(ft$SNAIVE[[1]], type = "response")[[".resid"]]
ft_cov <- var(cbind(resid_naive,resid_arima,resid_snaive), na.rm = TRUE)
ft_cor <- cov2cor(ft_cov)

## combined distributions
fc_dist_naive <- fc$turnover[1:12]
fc_dist_arima <- fc$turnover[13:24]
fc_dist_snaive <- fc$turnover[25:36]
fc_dist_comb <- fc$turnover[37:48] # combination_fable
mu_naive <- mean(fc_dist_naive)
mu_arima <- mean(fc_dist_arima)
mu_snaive <- mean(fc_dist_snaive)
mu_comb <- mean(fc_dist_comb)
var_naive <- variance(fc_dist_naive)
var_arima <- variance(fc_dist_arima)
var_snaive <- variance(fc_dist_snaive)
var_comb <- variance(fc_dist_comb)

## modified combined distributions
mu_test <- apply(cbind(mu_naive, mu_arima, mu_snaive),
                 1,
                 function(x) weighted.mean(x, w = weights))
var_test <- weights[1]^2*var_naive + weights[2]^2*var_arima + weights[3]^2*var_snaive + 
  2 * weights[1]*weights[2] * sqrt(var_naive)*sqrt(var_arima)*ft_cor[1,2] +
  2 * weights[1]*weights[3] * sqrt(var_naive)*sqrt(var_snaive)*ft_cor[1,3] +
  2 * weights[2]*weights[3] * sqrt(var_arima)*sqrt(var_snaive)*ft_cor[2,3]

## replace with modified mean and variance
fc_test <- fc
fc_test$turnover[37:48] <- distributional::dist_normal(mu_test, sqrt(var_test))
fc_test$.model[37:48] <- rep("TEST", 12)
combination_test <- fc_test %>%
  filter(.model=="TEST") %>%
  as_tibble() %>%
  select(date,.model,turnover)

# results
fc2 <- bind_rows(combination_fable, combination_test) %>%
  mutate(.model = factor(.model, levels=c("COMB","TEST"))) %>%
  as_fable(index=date, key=.model, distribution=turnover, response="turnover")
## combinations in fable
fc2$turnover[1:12]
#> <distribution[12]>
#>  [1] N(3945, 10775) N(3733, 16170) N(3970, 21375) N(3928, 26486) N(3928, 31537)
#>  [6] N(3881, 36547) N(4032, 41526) N(4068, 46480) N(4051, 51414) N(4100, 56331)
#> [11] N(4096, 61235) N(4324, 66125)

## modified combinations
fc2$turnover[13:24]
#> <distribution[12]>
#>  [1] N(3945, 10814) N(3733, 16050) N(3970, 21147) N(3928, 26174) N(3928, 31155)
#>  [6] N(3881, 36105) N(4032, 41030) N(4068, 45936) N(4051, 50826) N(4100, 55703)
#> [11] N(4096, 60568) N(4324, 65424)

# plot
fc2 %>% filter(.model=="COMB") %>% 
  autoplot(train %>% filter(year(date) >= 2017), color='red') + 
  labs(title = "Combination in fable")
fc2 %>% filter(.model=="TEST") %>% 
  autoplot(train %>% filter(year(date) >= 2017), color='blue') + 
  labs(title = "Test combination")
fc2 %>% autoplot(train %>% filter(year(date) >= 2017)) +
  autolayer(filter(fc2, .model=="COMB"), color='red', alpha = 0.3) + 
  autolayer(filter(fc2, .model=="TEST"), color='blue', alpha = 0.3)
mitchelloharawild commented 3 years ago

Thanks for the clear description and reproducible example. I will make these changes for the next release.