tidyverts / fabletools

General fable features useful for extension packages
http://fabletools.tidyverts.org/
89 stars 31 forks source link

Reconciling error #267

Closed robjhyndman closed 4 years ago

robjhyndman commented 4 years ago

Trying to replicate https://stackoverflow.com/q/64012331/144157 created a different problem from that reported:

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tsibble)
library(fable)
#> Loading required package: fabletools

# Seed the RNG so the three simulated series below are reproducible.
set.seed(1)

# Twelve draws per series; the draw order (B1, B2, M2) is kept so that the
# generated values are unchanged.
B1 <- seq_len(12) + rnorm(n = 12, mean = 5)  # noise around 5 plus a 1..12 trend
B2 <- rnorm(n = 12, mean = 5)                # noise around 5, no trend
M2 <- rnorm(n = 12, mean = 25)               # noise around 25, no trend

# Stack the three series into one long tsibble: 12 monthly rows (2020 Jan to
# 2020 Dec) per series, keyed by bottom-level label B and parent label M.
# B1 and B2 sit under M1; B3 is the only series under M2.
ts_data <- as_tsibble(
  tibble(
    value = c(B1, B2, M2),
    month = rep(yearmonth(paste0("2020-", 1:12)), times = 3),
    B = rep(c("B1", "B2", "B3"), each = 12),
    M = rep(c("M1", "M2"), times = c(24, 12))
  ),
  key = c("B", "M"),
  index = month
)

# Run the full workflow on the complete data (series B1, B2 and B3):
# aggregate, model, reconcile, forecast.
fcsts <- ts_data %>% 
  # Specify hierarchy: `value` is summed over the M / B key structure
  aggregate_key(M / B, value = sum(value)) %>% 
  # Fit models: the fitted ARIMA(value) models are stored in column `arima`
  model(arima = ARIMA(value)) %>% 
  # Set up reconciliation: add a `mint` column built from `arima` via min_trace()
  mutate(mint = min_trace(arima)) %>% 
  # Produce the forecasts with horizon h = 1
  forecast(h = 1) 

# Keep only series B3 (the single series under M2), so the hierarchy built
# below has just one bottom-level series.
ts_data_2 <- ts_data %>%  
  filter(B == "B3") 
# Same workflow as for `fcsts`, but on the single-series data and with h = 6.
# This is the call that fails: the error printed below is raised for the
# `mint` input of mutate().
fcsts_2 <- ts_data_2 %>% 
  # Specify hierarchy: `value` is summed over the M / B key structure
  aggregate_key(M / B, value = sum(value)) %>% 
  # Fit models: the fitted ARIMA(value) models are stored in column `arima`
  model(arima = ARIMA(value)) %>% 
  # Set up reconciliation: add a `mint` column built from `arima` via min_trace()
  mutate(mint = min_trace(arima)) %>% 
  # Produce the forecasts with horizon h = 6
  forecast(h = 6)
#> Error: Problem with `mutate()` input `mint`.
#> x trying to get slot "i" from an object of a basic class ("numeric") with no slots
#> ℹ Input `mint` is `(function (object, ...) ...`.

Created on 2020-09-23 by the reprex package (v0.3.0)

Session info ``` r devtools::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.0.2 (2020-06-22) #> os Ubuntu 20.04.1 LTS #> system x86_64, linux-gnu #> ui X11 #> language en_AU:en #> collate en_AU.UTF-8 #> ctype en_AU.UTF-8 #> tz Australia/Melbourne #> date 2020-09-23 #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date lib #> anytime 0.3.9 2020-08-27 [1] #> assertthat 0.2.1 2019-03-21 [1] #> backports 1.1.10 2020-09-15 [1] #> callr 3.4.4 2020-09-07 [1] #> cli 2.0.2 2020-02-28 [1] #> colorspace 1.4-2 2020-05-04 [1] #> crayon 1.3.4.9000 2020-08-18 [1] #> desc 1.2.0 2018-05-01 [1] #> devtools 2.3.2 2020-09-18 [1] #> digest 0.6.25 2020-02-23 [1] #> distributional 0.2.0.9000 2020-09-22 [1] #> dplyr * 1.0.2 2020-08-18 [1] #> ellipsis 0.3.1 2020-05-15 [1] #> evaluate 0.14 2019-05-28 [1] #> fable * 0.2.1 2020-06-16 [1] #> fabletools * 0.2.1 2020-09-03 [1] #> fansi 0.4.1 2020-01-08 [1] #> farver 2.0.3 2020-01-16 [1] #> feasts 0.1.5 2020-08-27 [1] #> fs 1.5.0 2020-07-31 [1] #> generics 0.0.2 2018-11-29 [1] #> ggplot2 3.3.2 2020-06-19 [1] #> glue 1.4.2 2020-08-27 [1] #> gtable 0.3.0 2019-03-25 [1] #> highr 0.8 2019-03-20 [1] #> htmltools 0.5.0.9000 2020-08-24 [1] #> knitr 1.30.1 2020-09-22 [1] #> lattice 0.20-41 2020-04-02 [1] #> lifecycle 0.2.0 2020-03-06 [1] #> lubridate 1.7.9 2020-06-08 [1] #> magrittr 1.5 2014-11-22 [1] #> Matrix 1.2-18 2019-11-27 [1] #> memoise 1.1.0 2017-04-21 [1] #> munsell 0.5.0 2018-06-12 [1] #> nlme 3.1-149 2020-08-23 [1] #> pillar 1.4.6 2020-07-10 [1] #> pkgbuild 1.1.0 2020-07-13 [1] #> pkgconfig 2.0.3 2019-09-22 [1] #> pkgload 1.1.0 2020-05-29 [1] #> prettyunits 1.1.1 2020-01-24 [1] #> processx 3.4.4 2020-09-03 [1] #> progressr 0.6.0 2020-05-19 [1] #> ps 1.3.4 2020-08-11 [1] #> purrr 0.3.4 2020-04-17 [1] #> R6 2.4.1 2019-11-12 [1] #> Rcpp 1.0.5 2020-07-06 [1] #> remotes 2.2.0 2020-07-21 [1] #> rlang 
0.4.7 2020-07-09 [1] #> rmarkdown 2.3.9 2020-09-22 [1] #> rprojroot 1.3-2 2018-01-03 [1] #> scales 1.1.1 2020-05-11 [1] #> sessioninfo 1.1.1 2018-11-05 [1] #> stringi 1.5.3 2020-09-09 [1] #> stringr 1.4.0 2019-02-10 [1] #> testthat 2.99.0.9000 2020-09-22 [1] #> tibble 3.0.3 2020-07-10 [1] #> tidyr 1.1.2 2020-08-27 [1] #> tidyselect 1.1.0 2020-05-11 [1] #> tsibble * 0.9.2 2020-07-24 [1] #> urca 1.3-0 2016-09-06 [1] #> usethis 1.6.3 2020-09-17 [1] #> vctrs 0.3.4 2020-08-29 [1] #> withr 2.3.0 2020-09-22 [1] #> xfun 0.17 2020-09-09 [1] #> yaml 2.2.1 2020-02-01 [1] #> source #> CRAN (R 4.0.2) #> CRAN (R 4.0.0) #> CRAN (R 4.0.2) #> CRAN (R 4.0.2) #> CRAN (R 4.0.0) #> R-Forge (R 4.0.2) #> Github (r-lib/crayon@6b3f0c6) #> CRAN (R 4.0.0) #> CRAN (R 4.0.2) #> CRAN (R 4.0.0) #> Github (mitchelloharawild/distributional@77fb25e) #> CRAN (R 4.0.2) #> RSPM (R 4.0.0) #> CRAN (R 4.0.0) #> CRAN (R 4.0.2) #> CRAN (R 4.0.2) #> CRAN (R 4.0.0) #> CRAN (R 4.0.0) #> CRAN (R 4.0.2) #> CRAN (R 4.0.2) #> CRAN (R 4.0.0) #> RSPM (R 4.0.1) #> CRAN (R 4.0.2) #> CRAN (R 4.0.0) #> CRAN (R 4.0.0) #> Github (rstudio/htmltools@e35c3fa) #> Github (yihui/knitr@0a73970) #> CRAN (R 4.0.2) #> CRAN (R 4.0.0) #> RSPM (R 4.0.0) #> CRAN (R 4.0.0) #> CRAN (R 4.0.0) #> CRAN (R 4.0.0) #> CRAN (R 4.0.0) #> CRAN (R 4.0.2) #> RSPM (R 4.0.2) #> CRAN (R 4.0.2) #> CRAN (R 4.0.0) #> RSPM (R 4.0.0) #> CRAN (R 4.0.0) #> CRAN (R 4.0.2) #> RSPM (R 4.0.0) #> CRAN (R 4.0.2) #> CRAN (R 4.0.0) #> CRAN (R 4.0.0) #> RSPM (R 4.0.2) #> CRAN (R 4.0.2) #> RSPM (R 4.0.2) #> Github (rstudio/rmarkdown@80ae1c2) #> CRAN (R 4.0.0) #> RSPM (R 4.0.0) #> CRAN (R 4.0.0) #> CRAN (R 4.0.2) #> CRAN (R 4.0.0) #> Github (r-lib/testthat@3b0b970) #> RSPM (R 4.0.2) #> CRAN (R 4.0.2) #> RSPM (R 4.0.0) #> RSPM (R 4.0.2) #> CRAN (R 4.0.0) #> CRAN (R 4.0.2) #> CRAN (R 4.0.2) #> CRAN (R 4.0.2) #> CRAN (R 4.0.2) #> CRAN (R 4.0.0) #> #> [1] /home/robjhyndman/R/x86_64-pc-linux-gnu-library/4.0 #> [2] /usr/local/lib/R/site-library #> [3] 
/usr/lib/R/site-library #> [4] /usr/lib/R/library ```
mitchelloharawild commented 4 years ago

Fixed, thanks. The issue was the sparse matrix structure being dropped to numeric when the hierarchy contains only a single leaf node.

mitchelloharawild commented 4 years ago

Regarding https://stackoverflow.com/q/64012331/144157 from a code standpoint, the latest version of fabletools supports unbalanced hierarchies, which allows them to fix their variance issue by removing the redundant aggregations. For now this is done with filter(), but I expect an option will be added to aggregate_key() for this. The related issue here is #226.

You're probably better suited to answering the change in variance, so I'll leave answering that to you.

Code for unbalanced hierarchies to follow.

mitchelloharawild commented 4 years ago

Here's an unbalanced hierarchy for this example. You can take any nodes out of the hierarchy so long as it doesn't produce a disjoint hierarchy (until #106 is done), and the appropriate S matrix will be computed.

library(fpp3)
#> ── Attaching packages ────────────────────────────────────────────────────────────────────────────── fpp3 0.3 ──
#> ✓ tibble      3.0.3          ✓ tsibble     0.9.2.9000
#> ✓ dplyr       1.0.2          ✓ tsibbledata 0.2.0     
#> ✓ tidyr       1.1.2          ✓ feasts      0.1.5     
#> ✓ lubridate   1.7.9          ✓ fable       0.2.1.9000
#> ✓ ggplot2     3.3.2
#> ── Conflicts ───────────────────────────────────────────────────────────────────────────────── fpp3_conflicts ──
#> x lubridate::date()   masks base::date()
#> x dplyr::filter()     masks stats::filter()
#> x tsibble::interval() masks lubridate::interval()
#> x dplyr::lag()        masks stats::lag()
# Simulate twelve observations per series. No seed is set in this snippet, so
# the draws (and therefore the forecasts printed below) differ between runs.
B1 <- seq_len(12) + rnorm(n = 12, mean = 5)  # noise around 5 plus a 1..12 trend
B2 <- rnorm(n = 12, mean = 5)                # noise around 5, no trend
M2 <- rnorm(n = 12, mean = 25)               # noise around 25, no trend

# Stack the three series into one long tsibble: 12 monthly rows (2020 Jan to
# 2020 Dec) per series, keyed by bottom-level label B and parent label M.
# B1 and B2 sit under M1; B3 is the only series under M2.
ts_data <- as_tsibble(
  tibble(
    value = c(B1, B2, M2),
    month = rep(yearmonth(paste0("2020-", 1:12)), times = 3),
    B = rep(c("B1", "B2", "B3"), each = 12),
    M = rep(c("M1", "M2"), times = c(24, 12))
  ),
  key = c("B", "M"),
  index = month
)

# Unbalanced-hierarchy workflow: aggregate, drop the redundant B3 node, then
# model, reconcile and forecast. The resulting fable is printed below.
ts_data %>%
  # Specify hierarchy: `value` is summed over the M / B key structure
  aggregate_key(M / B, value = sum(value)) %>% 
  # Remove redundant nodes from hierarchy (#226): drop the rows where B is "B3";
  # in the output below M2 then appears only as an <aggregated> row
  filter(!(B == "B3")) %>% 
  # Fit models: the fitted ARIMA(value) models are stored in column `arima`
  model(arima = ARIMA(value)) %>%
  # Set up reconciliation: add a `mint` column built from `arima` via min_trace()
  mutate(mint = min_trace(arima)) %>%
  # Produce the forecasts with horizon h = 1 (2021 Jan in the output below)
  forecast(h = 1)
#> # A fable: 10 x 6 [1M]
#> # Key:     M, B, .model [10]
#>    M            B            .model    month        value .mean
#>    <chr>        <chr>        <chr>     <mth>       <dist> <dbl>
#>  1 M1           B1           arima  2021 Jan   N(19, 1.9) 18.5 
#>  2 M1           B1           mint   2021 Jan   N(18, 1.2) 17.6 
#>  3 M1           B2           arima  2021 Jan    N(4.9, 1)  4.86
#>  4 M1           B2           mint   2021 Jan N(4.3, 0.81)  4.26
#>  5 M1           <aggregated> arima  2021 Jan   N(20, 3.6) 20.4 
#>  6 M1           <aggregated> mint   2021 Jan   N(22, 1.2) 21.9 
#>  7 M2           <aggregated> arima  2021 Jan  N(25, 0.62) 24.8 
#>  8 M2           <aggregated> mint   2021 Jan  N(25, 0.56) 24.7 
#>  9 <aggregated> <aggregated> arima  2021 Jan     N(46, 4) 45.8 
#> 10 <aggregated> <aggregated> mint   2021 Jan   N(47, 1.4) 46.6

Created on 2020-09-23 by the reprex package (v0.3.0)