tidyverts / fabletools

General fable features useful for extension packages
http://fabletools.tidyverts.org/
89 stars 31 forks source link

Parameterized model formulas stop working when a future `plan` is declared #405

Open jrauser opened 2 weeks ago

jrauser commented 2 weeks ago

See the reprex below. In my real codebase the future `plan` is declared because I use furrr's `future_map()` for other purposes (reading a series of files off disk).

library(fable)
#> Loading required package: fabletools
library(tsibble)
#> 
#> Attaching package: 'tsibble'
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, union
library(feasts)
library(glue)
library(future)

# Define some random data
set.seed(7)
data <- tsibble(idx=1:1000, value=rnorm(1000), index=idx) 

# Make a STL decomposition
model(data, stl = STL(value ~ trend() + season(10))) %>% components()
#> # A dable: 1,000 x 7 [1]
#> # Key:     .model [1]
#> # :        value = trend + season_10 + remainder
#>    .model   idx  value   trend season_10 remainder season_adjust
#>    <chr>  <int>  <dbl>   <dbl>     <dbl>     <dbl>         <dbl>
#>  1 stl        1  2.29  -0.477     0.419      2.35         1.87  
#>  2 stl        2 -1.20  -0.349     0.141     -0.988       -1.34  
#>  3 stl        3 -0.694 -0.222     0.279     -0.751       -0.973 
#>  4 stl        4 -0.412 -0.0899   -0.443      0.121        0.0311
#>  5 stl        5 -0.971  0.0418    0.0143    -1.03        -0.985 
#>  6 stl        6 -0.947  0.170    -0.350     -0.767       -0.597 
#>  7 stl        7  0.748  0.298     0.0246     0.426        0.724 
#>  8 stl        8 -0.117  0.431    -0.214     -0.334        0.0972
#>  9 stl        9  0.153  0.564    -0.300     -0.112        0.453 
#> 10 stl       10  2.19   0.706     0.459      1.02         1.73  
#> # ℹ 990 more rows

# We can use a variable in the formula
seasonal_period <- 10
model(data, stl = STL(value ~ trend() + season(seasonal_period))) %>% components()
#> # A dable: 1,000 x 7 [1]
#> # Key:     .model [1]
#> # :        value = trend + season_10 + remainder
#>    .model   idx  value   trend season_10 remainder season_adjust
#>    <chr>  <int>  <dbl>   <dbl>     <dbl>     <dbl>         <dbl>
#>  1 stl        1  2.29  -0.477     0.419      2.35         1.87  
#>  2 stl        2 -1.20  -0.349     0.141     -0.988       -1.34  
#>  3 stl        3 -0.694 -0.222     0.279     -0.751       -0.973 
#>  4 stl        4 -0.412 -0.0899   -0.443      0.121        0.0311
#>  5 stl        5 -0.971  0.0418    0.0143    -1.03        -0.985 
#>  6 stl        6 -0.947  0.170    -0.350     -0.767       -0.597 
#>  7 stl        7  0.748  0.298     0.0246     0.426        0.724 
#>  8 stl        8 -0.117  0.431    -0.214     -0.334        0.0972
#>  9 stl        9  0.153  0.564    -0.300     -0.112        0.453 
#> 10 stl       10  2.19   0.706     0.459      1.02         1.73  
#> # ℹ 990 more rows

# Or we can make a formula ourselves
stl_formula <- as.formula(glue("value ~ trend() + season({seasonal_period})"))
stl_formula
#> value ~ trend() + season(10)

# ... and hand it to STL
model(data, stl = STL(stl_formula)) %>% components()
#> # A dable: 1,000 x 7 [1]
#> # Key:     .model [1]
#> # :        value = trend + season_10 + remainder
#>    .model   idx  value   trend season_10 remainder season_adjust
#>    <chr>  <int>  <dbl>   <dbl>     <dbl>     <dbl>         <dbl>
#>  1 stl        1  2.29  -0.477     0.419      2.35         1.87  
#>  2 stl        2 -1.20  -0.349     0.141     -0.988       -1.34  
#>  3 stl        3 -0.694 -0.222     0.279     -0.751       -0.973 
#>  4 stl        4 -0.412 -0.0899   -0.443      0.121        0.0311
#>  5 stl        5 -0.971  0.0418    0.0143    -1.03        -0.985 
#>  6 stl        6 -0.947  0.170    -0.350     -0.767       -0.597 
#>  7 stl        7  0.748  0.298     0.0246     0.426        0.724 
#>  8 stl        8 -0.117  0.431    -0.214     -0.334        0.0972
#>  9 stl        9  0.153  0.564    -0.300     -0.112        0.453 
#> 10 stl       10  2.19   0.706     0.459      1.02         1.73  
#> # ℹ 990 more rows

# The same thing works with ARIMA
fixed_p <- 1
model(data, arima = ARIMA(value ~ pdq(p=fixed_p))) %>% tidy()
#> # A tibble: 1 × 6
#>   .model term  estimate std.error statistic p.value
#>   <chr>  <chr>    <dbl>     <dbl>     <dbl>   <dbl>
#> 1 arima  ar1     0.0194    0.0317     0.612   0.541

arima_formula <- as.formula(glue("value ~ pdq(p={fixed_p})"))
model(data, arima = ARIMA(arima_formula)) %>% tidy()
#> # A tibble: 1 × 6
#>   .model term  estimate std.error statistic p.value
#>   <chr>  <chr>    <dbl>     <dbl>     <dbl>   <dbl>
#> 1 arima  ar1     0.0194    0.0317     0.612   0.541

# But the moment we declare a plan ...
plan(multisession, workers=8)

# ... "normal" formula with no parameters still works, ...
model(data, stl = STL(value ~ trend() + season(10))) %>% components()
#> # A dable: 1,000 x 7 [1]
#> # Key:     .model [1]
#> # :        value = trend + season_10 + remainder
#>    .model   idx  value   trend season_10 remainder season_adjust
#>    <chr>  <int>  <dbl>   <dbl>     <dbl>     <dbl>         <dbl>
#>  1 stl        1  2.29  -0.477     0.419      2.35         1.87  
#>  2 stl        2 -1.20  -0.349     0.141     -0.988       -1.34  
#>  3 stl        3 -0.694 -0.222     0.279     -0.751       -0.973 
#>  4 stl        4 -0.412 -0.0899   -0.443      0.121        0.0311
#>  5 stl        5 -0.971  0.0418    0.0143    -1.03        -0.985 
#>  6 stl        6 -0.947  0.170    -0.350     -0.767       -0.597 
#>  7 stl        7  0.748  0.298     0.0246     0.426        0.724 
#>  8 stl        8 -0.117  0.431    -0.214     -0.334        0.0972
#>  9 stl        9  0.153  0.564    -0.300     -0.112        0.453 
#> 10 stl       10  2.19   0.706     0.459      1.02         1.73  
#> # ℹ 990 more rows

# ... but the above strategies for parameterizing the formula stop working
model(data, stl = STL(stl_formula)) %>% components()
#> Error: object 'stl_formula' not found
model(data, stl = STL(value ~ trend() + season(seasonal_period))) %>% components()
#> Warning: 1 error encountered for stl
#> [1] object 'seasonal_period' not found
#> Error in `transmute()`:
#> ℹ In argument: `cmp = map(.fit, components)`.
#> Caused by error in `UseMethod()`:
#> ! no applicable method for 'components' applied to an object of class "null_mdl"
#> Backtrace:
#>      ▆
#>   1. ├─... %>% components()
#>   2. ├─generics::components(.)
#>   3. ├─fabletools:::components.mdl_df(.)
#>   4. │ ├─dplyr::transmute(...)
#>   5. │ └─dplyr:::transmute.data.frame(...)
#>   6. │   └─dplyr:::mutate_cols(.data, dots, by)
#>   7. │     ├─base::withCallingHandlers(...)
#>   8. │     └─dplyr:::mutate_col(dots[[i]], data, mask, new_columns)
#>   9. │       └─mask$eval_all_mutate(quo)
#>  10. │         └─dplyr (local) eval()
#>  11. ├─fabletools:::map(.fit, components)
#>  12. │ └─base::lapply(.x, .f, ...)
#>  13. │   ├─generics (local) FUN(X[[i]], ...)
#>  14. │   └─fabletools:::components.mdl_ts(X[[i]], ...)
#>  15. │     └─generics::components(object$fit, ...)
#>  16. └─base::.handleSimpleError(...)
#>  17.   └─dplyr (local) h(simpleError(msg, call))
#>  18.     └─rlang::abort(message, class = error_class, parent = parent, call = error_call)
model(data, arima = ARIMA(arima_formula)) %>% tidy()
#> Error: object 'arima_formula' not found
jrauser commented 2 weeks ago

Output of `sessionInfo()`, in case it matters:

> sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.6.7

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/Los_Angeles
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] reprex_2.0.2     fable_0.3.3      future_1.33.0    glue_1.7.0       feasts_0.3.1     fabletools_0.3.3 tsibble_1.1.3   

loaded via a namespace (and not attached):
 [1] utf8_1.2.4           generics_0.1.3       tidyr_1.3.0          anytime_0.3.9        lattice_0.21-8       listenv_0.9.0        digest_0.6.35        magrittr_2.0.3      
 [9] evaluate_0.21        grid_4.3.1           timechange_0.3.0     fastmap_1.1.1        processx_3.8.4       ps_1.7.5             purrr_1.0.1          fansi_1.0.6         
[17] scales_1.3.0         codetools_0.2-19     cli_3.6.2            rlang_1.1.3          parallelly_1.36.0    future.apply_1.11.2  ellipsis_0.3.2       munsell_0.5.1       
[25] yaml_2.3.7           withr_3.0.0          tools_4.3.1          parallel_4.3.1       dplyr_1.1.2          colorspace_2.1-0     ggplot2_3.5.0        globals_0.16.2      
[33] vctrs_0.6.5          R6_2.5.1             lifecycle_1.0.4      lubridate_1.9.3      fs_1.6.2             callr_3.7.3          clipr_0.8.0          pkgconfig_2.0.3     
[41] urca_1.3-3           progressr_0.13.0     pillar_1.9.0         gtable_0.3.4         Rcpp_1.0.10          xfun_0.39            tibble_3.2.1         tidyselect_1.2.0    
[49] rstudioapi_0.14      knitr_1.43           farver_2.1.1         htmltools_0.5.8.1    nlme_3.1-162         rmarkdown_2.22       compiler_4.3.1       distributional_0.3.2