Open benshing1984 opened 4 years ago
I don't have the `cafe` dataset, so I am unable to test your code above.
I've produced a similar example from the prophet docs, which seems to work fine.
Could you provide the dataset to create a minimally reproducible example?
library(tidyverse)
library(tsibble)
#>
#> Attaching package: 'tsibble'
#> The following object is masked from 'package:dplyr':
#>
#> id
library(fable.prophet)
#> Loading required package: Rcpp
#> Loading required package: fabletools
df <- read_csv("https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv") %>%
as_tsibble(index = ds)
#> Parsed with column specification:
#> cols(
#> ds = col_date(format = ""),
#> y = col_double()
#> )
playoffs <- data_frame(
holiday = 'playoff',
ds = as.Date(c('2008-01-13', '2009-01-03', '2010-01-16',
'2010-01-24', '2010-02-07', '2011-01-08',
'2013-01-12', '2014-01-12', '2014-01-19',
'2014-02-02', '2015-01-11', '2016-01-17',
'2016-01-24', '2016-02-07')),
lower_window = 0,
upper_window = 1
)
#> Warning: `data_frame()` is deprecated, use `tibble()`.
#> This warning is displayed once per session.
superbowls <- data_frame(
holiday = 'superbowl',
ds = as.Date(c('2010-02-07', '2014-02-02', '2016-02-07')),
lower_window = 0,
upper_window = 1
)
holidays <- bind_rows(playoffs, superbowls)
holidays <- as_tsibble(holidays, index = ds, key = holiday)
fit <- df %>%
model(
prophet = prophet(y ~ season("year", 4, type = "multiplicative") + holiday( holidays = holidays))
)
fit
#> # A mable: 1 x 1
#> prophet
#> <model>
#> 1 <prophet>
Created on 2020-01-06 by the reprex package (v0.3.0)
Thanks Mitchell for your prompt reply,
I'm testing it with the Walmart test set, and I still get this error.
library(tidyverse)
library(tsibble)
library(fable.prophet)
library(data.table)
library(future)
library(lubridate)
walmart_store_sales_data = data.table::fread("https://remixinstitute.box.com/shared/static/9kzyttje3kd7l41y1e14to0akwl9vuje.csv", header = T, stringsAsFactors = FALSE)
df<- walmart_store_sales_data%>%
mutate(Date = ymd(Date))%>%
as_tsibble(index = Date, key = c(Store, Dept))%>%
select(Date, Store, Dept, Weekly_Sales)
playoffs <- data_frame(
holiday = 'playoff',
ds = as.Date(c('2008-01-13', '2009-01-03', '2010-01-16',
'2010-01-24', '2010-02-07', '2011-01-08',
'2013-01-12', '2014-01-12', '2014-01-19',
'2014-02-02', '2015-01-11', '2016-01-17',
'2016-01-24', '2016-02-07')),
lower_window = 0,
upper_window = 1
)
superbowls <- data_frame(
holiday = 'superbowl',
ds = as.Date(c('2010-02-07', '2014-02-02', '2016-02-07')),
lower_window = 0,
upper_window = 1
)
holidays <- bind_rows(playoffs, superbowls)
holidays <- as_tsibble(holidays, index = ds, key = holiday)
plan(multiprocess)
fit <- df %>%
model(
prophet = prophet( Weekly_Sales ~ season("week", 7, type = "multiplicative") + holiday (holidays = holidays))
)
#Warning message:
#3331 errors (1 unique) encountered for prophet
#[3331] object 'holidays' not found
fit
A mable: 3,331 x 3
Key: Store, Dept [3,331]
Store Dept prophet
<int> <int> <model>
1 1 1 <NULL model>
Thanks for the reproducible example - I can reproduce your error now.
The issue as far as I can tell is with the use of parallel, not holidays.
Could you try a small sample of Store/Dept without parallel?
Parallel processing does cause this error. Could a future update also support multiprocessing with holiday effects? That would be nice.
I just tested a smaller sample with and without parallel processing on my laptop. Parallel processing does help when I have a bigger data set.
Thanks anyway for solving this problem.
> library(tidyverse)
> library(tsibble)
> library(fable.prophet)
> library(data.table)
> library(lubridate)
> library(fable)
> library(tictoc)
>
> walmart_store_sales_data = data.table::fread("https://remixinstitute.box.com/shared/static/9kzyttje3kd7l41y1e14to0akwl9vuje.csv", header = T, stringsAsFactors = FALSE)
[0%] Downloaded 0 bytes...
Downloaded 3087910 bytes...>
> df<- walmart_store_sales_data%>%
+ mutate(Date = ymd(Date))%>%
+ as_tsibble(index = Date, key = c(Store, Dept))%>%
+ select(Date, Store, Dept, Weekly_Sales)%>%
+ filter(Store == c(2, 3) & Dept == c(1:10))
>
>
> playoffs <- data_frame(
+ holiday = 'playoff',
+ ds = as.Date(c('2008-01-13', '2009-01-03', '2010-01-16',
+ '2010-01-24', '2010-02-07', '2011-01-08',
+ '2013-01-12', '2014-01-12', '2014-01-19',
+ '2014-02-02', '2015-01-11', '2016-01-17',
+ '2016-01-24', '2016-02-07')),
+ lower_window = 0,
+ upper_window = 1
+ )
>
>
> superbowls <- data_frame(
+ holiday = 'superbowl',
+ ds = as.Date(c('2010-02-07', '2014-02-02', '2016-02-07')),
+ lower_window = 0,
+ upper_window = 1
+ )
> holidays <- bind_rows(playoffs, superbowls)
> holidays <- as_tsibble(holidays, index = ds, key = holiday)
>
>
> tic()
> fit <- df %>%
+ model(
+ prophet = prophet( Weekly_Sales ~ season("week", 7, type = "multiplicative") )
+ )
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
> fit%>%
+ forecast(h =1)
# A fable: 10 x 6 [70D]
# Key: Store, Dept, .model [10]
Store Dept .model Date Weekly_Sales .distribution
<int> <int> <chr> <date> <dbl> <dist>
1 2 1 prophet 2012-11-23 35652. sim(=dbl[1000])
2 2 3 prophet 2013-01-04 22304. sim(=dbl[1000])
3 2 5 prophet 2012-12-07 35360. sim(=dbl[1000])
4 2 7 prophet 2012-11-09 41939. sim(=dbl[1000])
5 2 9 prophet 2012-12-21 31652. sim(=dbl[1000])
6 3 2 prophet 2012-11-23 20013. sim(=dbl[1000])
7 3 4 prophet 2013-01-04 9320. sim(=dbl[1000])
8 3 6 prophet 2012-12-07 2679. sim(=dbl[1000])
9 3 8 prophet 2012-11-09 8175. sim(=dbl[1000])
10 3 10 prophet 2012-12-21 13142. sim(=dbl[1000])
> toc()
14.85 sec elapsed
>
> #use parallel processing
>
> library(future)
> plan(multiprocess)
> tic()
> fit <- df %>%
+ model(
+ prophet = prophet( Weekly_Sales ~ season("week", 7, type = "multiplicative") )
+ )
Progress: ──────────────────────────────────────────────────────────────── 100%
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
> fit%>%
+ forecast(h =1)
# A fable: 10 x 6 [70D]
# Key: Store, Dept, .model [10]
Store Dept .model Date Weekly_Sales .distribution
<int> <int> <chr> <date> <dbl> <dist>
1 2 1 prophet 2012-11-23 35652. sim(=dbl[1000])
2 2 3 prophet 2013-01-04 22304. sim(=dbl[1000])
3 2 5 prophet 2012-12-07 35360. sim(=dbl[1000])
4 2 7 prophet 2012-11-09 41939. sim(=dbl[1000])
5 2 9 prophet 2012-12-21 31652. sim(=dbl[1000])
6 3 2 prophet 2012-11-23 20013. sim(=dbl[1000])
7 3 4 prophet 2013-01-04 9320. sim(=dbl[1000])
8 3 6 prophet 2012-12-07 2679. sim(=dbl[1000])
9 3 8 prophet 2012-11-09 8175. sim(=dbl[1000])
10 3 10 prophet 2012-12-21 13142. sim(=dbl[1000])
> toc()
21.73 sec elapsed
Mitch, I noticed the same issue.
There's some strange conflict between fable.prophet and future. I'm digging through the code to try to find exactly where fable.prophet 'loses contact' with the environment when running through future.
Here's another example with your good old Victoria Electricity Demand dataset:
library(dplyr)
library(future)
library(tsibble)
library(fable.prophet)
library(fable)
library(tsibbledata)
library(tictoc)
# Create a Holiday Tsibble
holidays <- tsibble::holiday_aus(c(2012, 2013, 2014), state = "national") %>%
rename(ds = date) %>%
as_tsibble()
# First Try
tictoc::tic()
fit <- vic_elec %>%
model(pr1 = fable.prophet::prophet(log(Demand+1) ~ season(period = "week") + season(period = "day") + holiday(holidays)),
pr2 = fable.prophet::prophet(log(Demand+1) ~ season(period = "week") + season(period = "day")))
tictoc::toc()
accuracy(fit)
#Second Try / Parallelization
future::plan(multiprocess)
tictoc::tic()
fit2 <- vic_elec %>%
model(pr1 = fable.prophet::prophet(log(Demand+1) ~ season(period = "week") + season(period = "day") + holiday(holidays)),
pr2 = fable.prophet::prophet(log(Demand+1) ~ season(period = "week") + season(period = "day")))
tictoc::toc()
accuracy(fit2)
Thanks for investigating this further. I believe this is due to scoping issues with parallel modelling in fabletools: https://github.com/tidyverts/fabletools/issues/146
The model formula will need to be carefully parsed to identify required objects that do not exist within the tsibble, and distribute them to the parallel worker nodes.
I'm a beginner. How can I add holidays to the model?
Warning message: 8 errors (1 unique) encountered for prophet [8] object 'holidays' not found
It shows this warning message. Am I missing anything?