Same message as #128. Looks like this is due to having a gap of data availability between seasons, and the requested forecast date having data for some lags but not all. Ideally we would give a good message in test lag preparation, rather than balk in residual quantiles.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidyr)
library(purrr)
library(ggplot2)
library(epidatr)
#> ! epidatr cache is being used (set env var EPIDATR_USE_CACHE=FALSE if not
#> intended).
#> ℹ The cache directory is ~/.cache/R/epidatr.
#> ℹ The cache will be cleared after 14 days and will be pruned if it exceeds 4096
#> MB.
#> ℹ The log of cache transactions is stored at ~/.cache/R/epidatr/logfile.txt.
library(epiprocess)
#>
#> Attaching package: 'epiprocess'
#> The following object is masked from 'package:stats':
#>
#> filter
library(epipredict)
#> Loading required package: parsnip
#>
#> Attaching package: 'epipredict'
#> The following object is masked from 'package:ggplot2':
#>
#> layer
flusurv_analysis_issue <- as.Date("2019-08-01") %>%
MMWRweek::MMWRweek() %>%
{.$MMWRyear * 100L + .$MMWRweek}
flusurv_issue_data <-
pub_flusurv(
locations = "network_all",
issues = epirange(123401, flusurv_analysis_issue)
)
#> Warning: Loading from the cache at /home/fullname/.cache/R/epidatr; see
#> ~/.cache/R/epidatr/logfile.txt for more details.
#> This warning is displayed once every 8 hours.
flusurv_archive <- flusurv_issue_data %>%
select(geo_value = location,
time_value = epiweek,
version = release_date,
starts_with("rate_")) %>%
as_epi_archive(compactify = TRUE)
archive <- flusurv_archive
forecast_dates <- seq(min(archive$DT$version) + 120L, archive$versions_end,
by = "6 weeks")
horizons <- c(0, 7, 14, 21, 28) # relative to forecast_date
example_forecaster <- function(snapshot_edf, forecast_date) {
shared_reporting_latency <- as.integer(forecast_date - max(snapshot_edf$time_value))
horizons %>%
map(function(horizon) {
snapshot_edf %>%
arx_forecaster(
outcome = "rate_overall",
predictors = "rate_overall",
args_list = arx_args_list(
# (this is incomplete; latency often varies signficantly by covariate and can't be ignored, so we also need lag adjustment.)
ahead = shared_reporting_latency + horizon,
## ahead = horizon,
quantile_levels = c(0.1, 0.5, 0.9)
)) %>%
.$predictions %>%
mutate(forecast_date = forecast_date,
target_date = forecast_date + horizon)
}) %>%
bind_rows()
}
latest_edf <- archive %>% epix_as_of(.$versions_end)
unfaithful_forecasts <- latest_edf %>%
# pretend we get observations about today, on today, with no revisions
mutate(version = time_value) %>%
as_epi_archive(versions_end = max(forecast_dates)) %>%
epix_slide(
# pretend version releases are on forecast dates
ref_time_values = forecast_dates,
before = 365000L, # 1000-year time window --> don't filter out any `time_value`s
~ example_forecaster(.x, .ref_time_value),
names_sep = NULL
) %>%
select(-time_value)
#> Warning in max.default(structure(numeric(0), class = "Date"), na.rm = FALSE):
#> no non-missing arguments to max; returning -Inf
#> Error in `map()` at rlang/R/dots.R:91:3:
#> ℹ In index: 1.
#> Caused by error in `lm.fit()`:
#> ! 0 (non-NA) cases
#> Backtrace:
#> ▆
#> 1. ├─... %>% select(-time_value)
#> 2. ├─dplyr::select(., -time_value)
#> 3. ├─epiprocess::epix_slide(...) at dplyr/R/select.R:54:3
#> 4. │ └─x$slide(...)
#> 5. │ ├─... %>% ungroup()
#> 6. │ └─self$group_by()$slide(...) at dplyr/R/group-by.R:153:3
#> 7. │ └─base::lapply(...)
#> 8. │ └─epiprocess (local) FUN(X[[i]], ...)
#> 9. │ ├─dplyr::group_modify(...)
#> 10. │ ├─epiprocess:::group_modify.epi_df(...) at dplyr/R/group-map.R:156:3
#> 11. │ │ └─dplyr::dplyr_reconstruct(NextMethod(), .data)
#> 12. │ │ └─dplyr:::dplyr_new_data_frame(data) at dplyr/R/generics.R:196:3
#> 13. │ │ ├─row.names %||% .row_names_info(x, type = 0L) at dplyr/R/utils.R:18:3
#> 14. │ │ └─base::.row_names_info(x, type = 0L) at dplyr/R/utils.R:18:3
#> 15. │ ├─base::NextMethod()
#> 16. │ └─dplyr:::group_modify.data.frame(...)
#> 17. │ └─epiprocess (local) .f(.data, group_keys(.data), ...) at dplyr/R/group-map.R:166:3
#> 18. │ └─f(.data_group, .group_key, ref_time_value, ...)
#> 19. │ └─global example_forecaster(.x, .ref_time_value)
#> 20. │ └─... %>% bind_rows()
#> 21. ├─dplyr::ungroup(.)
#> 22. ├─dplyr::bind_rows(.)
#> 23. │ └─rlang::list2(...) at dplyr/R/bind-rows.R:31:3
#> 24. ├─purrr::map(...) at rlang/R/dots.R:91:3
#> 25. │ └─purrr:::map_("list", .x, .f, ..., .progress = .progress) at purrr/R/map.R:129:3
#> 26. │ ├─purrr:::with_indexed_errors(...) at purrr/R/map.R:174:3
#> 27. │ │ └─base::withCallingHandlers(...) at purrr/R/map.R:201:3
#> 28. │ ├─purrr:::call_with_cleanup(...) at purrr/R/map.R:174:3
#> 29. │ └─.f(.x[[i]], ...)
#> 30. │ └─... %>% ...
#> 31. ├─dplyr::mutate(...)
#> 32. ├─epipredict::arx_forecaster(...) at dplyr/R/mutate.R:146:3
#> 33. │ ├─generics::fit(wf, epi_data)
#> 34. │ ├─epipredict:::fit.epi_workflow(wf, epi_data)
#> 35. │ ├─base::NextMethod()
#> 36. │ └─workflows:::fit.workflow(wf, epi_data)
#> 37. │ └─workflows::.fit_model(workflow, control)
#> 38. │ ├─generics::fit(action_model, workflow = workflow, control = control)
#> 39. │ └─workflows:::fit.action_model(...)
#> 40. │ └─workflows:::fit_from_xy(spec, mold, case_weights, control_parsnip)
#> 41. │ ├─generics::fit_xy(...)
#> 42. │ └─parsnip::fit_xy.model_spec(...)
#> 43. │ └─parsnip:::xy_form(...)
#> 44. │ └─parsnip:::form_form(...)
#> 45. │ └─parsnip:::eval_mod(...)
#> 46. │ └─rlang::eval_tidy(e, env = envir, ...)
#> 47. ├─stats::lm(formula = ..y ~ ., data = data) at rlang/R/eval-tidy.R:121:3
#> 48. │ └─stats::lm.fit(...)
#> 49. │ └─base::stop("0 (non-NA) cases")
#> 50. └─base::.handleSimpleError(...)
#> 51. └─purrr (local) h(simpleError(msg, call))
#> 52. └─cli::cli_abort(...) at purrr/R/map.R:215:9
#> 53. └─rlang::abort(...) at cli/R/rlang.R:45:3
The difference between this and #332 is that this is dealing with a lagset being bad for test data, while #332 is about a shiftset being bad for training data.
Same message as #128. Looks like this is due to having a gap of data availability between seasons, and the requested forecast date having data for some lags but not all. Ideally we would give a good message in test lag preparation, rather than balk in residual quantiles.
Created on 2024-05-14 with reprex v2.0.2