spsanderson / healthyR.ts

A time-series companion package to healthyR
https://www.spsanderson.com/healthyR.ts/
Other
19 stars 3 forks source link

theta #279

Closed spsanderson closed 2 years ago

spsanderson commented 2 years ago

Function:

#' Boilerplate Workflow
#'
#' @family Boiler_Plate
#' @family exp_smoothing
#' @family theta
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @details This uses the `forecast::thetaf()` for the `parsnip` engine. This
#' model does not use exogenous regressors, so only a univariate model of: value ~ date
#' will be used from the `.date_col` and `.value_col` that you provide.
#'
#' @seealso \url{https://business-science.github.io/modeltime/reference/exp_smoothing.html#engine-details}
#' @seealso \url{https://pkg.robjhyndman.com/forecast/reference/thetaf.html}
#'
#' @description This is a boilerplate function to create automatically the following:
#' -  recipe
#' -  model specification
#' -  workflow
#' -  calibration tibble and plot
#'
#' @param .data The data being passed to the function. The time-series object.
#' @param .date_col The column that holds the datetime.
#' @param .value_col The column that has the value
#' @param .rsamp_obj The rsample splits object
#' @param .prefix Default is `ts_theta`
#' @param .cv_assess How many observations for assess. See [timetk::time_series_cv()]
#' @param .cv_skip How many observations to skip. See [timetk::time_series_cv()]
#' @param .cv_slice_limit How many slices to return. See [timetk::time_series_cv()]
#' @param .bootstrap_final Not yet implemented.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#'
#' data <- AirPassengers %>%
#'   ts_to_tbl() %>%
#'   select(-index)
#'
#' splits <- time_series_split(
#'   data
#'   , date_col
#'   , assess = 12
#'   , skip = 3
#'   , cumulative = TRUE
#' )
#'
#' ts_theta <- ts_auto_theta(
#'   .data = data,
#'   .date_col = date_col,
#'   .value_col = value,
#'   .rsamp_obj = splits
#' )
#'
#' ts_theta$recipe_info
#' }
#'
#' @return
#' A list
#'
#' @export
#'

ts_auto_theta <- function(.data, .date_col, .value_col, .rsamp_obj,
                          .prefix = "ts_theta", .grid_size = 10, .cv_assess = 12, 
                          .cv_skip = 3, .cv_slice_limit = 6, 
                          .bootstrap_final = FALSE){

  # Tidyeval ----
  date_col_var_expr <- rlang::enquo(.date_col)
  value_col_var_expr <- rlang::enquo(.value_col)
  sampling_object <- .rsamp_obj

  # Cross Validation
  cv_assess = as.numeric(.cv_assess)
  cv_skip   = as.numeric(.cv_skip)
  cv_slice  = as.numeric(.cv_slice_limit)

  # Data and splits
  splits <- .rsamp_obj
  data_tbl <- dplyr::as_tibble(.data)

  # Checks ----
  if (rlang::quo_is_missing(date_col_var_expr)){
    rlang::abort(
      message = "'.date_col' must be supplied.",
      use_cli_format = TRUE
    )
  }

  if (rlang::quo_is_missing(value_col_var_expr)){
    rlang::abort(
      message = "'.value_col' must be supplied.",
      use_cli_format = TRUE
    )
  }

  if (!inherits(x = splits, what = "rsplit")){
    rlang::abort(
      message = "'.rsamp_obj' must be have class rsplit, use the rsample package.",
      use_cli_format = TRUE
    )
  }

  # Recipe ----
  # Get the initial recipe call
  data_tbl <- data_tbl %>%
    dplyr::select(
      {{ date_col_var_expr }}
      , {{ value_col_var_expr }}
      , dplyr::everything()
    )

  # * Orignal Cols and formula ----
  ds <- rlang::sym(base::names(data_tbl)[[1]])
  v  <- rlang::sym(base::names(data_tbl)[[2]])
  f  <- stats::as.formula(base::paste(v, " ~ ."))

  recipe_call <- get_recipe_call(match.call())

  rec_syntax <- paste0(.prefix, "_recipe") %>%
    assign_value(!!recipe_call)

  rec_obj <- recipes::recipe(formula = f, data = data_tbl)

  # Tune/Spec ----
  model_spec <- modeltime::exp_smoothing(
    seasonal_period = "auto"
  ) %>%
    parsnip::set_mode(mode = "regression") %>%
    parsnip::set_engine("theta")

  # Workflow ----
  wflw <- workflows::workflow() %>%
    workflows::add_recipe(rec_obj) %>%
    workflows::add_model(model_spec)

  wflw_fit <- wflw %>%
    parsnip::fit(rsample::training(splits))

  # Calibrate and Plot ----
  cap <- healthyR.ts::calibrate_and_plot(
    wflw_fit,
    .splits_obj  = splits,
    .data        = data_tbl,
    .interactive = TRUE,
    .print_info  = FALSE
  )

  # Return ----
  output <- list(
    recipe_info = list(
      recipe_call   = recipe_call,
      recipe_syntax = rec_syntax,
      rec_obj       = rec_obj
    ),
    model_info = list(
      model_spec  = model_spec,
      wflw        = wflw,
      fitted_wflw = wflw_fit,
      was_tuned   = "not_tuned"
    ),
    model_calibration = list(
      plot = cap$plot,
      calibration_tbl = cap$calibration_tbl,
      model_accuracy = cap$model_accuracy
    )
  )

  return(invisible(output))
}

Example:

> ts_theta
$recipe_info
$recipe_info$recipe_call
recipe(.data = data, .date_col = date_col, .value_col = value, 
    .formula = value ~ ., .rsamp_obj = splits, .grid_size = 20, 
    .num_cores = 5)

$recipe_info$recipe_syntax
[1] "ts_theta_recipe <-"                                                                                                                                  
[2] "\n  recipe(.data = data, .date_col = date_col, .value_col = value, .formula = value ~ \n    ., .rsamp_obj = splits, .grid_size = 20, .num_cores = 5)"

$recipe_info$rec_obj
Recipe

Inputs:

      role #variables
   outcome          1
 predictor          1

$model_info
$model_info$model_spec
Exponential Smoothing State Space Model Specification (regression)

Main Arguments:
  seasonal_period = auto

Computational engine: theta 

$model_info$wflw
== Workflow ===============================================================================
Preprocessor: Recipe
Model: exp_smoothing()

-- Preprocessor ---------------------------------------------------------------------------
0 Recipe Steps

-- Model ----------------------------------------------------------------------------------
Exponential Smoothing State Space Model Specification (regression)

Main Arguments:
  seasonal_period = auto

Computational engine: theta 

$model_info$fitted_wflw
== Workflow [trained] =====================================================================
Preprocessor: Recipe
Model: exp_smoothing()

-- Preprocessor ---------------------------------------------------------------------------
0 Recipe Steps

-- Model ----------------------------------------------------------------------------------
Theta Method
---

$model_info$was_tuned
[1] "not_tuned"

$model_calibration
$model_calibration$plot

$model_calibration$calibration_tbl
# Modeltime Table
# A tibble: 1 x 5
  .model_id .model     .model_desc  .type .calibration_data
      <int> <list>     <chr>        <chr> <list>           
1         1 <workflow> THETA METHOD Test  <tibble [12 x 4]>

$model_calibration$model_accuracy
# A tibble: 1 x 9
  .model_id .model_desc  .type   mae  mape  mase smape  rmse    rsq
      <int> <chr>        <chr> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
1         1 THETA METHOD Test   70.4  13.2  1.46  14.8  96.9 0.0362