AlbertoAlmuinha / bayesmodels

The Tidymodels Extension for Bayesian Models
https://albertoalmuinha.github.io/bayesmodels/
Other
54 stars 9 forks source link

Formula Interface: causes loss of Transformation Information #8

Closed mdancho84 closed 2 years ago

mdancho84 commented 2 years ago

Calibration needs to be able to determine if transformations were applied.

Cannot determine if transformation is required on 'actual_data'

Problem:

library(bayesmodels)

library(tidymodels)

library(timetk)

library(modeltime)

library(modeltime.resample)

library(modeltime.ensemble)

data(iclaims)

names(initial.claims)

df <- timetk::tk_tbl(initial.claims)

df %>% plot_time_series(.date_var = index,

                        .value = iclaimsNSA,

                        .smooth = FALSE)

# Issue 1

# split

splits <- time_series_split(

    data = df,

    # date_var = 'date',

    assess     = 52,

    cumulative = TRUE

)

# splits %>% tk_time_series_cv_plan() %>% plot_time_series_cv_plan(index, iclaimsNSA)

ss <- AddLocalLinearTrend(list(), training(splits)$iclaimsNSA)

ss <- AddSeasonal(ss, training(splits)$iclaimsNSA, nseasons = 52)

modelo <- bayesian_structural_reg() %>%

    set_engine("stan", state.specification = ss, niter = 1000) %>%

    fit(iclaimsNSA ~ index, data = training(splits))

modeltime_tbl <- modeltime_table(modelo)

calib_tbl <- modeltime_table(modelo) %>% modeltime_calibrate(testing(splits))

Results in this:

Warning message:
Problem with `mutate()` column `.nested.col`.
i `.nested.col = purrr::map2(...)`.
i Cannot determine if transformation is required on 'actual_data' 
mdancho84 commented 2 years ago

I noticed this same issue with adam_reg() in the new modeltime, and I have made a few adjustments to switch from the "formula" interface to the "data.frame" parsnip interface.

You can see that I've added the interface = "data.frame" here. https://github.com/business-science/modeltime/blob/61eed2bfc996191cb91c0416bbfeaa62587573e1/R/parsnip-adam_data.R#L197

    # * Fit ----
    parsnip::set_fit(
        model         = model,
        eng           = engine,
        mode          = "regression",
        value         = list(
            interface = "data.frame",
            protect   = c("x", "y"),
            func      = c(fun = "adam_fit_impl"),
            defaults  = list()
        )
    )

And then modified the function to accept "x" and "y" where "x" is a data.frame with predictors and y is a vector with outcomes. https://github.com/business-science/modeltime/blob/61eed2bfc996191cb91c0416bbfeaa62587573e1/R/parsnip-adam.R#L389

adam_fit_impl <- function(x, y, period = "auto", p = 0, d = 0, q = 0, P = 0, D = 0, Q = 0,
                          model = "ZXZ", constant = FALSE, regressors =  c("use", "select", "adapt"),
                          outliers = c("ignore", "use", "select"), level = 0.99,
                          occurrence = c("none", "auto", "fixed", "general", "odds-ratio",
                                         "inverse-odds-ratio", "direct"),
                          distribution = c("default", "dnorm", "dlaplace", "ds", "dgnorm",
                                           "dlnorm", "dinvgauss", "dgamma"),
                          loss = c("likelihood", "MSE", "MAE", "HAM", "LASSO", "RIDGE", "MSEh",
                                   "TMSE", "GTMSE", "MSCE"),
                          ic   = c("AICc", "AIC", "BIC", "BICc"),
                          select_order = FALSE,
                           ...) {

    # X & Y
    # Expect outcomes  = vector
    # Expect predictor = data.frame
    outcome    <- y
    predictor  <- x

    args <- list(...)

    if (!any(names(args) == "orders")){
        args[["orders"]] <- list(ar = c(p, P), i = c(d, D), ma = c(q, Q), select = select_order)
    }

    if (!any(names(args) == "model")){
        args[["model"]] <- model
    }

    if (!any(names(args) == "constant")){
        args[["constant"]] <- constant
    }

    if (!any(names(args) == "regressors")){
        args[["regressors"]] <- regressors
    }

    if (!any(names(args) == "outliers")){
        args[["outliers"]] <- outliers
    }

    if (!any(names(args) == "level")){
        args[["level"]] <- level
    }

    if (!any(names(args) == "occurrence")){
        args[["occurrence"]] <- occurrence
    }

    if (!any(names(args) == "distribution")){
        args[["distribution"]] <- distribution
    }

    if (!any(names(args) == "loss")){
        args[["loss"]] <- loss
    }

    if (!any(names(args) == "ic")){
        args[["ic"]] <- ic
    }

    # INDEX & PERIOD
    # Determine Period, Index Col, and Index
    index_tbl <- parse_index_from_data(predictor)
    period    <- parse_period_from_index(index_tbl, period)
    idx_col   <- names(index_tbl)
    idx       <- timetk::tk_index(index_tbl)

    # # XREGS
    # # Clean names, get xreg recipe, process predictors

    xreg_recipe <- create_xreg_recipe(predictor, prepare = TRUE)
    xreg_tbl    <- juice_xreg_recipe(xreg_recipe, format = "tbl")

    # Combine Xregs and data
    args[["data"]] <- dplyr::bind_cols(
        tibble::tibble(..y = y),
        xreg_tbl
    ) %>%
        as.data.frame()

    fit_call <- parsnip::make_call(fun  = "adam",
                                   ns   = "smooth",
                                   args = args)

    fit_adam <- rlang::eval_tidy(fit_call)

    # RETURN
    new_modeltime_bridge(
        class = "Adam_fit_impl",

        # Models
        models = list(
            model_1 = fit_adam
        ),

        # Data - Date column (matches original), .actual, .fitted, and .residuals columns
        data = tibble::tibble(
            !! idx_col  := idx,
            .actual      =  as.numeric(fit_adam$data[,1]),
            .fitted      =  as.numeric(fit_adam$fitted),
            .residuals   =  as.numeric(fit_adam$residuals)
        ),

        # Preprocessing Recipe (prepped) - Used in predict method
        extras = list(
            xreg_recipe = xreg_recipe
        ),

        # Description - Convert ADAM model parameters to short description
        desc = "ADAM Model"
    )

}

The predict function should then look like this. https://github.com/business-science/modeltime/blob/61eed2bfc996191cb91c0416bbfeaa62587573e1/R/parsnip-adam.R#L512

Adam_predict_impl <- function(object, new_data, ...) {

    # PREPARE INPUTS
    model       <- object$models$model_1
    xreg_recipe <- object$extras$xreg_recipe
    h_horizon   <- nrow(new_data)

    # XREG
    xreg_tbl <- bake_xreg_recipe(xreg_recipe, new_data, format = "tbl")

    # PREDICTIONS
    if (!is.null(xreg_tbl)) {
        preds_forecast <- greybox::forecast(model, h = h_horizon, newdata = xreg_tbl, ...)$mean %>% as.vector()
    } else {
        preds_forecast <- greybox::forecast(model, h = h_horizon, ...)$mean %>% as.vector()
    }

    # Return predictions as numeric vector
    #preds <- tibble::as_tibble(preds_forecast)

    return(preds_forecast)

}