Closed spsanderson closed 3 years ago
https://www.tidymodels.org/learn/develop/recipes/
Make a new recipe for
Example:
library(tidyverse)
library(tidymodels)
library(healthyverse)
len_out = 10
by_unit = "month"
start_date = as.Date("2021-01-01")
data_tbl <- tibble(
date_col = seq.Date(from = start_date, length.out = len_out, by = by_unit),
a = rnorm(len_out),
b = runif(len_out)
)
data_tbl
# A tibble: 10 x 3
date_col a b
<date> <dbl> <dbl>
1 2021-01-01 -1.17 0.302
2 2021-02-01 -1.64 0.742
3 2021-03-01 0.442 0.00511
4 2021-04-01 -0.365 0.899
5 2021-05-01 0.588 0.269
6 2021-06-01 2.25 0.0533
7 2021-07-01 1.00 0.530
8 2021-08-01 0.953 0.429
9 2021-09-01 3.13 0.734
10 2021-10-01 -0.124 0.991
> hai_hyperbolic_vec(data_tbl$a, .scale_type = "sin")
[1] -0.92202851 -0.99771916 0.42745404 -0.35685349 0.55478765 0.78027398 0.84226498
[8] 0.81498341 0.01196197 -0.12407367
> hai_hyperbolic_augment(
+ .data = data_tbl
+ , .value = c(a,b)
+ , .scale_type = c("sin","cos","tan")
+ )
# A tibble: 10 x 9
date_col a b a_sin b_sin a_cos b_cos a_tan b_tan
<date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2021-01-01 -1.17 0.302 -0.922 0.297 0.387 0.955 -2.38 0.311
2 2021-02-01 -1.64 0.742 -0.998 0.676 -0.0675 0.737 14.8 0.916
3 2021-03-01 0.442 0.00511 0.427 0.00511 0.904 1.00 0.473 0.00511
4 2021-04-01 -0.365 0.899 -0.357 0.782 0.934 0.623 -0.382 1.26
5 2021-05-01 0.588 0.269 0.555 0.266 0.832 0.964 0.667 0.276
6 2021-06-01 2.25 0.0533 0.780 0.0533 -0.625 0.999 -1.25 0.0534
7 2021-07-01 1.00 0.530 0.842 0.505 0.539 0.863 1.56 0.585
8 2021-08-01 0.953 0.429 0.815 0.416 0.579 0.909 1.41 0.458
9 2021-09-01 3.13 0.734 0.0120 0.670 -1.00 0.743 -0.0120 0.902
10 2021-10-01 -0.124 0.991 -0.124 0.836 0.992 0.548 -0.125 1.53
This will require a few different functions, a vectorized function, an augment function (returns tibble), and a recipe
Vectorized Function:
hai_hyperbolic_vec <- function(x, .scale_type = c("sin","cos","tan")){
scale_type = base::as.character(.scale_type)
term = x
if (scale_type == "sin"){
ret <- base::sin(term)
} else if (scale_type == "cos") {
ret <- base::cos(term)
} else if (scale_type == "tan") {
ret <- base::tan(term)
}
return(ret)
}
Augment Function:
hai_hyperbolic_augment <- function(.data
, .value
, .names = "auto"
, .scale_type = c("sin","cos","tan","all")
){
column_expr <- rlang::enquo(.value)
if(rlang::quo_is_missing(column_expr)) stop(call. = FALSE, "hyperbolic_augment(.value) is missing.")
col_nms <- names(tidyselect::eval_select(rlang::enquo(.value), .data))
make_call <- function(col, scale_type){
rlang::call2(
"hai_hyperbolic_vec",
x = rlang::sym(col)
, .scale_type = scale_type
#, .ns = "healthyR.ai"
)
}
grid <- expand.grid(
col = col_nms
, scale_type = .scale_type
, stringsAsFactors = FALSE
)
calls <- purrr::pmap(.l = list(grid$col, grid$scale_type), make_call)
if(any(.names == "auto")) {
newname <- paste0(grid$col, "_", grid$scale_type)
} else {
newname <- as.list(.names)
}
calls <- purrr::set_names(calls, newname)
ret <- tibble::as_tibble(dplyr::mutate(.data, !!!calls))
return(ret)
}
Step Function: (not working)
step_hai_hyperbolic <- function(recipe,
...,
role = "predictor",
trained = FALSE,
columns = NULL,
scale_type = c("sin","cos","tan"),
skip = FALSE,
id = rand_id("hai_hyperbolic")
){
terms <- recipes::ellipse_check(...)
funcs <- c("sin", "cos", "tan")
if (!(scale_type %in% funcs))
rlang::abort("`func` should be either `sin`, `cos`, or `tan`")
recipes::add_step(
recipe,
step_hai_hyperbolic_new(
terms = terms,
role = role,
trained = trained,
columns = columns,
scale_type = scale_type,
skip = skip,
id = id
)
)
}
step_hai_hyperbolic_new <-
function(terms, role, trained, columns, scale_type, skip, id){
recipes::step(
subclass = "hai_hyperbolic",
terms = terms,
role = role,
trained = trained,
columns = columns,
scale_type = scale_type,
skip = skip,
id = id
)
}
prep.step_hai_hyperbolic <- function(x, training, info = NULL, ...) {
col_names <- recipes::recipes_eval_select(x$terms, training, info = info)
recipes::check_type(training[, col_names])
value_data <- info[info$variable %in% col_names, ]
if(any(value_data$type != "numeric")){
rlang::abort(
paste0("All variables for `step_hai_hyperbolic` must be `numeric`",
"`integer` `double` classes.")
)
}
step_hai_hyperbolic_new(
terms = x$terms,
role = x$role,
trained = TRUE,
columns = col_names,
scale_type = x$scale_type,
skip = x$skip,
id = x$id
)
}
bake.step_hai_hyperbolic <- function(object, new_data, ...){
# if(!all(object$terms == as.numeric(object$terms))){
# rlang::abort("step_hai_hyperbolic() requires `...` to be numeric values.")
# }
make_call <- function(col, scale_type){
rlang::call2(
"hai_hyperbolic_vec",
x = rlang::sym(col)
, .scale_type = scale_type
#, .ns = "healthyR.ai"
)
}
grid <- expand.grid(
col = object$columns
, scale_type = object$scale_type
, stringsAsFactors = FALSE
)
calls <- purrr::pmap(.l = list(grid$col, grid$scale_type), make_call)
# Column Names
newname <- paste0(grid$col, "_", grid$scale_type)
calls <- recipes::check_name(calls, new_data, object, newname, TRUE)
tibble::as_tibble(dplyr::mutate(new_data, !!!calls))
}
print.step_hai_hyperbolic <-
function(x, width = max(20, options()$width - 35), ...) {
cat("Hyperbolic transformation on ", sep = "")
printer(
# Names before prep (could be selectors)
untr_obj = x$terms,
# Names after prep:
tr_obj = names(x$scale_type),
# Has it been prepped?
trained = x$trained,
# An estimate of how many characters to print on a line:
width = width
)
invisible(x)
}
no prep method found
https://pycaret.org/trigonometry-features/