spsanderson / healthyR.ai

healthyR.ai - AI package for the healthyverse
http://www.spsanderson.com/healthyR.ai/
Other
16 stars 6 forks source link

Trig features #58

Closed spsanderson closed 3 years ago

spsanderson commented 3 years ago

https://pycaret.org/trigonometry-features/

spsanderson commented 3 years ago

https://www.tidymodels.org/learn/develop/recipes/

Make a new recipe for

Example:

library(tidyverse)
library(tidymodels)
library(healthyverse)

len_out    = 10
by_unit    = "month"
start_date = as.Date("2021-01-01")

data_tbl <- tibble(
  date_col = seq.Date(from = start_date, length.out = len_out, by = by_unit),
  a    = rnorm(len_out),
  b    = runif(len_out)
)
data_tbl

# A tibble: 10 x 3
   date_col        a       b
   <date>      <dbl>   <dbl>
 1 2021-01-01 -1.17  0.302  
 2 2021-02-01 -1.64  0.742  
 3 2021-03-01  0.442 0.00511
 4 2021-04-01 -0.365 0.899  
 5 2021-05-01  0.588 0.269  
 6 2021-06-01  2.25  0.0533 
 7 2021-07-01  1.00  0.530  
 8 2021-08-01  0.953 0.429  
 9 2021-09-01  3.13  0.734  
10 2021-10-01 -0.124 0.991 
> hai_hyperbolic_vec(data_tbl$a, .scale_type = "sin")
 [1] -0.92202851 -0.99771916  0.42745404 -0.35685349  0.55478765  0.78027398  0.84226498
 [8]  0.81498341  0.01196197 -0.12407367
> hai_hyperbolic_augment(
+   .data = data_tbl
+   , .value = c(a,b)
+   , .scale_type = c("sin","cos","tan")
+ )
# A tibble: 10 x 9
   date_col        a       b   a_sin   b_sin   a_cos b_cos   a_tan   b_tan
   <date>      <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl>
 1 2021-01-01 -1.17  0.302   -0.922  0.297    0.387  0.955 -2.38   0.311  
 2 2021-02-01 -1.64  0.742   -0.998  0.676   -0.0675 0.737 14.8    0.916  
 3 2021-03-01  0.442 0.00511  0.427  0.00511  0.904  1.00   0.473  0.00511
 4 2021-04-01 -0.365 0.899   -0.357  0.782    0.934  0.623 -0.382  1.26   
 5 2021-05-01  0.588 0.269    0.555  0.266    0.832  0.964  0.667  0.276  
 6 2021-06-01  2.25  0.0533   0.780  0.0533  -0.625  0.999 -1.25   0.0534 
 7 2021-07-01  1.00  0.530    0.842  0.505    0.539  0.863  1.56   0.585  
 8 2021-08-01  0.953 0.429    0.815  0.416    0.579  0.909  1.41   0.458  
 9 2021-09-01  3.13  0.734    0.0120 0.670   -1.00   0.743 -0.0120 0.902  
10 2021-10-01 -0.124 0.991   -0.124  0.836    0.992  0.548 -0.125  1.53
spsanderson commented 3 years ago

This will require a few different functions, a vectorized function, an augment function (returns tibble), and a recipe

Vectorized Function:

hai_hyperbolic_vec <- function(x, .scale_type = c("sin","cos","tan")){

  scale_type = base::as.character(.scale_type)
  term       = x

  if (scale_type == "sin"){
    ret <- base::sin(term)
  } else if (scale_type == "cos") {
    ret <- base::cos(term)
  } else if (scale_type == "tan") {
    ret <- base::tan(term)
  }

  return(ret)

}

Augment Function:

hai_hyperbolic_augment <- function(.data
                               , .value
                               , .names = "auto"
                               , .scale_type = c("sin","cos","tan","all")
                               ){

  column_expr <- rlang::enquo(.value)

  if(rlang::quo_is_missing(column_expr)) stop(call. = FALSE, "hyperbolic_augment(.value) is missing.")

  col_nms <- names(tidyselect::eval_select(rlang::enquo(.value), .data))

  make_call <- function(col, scale_type){
    rlang::call2(
      "hai_hyperbolic_vec",
      x             = rlang::sym(col)
      , .scale_type = scale_type
      #, .ns = "healthyR.ai"
    )
  }

  grid <- expand.grid(
    col                = col_nms
    , scale_type       = .scale_type
    , stringsAsFactors = FALSE
  )

  calls <- purrr::pmap(.l = list(grid$col, grid$scale_type), make_call)

  if(any(.names == "auto")) {
    newname <- paste0(grid$col, "_", grid$scale_type)
  } else {
    newname <- as.list(.names)
  }

  calls <- purrr::set_names(calls, newname)

  ret <- tibble::as_tibble(dplyr::mutate(.data, !!!calls))

  return(ret)

}

Step Function: (not working)

step_hai_hyperbolic <- function(recipe,
                            ...,
                            role       = "predictor",
                            trained    = FALSE,
                            columns    = NULL,
                            scale_type = c("sin","cos","tan"),
                            skip       = FALSE,
                            id         = rand_id("hai_hyperbolic")
){

  terms <- recipes::ellipse_check(...)
  funcs <- c("sin", "cos", "tan")
  if (!(scale_type %in% funcs))
    rlang::abort("`func` should be either `sin`, `cos`, or `tan`")

  recipes::add_step(
    recipe,
    step_hai_hyperbolic_new(
      terms      = terms,
      role       = role,
      trained    = trained,
      columns    = columns,
      scale_type = scale_type,
      skip       = skip,
      id         = id
    )
  )
}

step_hai_hyperbolic_new <- 
  function(terms, role, trained, columns, scale_type, skip, id){

  recipes::step(
    subclass   = "hai_hyperbolic",
    terms      = terms,
    role       = role,
    trained    = trained,
    columns    = columns,
    scale_type = scale_type,
    skip       = skip,
    id         = id
  )

}

prep.step_hai_hyperbolic <- function(x, training, info = NULL, ...) {

  col_names <- recipes::recipes_eval_select(x$terms, training, info = info)
  recipes::check_type(training[, col_names])

  value_data <- info[info$variable %in% col_names, ]

  if(any(value_data$type != "numeric")){
    rlang::abort(
      paste0("All variables for `step_hai_hyperbolic` must be `numeric`",
             "`integer` `double` classes.")
    )
  }

  step_hai_hyperbolic_new(
    terms      = x$terms,
    role       = x$role,
    trained    = TRUE,
    columns    = col_names,
    scale_type = x$scale_type,
    skip       = x$skip,
    id         = x$id
  )

}

bake.step_hai_hyperbolic <- function(object, new_data, ...){

  # if(!all(object$terms == as.numeric(object$terms))){
  #   rlang::abort("step_hai_hyperbolic() requires `...` to be numeric values.")
  # }

  make_call <- function(col, scale_type){
    rlang::call2(
      "hai_hyperbolic_vec",
      x              = rlang::sym(col)
      ,  .scale_type = scale_type
      #, .ns = "healthyR.ai"
    )
  }

  grid <- expand.grid(
    col                = object$columns
    , scale_type       = object$scale_type
    , stringsAsFactors = FALSE
  )

  calls <- purrr::pmap(.l = list(grid$col, grid$scale_type), make_call)

  # Column Names
  newname <- paste0(grid$col, "_", grid$scale_type)
  calls   <- recipes::check_name(calls, new_data, object, newname, TRUE)

  tibble::as_tibble(dplyr::mutate(new_data, !!!calls))

}

print.step_hai_hyperbolic <-
    function(x, width = max(20, options()$width - 35), ...) {
        cat("Hyperbolic transformation on ", sep = "")
        printer(
            # Names before prep (could be selectors)
            untr_obj = x$terms,
            # Names after prep:
            tr_obj = names(x$scale_type),
            # Has it been prepped? 
            trained = x$trained,
            # An estimate of how many characters to print on a line: 
            width = width
        )
        invisible(x)
    }
spsanderson commented 3 years ago

no prep method found