#' Prep Data for SVM_Poly - Recipe
#'
#' @family Preprocessor
#' @family SVM_Poly
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @details This function will automatically prep your data.frame/tibble for
#' use in the SVM_Poly algorithm. The SVM_Poly algorithm can be used for both
#' regression and classification.
#'
#' The recipe that is built applies two steps, in this order:
#' 1. `recipes::step_zv()` on all predictors, which removes zero-variance
#'    columns.
#' 2. `recipes::step_normalize()` on all numeric predictors, which centers
#'    and scales them.
#'
#' This function will output a recipe specification. The recipe is only
#' specified here; it is not prepped or baked by this function.
#'
#' @description Automatically prep a data.frame/tibble for use in the SVM_Poly
#' algorithm.
#'
#' @seealso \url{https://parsnip.tidymodels.org/reference/svm_poly.html}
#'
#' @param .data The data that you are passing to the function. Can be any type
#' of data that is accepted by the `data` parameter of the `recipes::recipe()`
#' function.
#' @param .recipe_formula The formula that is going to be passed. For example
#' if you are using the `diamonds` data then the formula would most likely be
#' something like `price ~ .`. An error is raised if this is not a formula.
#'
#' @examples
#' # Regression
#' hai_svm_poly_data_prepper(.data = diamonds, .recipe_formula = price ~ .)
#' reg_obj <- hai_svm_poly_data_prepper(diamonds, price ~ .)
#' get_juiced_data(reg_obj)
#'
#' # Classification
#' hai_svm_poly_data_prepper(Titanic, Survived ~ .)
#' cla_obj <- hai_svm_poly_data_prepper(Titanic, Survived ~ .)
#' get_juiced_data(cla_obj)
#'
#' @return
#' A recipe object
#'
#' @export
#'
hai_svm_poly_data_prepper <- function(.data, .recipe_formula) {
  # Checks ----
  # recipes::recipe() dispatches on its first argument, so a non-formula value
  # would end up in an unrelated method and fail with a confusing message.
  if (!inherits(.recipe_formula, "formula")) {
    stop(
      "`.recipe_formula` must be a formula, e.g. `price ~ .`",
      call. = FALSE
    )
  }

  # Recipe ----
  # The pipeline is the last expression, so the recipe is returned visibly.
  recipes::recipe(.recipe_formula, data = .data) %>%
    recipes::step_zv(recipes::all_predictors()) %>%
    recipes::step_normalize(recipes::all_numeric_predictors())
}
Example:
> # Regression
> hai_svm_poly_data_prepper(.data = diamonds, .recipe_formula = price ~ .)
Recipe
Inputs:
role #variables
outcome 1
predictor 9
Operations:
Zero variance filter on recipes::all_predictors()
Centering and scaling for recipes::all_numeric_predictors()
> reg_obj <- hai_svm_poly_data_prepper(diamonds, price ~ .)
> get_juiced_data(reg_obj)
# A tibble: 53,940 x 10
carat cut color clarity depth table x y z price
<dbl> <ord> <ord> <ord> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 -1.20 Ideal E SI2 -0.174 -1.10 -1.59 -1.54 -1.57 326
2 -1.24 Premium E SI1 -1.36 1.59 -1.64 -1.66 -1.74 326
3 -1.20 Good E VS1 -3.38 3.38 -1.50 -1.46 -1.74 327
4 -1.07 Premium I VS2 0.454 0.243 -1.36 -1.32 -1.29 334
5 -1.03 Good J SI2 1.08 0.243 -1.24 -1.21 -1.12 335
6 -1.18 Very Good J VVS2 0.733 -0.205 -1.60 -1.55 -1.50 336
7 -1.18 Very Good I VVS1 0.384 -0.205 -1.59 -1.54 -1.51 336
8 -1.13 Very Good H SI1 0.105 -1.10 -1.48 -1.42 -1.43 337
9 -1.22 Fair E VS2 2.34 1.59 -1.66 -1.71 -1.49 337
10 -1.20 Very Good H VS1 -1.64 1.59 -1.54 -1.47 -1.63 338
# ... with 53,930 more rows
>
> # Classification
> hai_svm_poly_data_prepper(Titanic, Survived ~ .)
Recipe
Inputs:
role #variables
outcome 1
predictor 4
Operations:
Zero variance filter on recipes::all_predictors()
Centering and scaling for recipes::all_numeric_predictors()
> cla_obj <- hai_svm_poly_data_prepper(Titanic, Survived ~ .)
> get_juiced_data(cla_obj)
# A tibble: 32 x 5
Class Sex Age n Survived
<fct> <fct> <fct> <dbl> <fct>
1 1st Male Child -0.506 No
2 2nd Male Child -0.506 No
3 3rd Male Child -0.248 No
4 Crew Male Child -0.506 No
5 1st Female Child -0.506 No
6 2nd Female Child -0.506 No
7 3rd Female Child -0.381 No
8 Crew Female Child -0.506 No
9 1st Male Adult 0.362 No
10 2nd Male Adult 0.627 No
# ... with 22 more rows
Function:
Example: