spsanderson / healthyR.ai

healthyR.ai - AI package for the healthyverse
http://www.spsanderson.com/healthyR.ai/
Other
16 stars 6 forks source link

Add function `hai_skewed_features()` #209

Closed spsanderson closed 2 years ago

spsanderson commented 2 years ago
#' Get the names of skewed numeric features
#'
#' Computes a skewness statistic for every numeric column of `.data` (via
#' `hai_skewness_vec()`) and returns the names of the columns whose value is
#' greater than or equal to `.threshold`.
#'
#' @param .data A data.frame/tibble. Only its numeric columns are considered.
#' @param .threshold A numeric cutoff; columns whose skewness is `>=` this
#'   value are reported. Defaults to `0.6`.
#' @param .drop_keys Optional character vector of column names to exclude
#'   from the result, e.g. `c("key_1", "key_2")`. Defaults to `NULL`.
#'
#' @return A character vector of column names.
#'
#' @examples
#' hai_skewed_features(mtcars)
#' hai_skewed_features(mtcars, .drop_keys = c("mpg", "hp"))
hai_skewed_features <- function(.data, .threshold = 0.6, .drop_keys = NULL){

  # Checks ----
  # Validate the raw argument *before* coercing it: as.numeric() always
  # returns a numeric, so checking the coerced value could never fail and a
  # non-numeric threshold would silently turn into NA.
  if (!is.numeric(.threshold)){
    rlang::abort(
      message = "The '.threshold' parameter must be numeric.",
      use_cli_format = TRUE
    )
  }

  if (!is.data.frame(.data)){
    rlang::abort(
      message = "The '.data' parameter must be a data.frame/tibble.",
      use_cli_format = TRUE
    )
  }

  # Scalar condition, so use the short-circuiting `&&` rather than `&`.
  if (!is.null(.drop_keys) && !is.character(.drop_keys)){
    rlang::abort(
      message = "If provided, the '.drop_keys' parameter must be a 'character'. Using
      something like '.drop_keys = c('key_1','key_2',...).",
      use_cli_format = TRUE
    )
  }

  # Tidyeval ----
  threshold <- as.numeric(.threshold)
  drop_keys <- .drop_keys

  # Data ----
  data_tbl <- tibble::as_tibble(.data)

  # Transforms ----
  # One statistic per numeric column, reshaped to long form (name/value) so
  # the columns can be filtered by name and by threshold.
  # NOTE(review): assumes hai_skewness_vec() returns a single value per
  # column -- confirm against its definition.
  skewed_feature_names <- data_tbl %>%
    dplyr::select(tidyselect::vars_select_helpers$where(is.numeric)) %>%
    purrr::map_df(hai_skewness_vec) %>%
    tidyr::pivot_longer(cols = dplyr::everything()) %>%
    dplyr::filter(!name %in% drop_keys) %>%
    dplyr::filter(value >= threshold) %>%
    dplyr::pull(name) %>%
    as.character()

  # Return ----
  return(skewed_feature_names)

}

Example using mtcars

hai_skewed_features(mtcars)
[1] "mpg"  "hp"   "carb" 

hai_skewed_features(mtcars, .drop_keys = c("mpg","hp"))
[1] "carb"

hai_skewed_features(mtcars, .drop_keys = "hp")
[1] "mpg"  "carb"
spsanderson commented 2 years ago

212