poissonconsulting / rescale

An R package to centre and standardize variables based on the values in a second data frame
https://poissonconsulting.github.io/rescale/
Other
0 stars 0 forks source link

Split training from applying #11

Open krlmlr opened 2 months ago

krlmlr commented 2 months ago
make_rescale <- function(
    data,
    ...,
    center = NULL,
    scale = NULL,
    env = rlang::caller_env()) {
  chk::chk_data(data)

  means <- data |>
    dplyr::summarise(dplyr::across({{ center }}, ~ mean(.x, na.rm = TRUE))) |>
    dplyr::collect()

  sds <- data |>
    dplyr::summarise(dplyr::across({{ scale }}, ~ sd(.x, na.rm = TRUE))) |>
    dplyr::collect()

  exprs <- make_scale_expr(data, means, sds)

  rlang::new_function(rlang::pairlist2(data = ), env = env, rlang::expr({
    data |>
      dplyr::mutate(!!!exprs)
  }))
}

make_scale_expr <- function(data, means, sds) {
  all_names <- union(names(means), names(sds))

  exprs <- rlang::set_names(rlang::syms(all_names), all_names)
  exprs[names(means)] <- purrr::imap(means, ~ rlang::expr(!!exprs[[.y]] - !!.x))
  exprs[names(sds)] <- purrr::imap(sds, ~ rlang::expr(!!exprs[[.y]] / !!.x))

  exprs
}

make_rescale(mtcars, center = c("mpg", "wt"), scale = c("wt", "hp", "disp"))
#> function (data) 
#> {
#>     dplyr::mutate(data, mpg = mpg - 20.090625, wt = (wt - 3.21725)/0.978457442989697, 
#>         hp = hp/68.5628684893206, disp = disp/123.938693831382)
#> }

Created on 2024-08-08 with reprex v2.1.0

krlmlr commented 2 months ago

With make_mutate() :

make_rescale <- function(
    data,
    ...,
    center = NULL,
    scale = NULL,
    env = rlang::caller_env()) {
  chk::chk_data(data)

  means <- data |>
    dplyr::summarise(dplyr::across({{ center }}, ~ mean(.x, na.rm = TRUE))) |>
    dplyr::collect()

  sds <- data |>
    dplyr::summarise(dplyr::across({{ scale }}, ~ sd(.x, na.rm = TRUE))) |>
    dplyr::collect()

  exprs <- make_scale_expr(data, means, sds)

  make_mutate(exprs, env)
}

make_scale_expr <- function(data, means, sds) {
  all_names <- union(names(means), names(sds))

  exprs <- rlang::set_names(rlang::syms(all_names), all_names)
  exprs[names(means)] <- purrr::imap(means, ~ rlang::expr(!!exprs[[.y]] - !!.x))
  exprs[names(sds)] <- purrr::imap(sds, ~ rlang::expr(!!exprs[[.y]] / !!.x))

  exprs
}

make_mutate <- function(exprs, env = rlang::caller_env()) {
  rlang::new_function(rlang::pairlist2(data = ), env = env, rlang::expr({
    data |>
      dplyr::mutate(!!!exprs)
  }))
}

make_rescale(mtcars, center = c("mpg", "wt"), scale = c("wt", "hp", "disp"))
#> function (data) 
#> {
#>     dplyr::mutate(data, mpg = mpg - 20.090625, wt = (wt - 3.21725)/0.978457442989697, 
#>         hp = hp/68.5628684893206, disp = disp/123.938693831382)
#> }

Created on 2024-08-08 with reprex v2.1.0