spsanderson / TidyDensity

Create tidy probability/density tibbles and plots of randomly generated and empirical data.
https://www.spsanderson.com/TidyDensity
Other
34 stars 1 forks source link

Function `tst_fnc` #276

Closed spsanderson closed 1 year ago

spsanderson commented 1 year ago

Function:

#' Tidy Stats of Tidy Distribution
#' 
#' @family Statistic
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @details
#' A function to return the value(s) of a given `tidy_` distribution function
#' output and chosen column from it. This function will only work with `tidy_`
#' distribution functions.
#' 
#' There are currently three different output types for this function. These are:
#' *  "vector" - which gives an `sapply()` output
#' *  "list" - which gives an `lapply()` output, and
#' *  "tibble" - which returns a `tibble` in long format.
#' 
#' Currently you can pass any stat function that performs an operation on a vector
#' input. This means you can pass things like `IQR`, `quantile` and their associated
#' arguments in the `...` portion of the function.
#' 
#' This function also by default will rename the value column of the `tibble` to 
#' the name of the function. This function will also give the column name of sim_number
#' for the `tibble` output with the corresponding simulation numbers as the values.
#' 
#' For the `sapply` and `lapply` outputs the column names will also give the
#' simulation number information by making column names like `sim_number_1` etc.
#' 
#'
#' @description
#' A function to return the `stat` function values of a given `tidy_` distribution
#' output.
#'
#' @param .data The input data coming from a `tidy_` distribution function.
#' @param .x The default is `y` but can be one of the other columns from the
#' input data.
#' @param .fns The default is `IQR`, but this can be any `stat` function like
#' `quantile` or `median` etc.
#' @param .return_type The default is "vector" which returns an `sapply` object.
#' @param ... Addition function arguments to be supplied to the parameters of
#' `.fns`
#'
#' @examples
#' tn <- tidy_normal(.num_sims = 3)
#' 
#' p <- c(0.025, 0.25, 0.5, 0.75, 0.95)
#' 
#' tidy_stat_tbl(tn, y, quantile, "vector", probs = p, na.rm = TRUE)
#' tidy_stat_tbl(tn, y, quantile, "list", probs = p)
#' tidy_stat_tbl(tn, y, quantile, "tibble", probs = p)
#'
#' @return
#' A return of object of either `sapply` `lapply` or `tibble` based upon user input.
#'
#' @export
#'

tidy_stat_tbl <- function(.data, .x = y, .fns, .return_type = "vector", ...) {

  atb <- attributes(.data)

  # Tidyeval ----
  value_var_expr <- rlang::enquo(.x)
  func <- .fns
  func_chr <- deparse(substitute(.fns))
  passed_args <- list(...)
  return_type <- tolower(as.character(.return_type))

  # Checks ----
  if (!return_type %in% c("vector", "list", "tibble", "data.frame")) {
    rlang::abort(
      message = "'.return_type' must be either 'vector','list', or 'tibble'",
      use_cli_format = TRUE
    )
  }

  if (!"tibble_type" %in% names(atb)) {
    rlang::abort(
      message = "'.data' must come from a 'tidy_' distribution function.",
      use_cli_format = TRUE
    )
  }

  if (rlang::quo_is_missing(value_var_expr)) {
    rlang::abort(
      message = "'.x' must be a column from the data.frame/tibble passed to '.data'."
    )
  }

  # Prep tibble ----
  # Check to see if it is a bootstrap tibble first
  # Is it a Bootstrap Nested tibble?
  if (atb$tibble_type == "tidy_bootstrap_nested") {
    df_tbl <- dplyr::as_tibble(.data) %>%
      TidyDensity::bootstrap_unnest_tbl()  %>%
      split(.$sim_number) %>%
      purrr::map(.f = ~ .x %>% dplyr::pull(y))
  }

  # Is it an unnested bootstrap tibble?
  if (atb$tibble_type == "tidy_bootstrap") {
    df_tbl <- dplyr::as_tibble(.data) %>%
      split(.$sim_number) %>%
      purrr::map(.f = ~ .x %>% dplyr::pull(y))
  }

  # If regular tidy_ dist tibble
  if (!atb$tibble_type %in% c("tidy_bootstrap", "tidy_bootstrap_nested")) {
    df_tbl <- dplyr::as_tibble(.data) %>%
      split(.$sim_number) %>%
      purrr::map(.f = ~ .x %>% dplyr::pull({{value_var_expr}}))
  }

  # New Param Args ----
  if ("na.rm" %in% names(passed_args)) {
    tmp_args <- passed_args[!names(passed_args) == "na.rm"]
  }

  if (!exists("tmp_args")) {
    args <- passed_args
  } else {
    args <- tmp_args
  }

  # Run func ----
  if (return_type == "vector") {
    ret <- sapply(df_tbl, func, ...)
    if (is.null(colnames(ret))){
      cn <- names(ret)
    } else {
      cn <- colnames(ret)
    }
    cn <- stringr::str_c("sim_number_", cn)

    if (is.null(colnames(ret))){
      names(ret) <- cn
    } else {
      colnames(ret) <- cn
    }
  }

  if (return_type == "list") {
    ret <- lapply(df_tbl, func, ...)
    ln <- names(ret)
    cn <- stringr::str_c("sim_number_", ln)
    names(ret) <- cn
  }

  if (return_type == "tibble") {
    ret <- purrr::map(
      df_tbl, ~ func(.x) %>%
        purrr::imap(.f = ~ cbind(.x, name = .y)) %>%
        purrr::map_df(dplyr::as_tibble) %>%
        dplyr::select(2, 1)
    ) %>%
      purrr::imap(~ cbind(.x, sim_number = .y)) %>%
      purrr::map_df(dplyr::as_tibble) %>%
      dplyr::select(sim_number, name, .x) %>%
      dplyr::mutate(.x = as.numeric(.x)) %>%
      dplyr::mutate(sim_number = factor(sim_number)) %>%
      dplyr::rename(value = .x)

    cn <- c("sim_number","name",func_chr)
    names(ret) <- cn
  }

  # Return
  # attr(ret, "tibble_type") <- "tidy_stat_tbl"
  # attr(ret, ".fns") <- deparse(substitute(.fns))
  # attr(ret, "incoming_tibble_type") <- atb$tibble_type
  # attr(ret, ".return_type") <- .return_type
  # attr(ret, ".return_type_function") <- switch(
  #   return_type,
  #   "vector" = "sapply",
  #   "list" ="lapply",
  #   "tibble" = "purr_map"
  # )
  # attr(ret, "class") <- "tidy_stat_tbl"

  return(ret)
}

Example:

> library(TidyDensity)
> library(dplyr)
> library(purrr)
> library(ggplot2)
> library(tidyr)
> 
> tn <- tidy_normal(.num_sims = 3)
> 
> p <- c(0.025, 0.25, 0.5, 0.75, 0.95)
> 
> tst_fnc(tn, y, quantile, "vector", probs = p, na.rm = TRUE)
     sim_number_1 sim_number_2 sim_number_3
2.5%   -1.0568345   -1.7947478   -1.8005644
25%    -0.2867838   -1.1110767   -0.8192209
50%     0.1093028   -0.2459402    0.1753937
75%     0.9017928    0.2024745    0.8286905
95%     1.6004753    1.3689743    1.3514777
> tst_fnc(tn, y, quantile, "list", probs = p)
$sim_number_1
      2.5%        25%        50%        75%        95% 
-1.0568345 -0.2867838  0.1093028  0.9017928  1.6004753 

$sim_number_2
      2.5%        25%        50%        75%        95% 
-1.7947478 -1.1110767 -0.2459402  0.2024745  1.3689743 

$sim_number_3
      2.5%        25%        50%        75%        95% 
-1.8005644 -0.8192209  0.1753937  0.8286905  1.3514777 

> tst_fnc(tn, y, quantile, "tibble", probs = p)
# A tibble: 15 × 3
   sim_number name  quantile
   <fct>      <chr>    <dbl>
 1 1          0%      -1.82 
 2 1          25%     -0.287
 3 1          50%      0.109
 4 1          75%      0.902
 5 1          100%     1.86 
 6 2          0%      -2.14 
 7 2          25%     -1.11 
 8 2          50%     -0.246
 9 2          75%      0.202
10 2          100%     1.81 
11 3          0%      -1.89 
12 3          25%     -0.819
13 3          50%      0.175
14 3          75%      0.829
15 3          100%     1.89 
> 
> tst_fnc(tn, y, IQR, "vector", na.rm = TRUE)
sim_number_1 sim_number_2 sim_number_3 
    1.188577     1.313551     1.647911 
> tst_fnc(tn, y, IQR, "list", na.rm = TRUE)
$sim_number_1
[1] 1.188577

$sim_number_2
[1] 1.313551

$sim_number_3
[1] 1.647911

> tst_fnc(tn, y, IQR, "tibble", na.rm = TRUE)
# A tibble: 3 × 3
  sim_number  name   IQR
  <fct>      <dbl> <dbl>
1 1              1  1.19
2 2              1  1.31
3 3              1  1.65
> 
> tst_fnc(tn, y, mad, "vector", na.rm = TRUE)
sim_number_1 sim_number_2 sim_number_3 
   0.8053661    1.0505158    1.1060861 
> tst_fnc(tn, y, mad, "list", na.rm = TRUE)
$sim_number_1
[1] 0.8053661

$sim_number_2
[1] 1.050516

$sim_number_3
[1] 1.106086

> tst_fnc(tn, y, mad, "tibble", na.rm = TRUE)
# A tibble: 3 × 3
  sim_number  name   mad
  <fct>      <dbl> <dbl>
1 1              1 0.805
2 2              1 1.05 
3 3              1 1.11 
> 
> tst_fnc(tn, y, fivenum, "vector", na.rm = TRUE)
     sim_number_1 sim_number_2 sim_number_3
[1,]   -1.8171018   -2.1401932   -1.8916276
[2,]   -0.2868247   -1.1260732   -0.8399117
[3,]    0.1093028   -0.2459402    0.1753937
[4,]    0.9034394    0.2250218    0.8366134
[5,]    1.8552799    1.8119322    1.8895998
> tst_fnc(tn, y, fivenum, "list", na.rm = TRUE)
$sim_number_1
[1] -1.8171018 -0.2868247  0.1093028  0.9034394  1.8552799

$sim_number_2
[1] -2.1401932 -1.1260732 -0.2459402  0.2250218  1.8119322

$sim_number_3
[1] -1.8916276 -0.8399117  0.1753937  0.8366134  1.8895998

> tst_fnc(tn, y, fivenum, "tibble", na.rm = TRUE)
# A tibble: 15 × 3
   sim_number  name fivenum
   <fct>      <dbl>   <dbl>
 1 1              1  -1.82 
 2 1              2  -0.287
 3 1              3   0.109
 4 1              4   0.903
 5 1              5   1.86 
 6 2              1  -2.14 
 7 2              2  -1.13 
 8 2              3  -0.246
 9 2              4   0.225
10 2              5   1.81 
11 3              1  -1.89 
12 3              2  -0.840
13 3              3   0.175
14 3              4   0.837
15 3              5   1.89 
> 
> tst_fnc(tn, y, range, "vector", na.rm = TRUE)
     sim_number_1 sim_number_2 sim_number_3
[1,]    -1.817102    -2.140193    -1.891628
[2,]     1.855280     1.811932     1.889600
> tst_fnc(tn, y, range, "list", na.rm = TRUE)
$sim_number_1
[1] -1.817102  1.855280

$sim_number_2
[1] -2.140193  1.811932

$sim_number_3
[1] -1.891628  1.889600

> tst_fnc(tn, y, range, "tibble", na.rm = TRUE)
# A tibble: 6 × 3
  sim_number  name range
  <fct>      <dbl> <dbl>
1 1              1 -1.82
2 1              2  1.86
3 2              1 -2.14
4 2              2  1.81
5 3              1 -1.89
6 3              2  1.89
> 
> tst_fnc(tn, y, median, "vector", na.rm = TRUE)
sim_number_1 sim_number_2 sim_number_3 
   0.1093028   -0.2459402    0.1753937 
> tst_fnc(tn, y, median, "list", na.rm = TRUE)
$sim_number_1
[1] 0.1093028

$sim_number_2
[1] -0.2459402

$sim_number_3
[1] 0.1753937

> tst_fnc(tn, y, median, "tibble", na.rm = TRUE)
# A tibble: 3 × 3
  sim_number  name median
  <fct>      <dbl>  <dbl>
1 1              1  0.109
2 2              1 -0.246
3 3              1  0.175
> 
> tst_fnc(tn, y, mean, "vector", na.rm = TRUE)
sim_number_1 sim_number_2 sim_number_3 
  0.26894997  -0.28985585  -0.01791235 
> tst_fnc(tn, y, mean, "list", na.rm = TRUE)
$sim_number_1
[1] 0.26895

$sim_number_2
[1] -0.2898558

$sim_number_3
[1] -0.01791235

> tst_fnc(tn, y, mean, "tibble", na.rm = TRUE)
# A tibble: 3 × 3
  sim_number  name    mean
  <fct>      <dbl>   <dbl>
1 1              1  0.269 
2 2              1 -0.290 
3 3              1 -0.0179
> 
> tst_fnc(tn, y, min, "vector", na.rm = TRUE)
sim_number_1 sim_number_2 sim_number_3 
   -1.817102    -2.140193    -1.891628 
> tst_fnc(tn, y, min, "list", na.rm = TRUE)
$sim_number_1
[1] -1.817102

$sim_number_2
[1] -2.140193

$sim_number_3
[1] -1.891628

> tst_fnc(tn, y, min, "tibble", na.rm = TRUE)
# A tibble: 3 × 3
  sim_number  name   min
  <fct>      <dbl> <dbl>
1 1              1 -1.82
2 2              1 -2.14
3 3              1 -1.89
> 
> tst_fnc(tn, y, cummean, "vector")
      sim_number_1 sim_number_2 sim_number_3
 [1,]  0.847681129  -1.44432106  1.284206854
 [2,] -0.115324205  -0.05626296  1.094194555
 [3,] -0.093502901  -0.05117195  0.122322510
 [4,] -0.141833363  -0.24419683  0.304961507
 [5,] -0.149606802  -0.55372883  0.404096022
 [6,]  0.009082290  -0.69506819  0.118160471
 [7,] -0.001400895  -0.60705924  0.019201934
 [8,]  0.110880880  -0.60427875 -0.077841894
 [9,]  0.245079300  -0.77493591  0.087086929
[10,]  0.327769947  -0.71061838  0.149923181
[11,]  0.453179801  -0.64010404  0.245519460
[12,]  0.365656305  -0.56801022  0.080999254
[13,]  0.315478050  -0.52928801  0.139356757
[14,]  0.270458806  -0.53999452  0.097210778
[15,]  0.311342038  -0.42912268  0.093187394
[16,]  0.351126033  -0.41917755  0.051941124
[17,]  0.335278202  -0.42346900  0.146728564
[18,]  0.388391841  -0.39245224  0.149826698
[19,]  0.352637372  -0.32610794  0.078807368
[20,]  0.347873046  -0.32568551  0.002923755
[21,]  0.374328585  -0.36379921 -0.051181786
[22,]  0.340387750  -0.32604826 -0.053102606
[23,]  0.345467751  -0.38079715 -0.119543923
[24,]  0.345871283  -0.34350891 -0.193380744
[25,]  0.332981945  -0.37953422 -0.155741399
[26,]  0.298203084  -0.37249743 -0.077074431
[27,]  0.336781001  -0.30687184 -0.043234141
[28,]  0.289653244  -0.27953626 -0.073357145
[29,]  0.249749859  -0.30665870 -0.031021827
[30,]  0.253463919  -0.33797036 -0.005204433
[31,]  0.260925339  -0.26861867 -0.046765874
[32,]  0.298706990  -0.25699927 -0.085293948
[33,]  0.327701348  -0.20130408 -0.058317712
[34,]  0.322089331  -0.20906782 -0.078089642
[35,]  0.306257425  -0.25090374 -0.063991591
[36,]  0.308156056  -0.23087810 -0.064375608
[37,]  0.296453608  -0.19556815 -0.071030622
[38,]  0.337475354  -0.19749967 -0.062991333
[39,]  0.352182533  -0.20267796 -0.039351907
[40,]  0.372534459  -0.20318387 -0.059365900
[41,]  0.408258408  -0.23139457 -0.027834851
[42,]  0.399297105  -0.23021388 -0.023102518
[43,]  0.382601895  -0.23263980 -0.013814405
[44,]  0.361831902  -0.25990307 -0.051059584
[45,]  0.362870588  -0.28931118 -0.027293257
[46,]  0.344211476  -0.28302387 -0.022789831
[47,]  0.332692850  -0.31520607 -0.008397876
[48,]  0.321283695  -0.31675038 -0.022246405
[49,]  0.311522451  -0.31372382 -0.029527899
[50,]  0.268949967  -0.28985585 -0.017912352
> tst_fnc(tn, y, cummean, "list")
$sim_number_1
 [1]  0.847681129 -0.115324205 -0.093502901 -0.141833363 -0.149606802  0.009082290
 [7] -0.001400895  0.110880880  0.245079300  0.327769947  0.453179801  0.365656305
[13]  0.315478050  0.270458806  0.311342038  0.351126033  0.335278202  0.388391841
[19]  0.352637372  0.347873046  0.374328585  0.340387750  0.345467751  0.345871283
[25]  0.332981945  0.298203084  0.336781001  0.289653244  0.249749859  0.253463919
[31]  0.260925339  0.298706990  0.327701348  0.322089331  0.306257425  0.308156056
[37]  0.296453608  0.337475354  0.352182533  0.372534459  0.408258408  0.399297105
[43]  0.382601895  0.361831902  0.362870588  0.344211476  0.332692850  0.321283695
[49]  0.311522451  0.268949967

$sim_number_2
 [1] -1.44432106 -0.05626296 -0.05117195 -0.24419683 -0.55372883 -0.69506819
 [7] -0.60705924 -0.60427875 -0.77493591 -0.71061838 -0.64010404 -0.56801022
[13] -0.52928801 -0.53999452 -0.42912268 -0.41917755 -0.42346900 -0.39245224
[19] -0.32610794 -0.32568551 -0.36379921 -0.32604826 -0.38079715 -0.34350891
[25] -0.37953422 -0.37249743 -0.30687184 -0.27953626 -0.30665870 -0.33797036
[31] -0.26861867 -0.25699927 -0.20130408 -0.20906782 -0.25090374 -0.23087810
[37] -0.19556815 -0.19749967 -0.20267796 -0.20318387 -0.23139457 -0.23021388
[43] -0.23263980 -0.25990307 -0.28931118 -0.28302387 -0.31520607 -0.31675038
[49] -0.31372382 -0.28985585

$sim_number_3
 [1]  1.284206854  1.094194555  0.122322510  0.304961507  0.404096022  0.118160471
 [7]  0.019201934 -0.077841894  0.087086929  0.149923181  0.245519460  0.080999254
[13]  0.139356757  0.097210778  0.093187394  0.051941124  0.146728564  0.149826698
[19]  0.078807368  0.002923755 -0.051181786 -0.053102606 -0.119543923 -0.193380744
[25] -0.155741399 -0.077074431 -0.043234141 -0.073357145 -0.031021827 -0.005204433
[31] -0.046765874 -0.085293948 -0.058317712 -0.078089642 -0.063991591 -0.064375608
[37] -0.071030622 -0.062991333 -0.039351907 -0.059365900 -0.027834851 -0.023102518
[43] -0.013814405 -0.051059584 -0.027293257 -0.022789831 -0.008397876 -0.022246405
[49] -0.029527899 -0.017912352

> tst_fnc(tn, y, cummean, "tibble")
# A tibble: 150 × 3
   sim_number  name  cummean
   <fct>      <dbl>    <dbl>
 1 1              1  0.848  
 2 1              2 -0.115  
 3 1              3 -0.0935 
 4 1              4 -0.142  
 5 1              5 -0.150  
 6 1              6  0.00908
 7 1              7 -0.00140
 8 1              8  0.111  
 9 1              9  0.245  
10 1             10  0.328  
# … with 140 more rows
# ℹ Use `print(n = ...)` to see more rows
spsanderson commented 1 year ago

Drop attributes