spsanderson / TidyDensity

Create tidy probability/density tibbles and plots of randomly generated and empirical data.
https://www.spsanderson.com/TidyDensity
Other
34 stars 1 forks source link

Add function set for `tidy_multi_` distribution functions #26

Closed spsanderson closed 2 years ago

spsanderson commented 2 years ago
library(tidyverse)

tidy_multi_normal <- function(.n = 50, .mean = c(-1,0,1), .sd = c(1), .num_sims = 2){

  # Tidyeval ----
  n <- as.integer(.n)
  mu <- as.numeric(.mean)
  std <- as.numeric(.sd)
  num_sims <- as.integer(.num_sims)

  # Checks ----
  if(!is.integer(n) | n <= 0){
    rlang::abort(
      "The .n parameter must be an integer and greater than 0"
    )
  }

  if(!is.integer(num_sims) | num_sims <= 1){
    rlang::abort(
      "The .num_sims parameter must be an integer grater than 1."
    )
  }

  if(!is.numeric(mu)){
    rlang::abort(
      "The .mean parameter must be numeric."
    )
  }

  if(!is.numeric(std)){
    rlang::abort(
      "The .sd parameter must be numeric"
    )
  }

  x <- seq(1, num_sims, 1)

  ps <- seq(-n, n-1, 2)
  qs <- seq(0, 1, (1/(n-1)))

  df <- expand_grid(
    sim_number = x,
    mu = mu,
    std = std
  )

  df <- df %>%
    mutate(sim_number = as.factor(sim_number)) %>%
    dplyr::group_by(sim_number, mu, std) %>%
    dplyr::mutate(x = list(1:n)) %>%
    dplyr::mutate(y = list(stats::rnorm(n, mu, std))) %>%
    dplyr::mutate(d = list(density(unlist(y), n = n)[c("x","y")] %>%
                             purrr::set_names("dx","dy") %>%
                             dplyr::as_tibble())) %>%
    dplyr::mutate(p = list(stats::pnorm(ps, mu, std))) %>%
    dplyr::mutate(q = list(stats::qnorm(qs, mu, std))) %>%
    tidyr::unnest(cols = c(x, y, d, p, q)) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(dist_type = paste0("Gaussian: c(", mu, ", ", std, ")")) %>%
    dplyr::mutate(dist_type = as.factor(dist_type)) %>%
    dplyr::select(
      sim_number, dist_type, dplyr::everything()
    ) %>%
    dplyr::arrange(sim_number, dist_type)

  # Attach attributes ----
  attr(df, ".n") <- .n
  attr(df, ".mean") <- .mean
  attr(df, ".sd") <- .sd
  attr(df, ".num_sims") <- .num_sims
  attr(df, "tibble_type") <- "tidy_multi_gaussian"
  attr(df, "ps") <- ps
  attr(df, "qs") <- qs

  # Return ----
  return(df)

}

tn <- tidy_multi_normal(.n = 500,.num_sims = 5)

atb <- attributes(tn)
n <- atb$.n
sims <- atb$.num_sims
dist_type = stringr::str_remove(atb$tibble_type, "tidy_multi_") %>%
  stringr::str_to_title()
sub_title = paste0(
  "Grouped Gaussian - Data Points: ", n, " - ",
  "Simulations: ", sims, "\n",
  "Distribution Family: ", dist_type, "\n",
  "Parameters: ", if(atb$tibble_type == "tidy_multi_gaussian"){
    paste0("Mean: c(", paste0(toString(atb$.mean), ")", " - SD: c(", toString(atb$.sd), ")"))
  }
)

tn %>%
  ggplot2::ggplot(ggplot2::aes(
    x = dx, 
    y = dy, 
    group = interaction(dist_type, sim_number), 
    color = dist_type)) +
  ggplot2::geom_line() +
  ggplot2::theme_minimal() +
  ggplot2::theme(legend.position = "bottom") +
  ggplot2::labs(
    title = "Density Plot",
    subtitle = sub_title,
    x = "",
    y = "Density",
    col

image

> tn %>%
+   group_by(dist_type, sim_number) %>%
+   summarise(mean_mu = mean(y))
`summarise()` has grouped output by 'dist_type'. You can override using the `.groups` argument.
# A tibble: 15 x 3
# Groups:   dist_type [3]
   dist_type          sim_number  mean_mu
   <fct>              <fct>         <dbl>
 1 Gaussian: c(-1, 1) 1          -0.947  
 2 Gaussian: c(-1, 1) 2          -0.971  
 3 Gaussian: c(-1, 1) 3          -1.03   
 4 Gaussian: c(-1, 1) 4          -0.959  
 5 Gaussian: c(-1, 1) 5          -0.981  
 6 Gaussian: c(0, 1)  1           0.00196
 7 Gaussian: c(0, 1)  2           0.00435
 8 Gaussian: c(0, 1)  3           0.0225 
 9 Gaussian: c(0, 1)  4          -0.0188 
10 Gaussian: c(0, 1)  5          -0.0652 
11 Gaussian: c(1, 1)  1           0.936  
12 Gaussian: c(1, 1)  2           0.971  
13 Gaussian: c(1, 1)  3           1.01   
14 Gaussian: c(1, 1)  4           0.916  
15 Gaussian: c(1, 1)  5           0.913  
> tn %>%
+   group_by(dist_type) %>%
+   summarise(mean_mu = mean(y))
# A tibble: 3 x 2
  dist_type          mean_mu
  <fct>                <dbl>
1 Gaussian: c(-1, 1) -0.978 
2 Gaussian: c(0, 1)  -0.0110
3 Gaussian: c(1, 1)   0.950 
spsanderson commented 2 years ago

New function, requires one since it uses do.call from base

tidy_multi_dist <- function(
  .tidy_dist = NULL,
  .param_list = list()
) {

  # Check param ----
  if (is.null(.tidy_dist)){
    rlang::abort(
      "Please enter a 'tidy_' distribution function like 'tidy_normal' in quotes."
    )
  }

  if(length(.param_list) == 0){
    rlang::abort(
      "Please enter some parameters for your chosen 'tidy_' distribution."
    )
  }

  # Call used ---
  td <- as.character(.tidy_dist)

  # Params ----
  params <- .param_list

  # Params for the call ----
  n <- params$.n
  num_sims <- params$.num_sims
  x <- seq(1, num_sims, 1)

  # Final parameter list
  final_params_list <- params[which(!names(params) %in% c(".n", ".num_sims"))]

  ps <- seq(-n, n-1, 2)
  qs <- seq(0, 1, (1/(n-1)))

  # Set the grid to make the calls ----
  param_grid <- expand.grid(final_params_list)

  df <- tidyr::expand_grid(
    sim = as.factor(x),
    param_grid
  ) %>%
    group_by_all()

  # Run call on the grouped df ----
  dff <- df %>%
    dplyr::mutate(results = list(do.call(td, params))) 

  # Get the attributes to be used later on ----
  atb <- dff$results[[1]] %>% attributes()

  # Make Dist Type for column ----
  dist_type <- stringr::str_remove(atb$tibble_type, "tidy_") %>%
    stringr::str_replace_all(pattern = "_", " ") %>%
    stringr::str_to_title()

  # Get column names from the param_grid in order to make teh dist_type column ----
  cols <- names(param_grid)

  dff$dist_name <- paste0(
    paste0(dist_type, " c("),
    apply(dff[, cols], 1, paste0, collapse = ", "),
    ")"
  )

  df_unnested_tbl <- dff %>%
    tidyr::unnest(results) %>%
    dplyr::ungroup() %>%
    dplyr::select(sim_number, dist_name, x:q) %>%
    dplyr::mutate(dist_name = as.factor(dist_name)) %>%
    dplyr::arrange(sim_number, dist_name)

  # Attach attributes ----
  attr(df_unnested_tbl, "all") <- atb
  attr(df_unnested_tbl, "tbl") <- "tidy_multi_tibble"

  # Return ----
  return(df_unnested_tbl)

}

OR

tidy_multi_dist <- function(
  .tidy_dist = NULL,
  .param_list = list()
) {

  # Check param ----
  if (is.null(.tidy_dist)){
    rlang::abort(
      "Please enter a 'tidy_' distribution function like 'tidy_normal' in quotes."
    )
  }

  if(length(.param_list) == 0){
    rlang::abort(
      "Please enter some parameters for your chosen 'tidy_' distribution."
    )
  }

  # Call used ---
  td <- as.character(.tidy_dist)

  # Params ----
  params <- .param_list

  # Params for the call ----
  n <- params$.n
  num_sims <- params$.num_sims
  x <- seq(1, num_sims, 1)

  # Final parameter list
  final_params_list <- params[which(!names(params) %in% c(".n", ".num_sims"))]

  ps <- seq(-n, n-1, 2)
  qs <- seq(0, 1, (1/(n-1)))

  # Set the grid to make the calls ----
  param_grid <- expand.grid(final_params_list)

  df <- tidyr::expand_grid(
    sim = as.factor(x),
    param_grid
  ) %>%
    group_by_all()

  # Run call on the grouped df ----
  dff <- df %>%
    dplyr::mutate(results = list(do.call(td, params))) 

  #df %>% rowwise() %>% mutate(results = list(do.call(td, list(.n = n, .num_sims = num_sims,.mean = .mean, .sd = .sd)))) %>% unnest(results)

  # Get the attributes to be used later on ----
  atb <- dff$results[[1]] %>% attributes()

  # Make Dist Type for column ----
  dist_type <- stringr::str_remove(atb$tibble_type, "tidy_") %>%
    stringr::str_replace_all(pattern = "_", " ") %>%
    stringr::str_to_title()

  # Get column names from the param_grid in order to make teh dist_type column ----
  cols <- names(param_grid)

  dff$dist_name <- paste0(
    paste0(dist_type, " c("),
    apply(dff[, cols], 1, paste0, collapse = ", "),
    ")"
  )

  df_unnested_tbl <- dff %>%
    tidyr::unnest(results) %>%
    dplyr::ungroup() %>%
    dplyr::select(sim_number, dist_name, x:q) %>%
    dplyr::mutate(dist_name = as.factor(dist_name)) %>%
    dplyr::arrange(sim_number, dist_name)

  # Attach attributes ----
  attr(df_unnested_tbl, "all") <- atb
  attr(df_unnested_tbl, "tbl") <- "tidy_multi_tibble"

  # Return ----
  return(df_unnested_tbl)

}
spsanderson commented 2 years ago

Final Function:

tidy_multi_dist <- function(
  .tidy_dist = NULL,
  .param_list = list()
) {

  require("TidyDensity")

  # Check param ----
  if (is.null(.tidy_dist)) {
    rlang::abort(
      "Please enter a 'tidy_' distribution function like 'tidy_normal' 
      in quotes."
    )
  }

  if (length(.param_list) == 0) {
    rlang::abort(
      "Please enter some parameters for your chosen 'tidy_' distribution."
    )
  }

  # Call used ---
  td <- as.character(.tidy_dist)

  # Params ----
  params <- .param_list

  # Params for the call ----
  n <- as.integer(params$.n)
  num_sims <- as.integer(params$.num_sims)
  x <- seq(1, num_sims, 1)

  # Final parameter list
  final_params_list <- params[which(!names(params) %in% c(".n", ".num_sims"))]

  # Set the grid to make the calls ----
  param_grid <- expand.grid(final_params_list)

  df <- tidyr::expand_grid(
    n = n,
    param_grid,
    sim = as.integer(x)
  )

  #func_parm_list <- as.list(df)
  names(df) <- formalArgs(td)

  # Run call on the grouped df ----
  dff <- df %>%
    dplyr::mutate(results = purrr::pmap(dplyr::cur_data(), match.fun(td)))

  # Get the attributes to be used later on ----
  atb <- dff$results[[1]] %>% attributes()

  # Make Dist Type for column ----
  dist_type <- stringr::str_remove(atb$tibble_type, "tidy_") %>%
    stringr::str_replace_all(pattern = "_", " ") %>%
    stringr::str_to_title()

  # Get column names from the param_grid in order to make teh dist_type column ----
  cols <- names(param_grid)

  dff$dist_name <- paste0(
    paste0(dist_type, " c("),
    apply(dff[, cols], 1, paste0, collapse = ", "),
    ")"
  )

  df_unnested_tbl <- dff %>%
    tidyr::unnest(results) %>%
    dplyr::ungroup() %>%
    dplyr::select(sim_number, dist_name, x:q) %>%
    dplyr::mutate(dist_name = as.factor(dist_name)) %>%
    dplyr::arrange(sim_number, dist_name)

  # Attach attributes ----
  attr(df_unnested_tbl, "all") <- atb
  attr(df_unnested_tbl, "tbl") <- "tidy_multi_tibble"

  # Return ----
  return(df_unnested_tbl)

}