spsanderson / TidyDensity

Create tidy probability/density tibbles and plots of randomly generated and empirical data.
https://www.spsanderson.com/TidyDensity
Other
34 stars 1 forks source link

binomial #134

Closed spsanderson closed 2 years ago

spsanderson commented 2 years ago

https://www.itl.nist.gov/div898/handbook/eda/section3/eda366i.htm

spsanderson commented 2 years ago

Function:

#' Distribution Statistics
#' 
#' @family Binomial
#' @family Distribution Statistics
#' 
#' @author Steven P. Sanderson II, MPH
#' 
#' @details This function will take in a tibble and returns the statistics
#' of the given type of `tidy_` distribution. It is required that data be
#' passed from a `tidy_` distribution function.
#' 
#' @description Returns distribution statistics in a tibble.
#' 
#' @param .data The data being passed from a `tidy_` distribution function.
#' 
#' @examples 
#' tidy_binomial() %>%
#'   util_binomial_stats_tbl()
#' 
#' @return 
#' A tibble
#' 
#' @export
#' 

util_binomial_stats_tbl <- function(.data){

  # Builds a one-row tibble of theoretical Binomial(size, prob) statistics
  # (mean, mode interval, standard deviation, coefficient of variation,
  # skewness, kurtosis) from the parameters stored as attributes on `.data`,
  # plus the skewness and kurtosis computed from the `y` column.
  #
  # .data: output of `tidy_binomial()`; must carry the `tibble_type`,
  #        `.size` and `.prob` attributes.
  # Returns: a tibble with one row.
  # Errors (rlang::abort) when `.data` has no `tibble_type` attribute or the
  # attribute is not "tidy_binomial".

  # Immediate check for tidy_ distribution function
  if (!"tibble_type" %in% names(attributes(.data))){
    rlang::abort(
      message = "You must pass data from the 'tidy_dist' function.",
      use_cli_format = TRUE
    )
  }

  if (attributes(.data)$tibble_type != "tidy_binomial"){
    rlang::abort(
      message = "You must use 'tidy_binomial()'",
      use_cli_format = TRUE
    )
  }

  # Data
  data_tbl <- tibble::as_tibble(.data)

  # Read the parameters from the validated input itself so they cannot be
  # lost if the tibble coercion drops custom attributes.
  atb <- attributes(.data)
  n <- atb$.size
  p <- atb$.prob
  # Failure probability. Defined locally: the previous code referenced an
  # undefined `q`, which either errored or picked up a stray global value.
  q <- 1 - p

  stat_mean   <- n * p
  # The mode lies in the interval p(n + 1) - 1 <= x <= p(n + 1).
  stat_mode   <- c(p * (n + 1) - 1, p * (n + 1))
  # Binomial standard deviation is sqrt(n p (1 - p)); the earlier expression
  # was the Beta-distribution formula.
  stat_sd     <- sqrt(n * p * q)
  stat_skewness <- (1 - 2 * p) / sqrt((n * p) * q)
  stat_kurtosis <- 3 - 6 / n + 1 / ((n * p) * q)
  stat_coef_var <- sqrt(q / (n * p))

  # Data Tibble
  ret <- tibble::tibble(
    tidy_function = atb$tibble_type,
    function_call = atb$dist_with_params,
    distribution = atb$tibble_type %>%
      stringr::str_remove("tidy_") %>%
      stringr::str_to_title(),
    distribution_type = atb$distribution_family_type,
    points = atb$.n,
    simulations = atb$.num_sims,
    mean = stat_mean,
    mode_lower = stat_mode[[1]],
    mode_upper = stat_mode[[2]],
    range = paste0("0 to ", n),
    std_dv = stat_sd,
    coeff_var = stat_coef_var,
    skewness = stat_skewness,
    kurtosis = stat_kurtosis,
    # Sample-based counterparts computed from the generated values.
    computed_std_skew = tidy_skewness_vec(data_tbl$y),
    computed_std_kurt = tidy_kurtosis_vec(data_tbl$y)
  )

  # Return
  return(ret)

}

Example:

# Generate a tidy binomial tibble with size = 2 and prob = 0.25, compute its
# statistics table, and print that table one column per line.
glimpse(
  util_binomial_stats_tbl(
    tidy_binomial(.size = 2, .prob = 0.25)
  )
)

Rows: 1
Columns: 16
$ tidy_function     <chr> "tidy_binomial"
$ function_call     <chr> "Binomial c(2, 0.25)"
$ distribution      <chr> "Binomial"
$ distribution_type <chr> "discrete"
$ points            <dbl> 50
$ simulations       <dbl> 1
$ mean              <dbl> 0.5
$ mode_lower        <dbl> -0.25
$ mode_upper        <dbl> 0.75
$ range             <chr> "0 to 2"
$ std_dv            <dbl> 0.2666667
$ coeff_var         <dbl> 1.224745
$ skewness          <dbl> 0.8164966
$ kurtosis          <dbl> 2.666667
$ computed_std_skew <dbl> 0.5466228
$ computed_std_kurt <dbl> 2.224713