spsanderson / TidyDensity

Create tidy probability/density tibbles and plots of randomly generated and empirical data.
https://www.spsanderson.com/TidyDensity
Other
34 stars 1 forks source link

Add function `util_burr_stats_tbl` #348

Closed spsanderson closed 11 months ago

spsanderson commented 1 year ago
#' Distribution Statistics
#'
#' @family Burr
#' @family Distribution Statistics
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @details This function will take in a tibble and returns the statistics
#' of the given type of `tidy_` distribution. It is required that data be
#' passed from a `tidy_` distribution function.
#'
#' @description Returns distribution statistics in a tibble.
#'
#' @param .data The data being passed from a `tidy_` distribution function.
#'
#' @examples
#' library(dplyr)
#'
#' tidy_burr() %>%
#'   util_burr_stats_tbl() %>%
#'   glimpse()
#'
#' @return
#' A tibble
#' 
#' @name util_burr_stats_tbl
NULL

#' @export
#' @rdname util_burr_stats_tbl

util_burr_stats_tbl <- function(.data) {

  # Immediate check for tidy_ distribution function
  if (!"tibble_type" %in% names(attributes(.data))) {
    rlang::abort(
      message = "You must pass data from the 'tidy_dist' function.",
      use_cli_format = TRUE
    )
  }

  if (attributes(.data)$tibble_type != "tidy_burr") {
    rlang::abort(
      message = "You must use 'tidy_burr()'",
      use_cli_format = TRUE
    )
  }

  # Data
  data_tbl <- dplyr::as_tibble(.data)

  atb <- attributes(data_tbl)
  s1 <- atb$.shape1
  s2 <- atb$.shape2
  r  <- atb$.rate
  sc <- 1/r

  stat_mean <- s2 + sc * (s1 / (s1 + 1))
  stat_mode <- ((s2 - 1)/((s1*s2) + 1))^(1/s2)
  stat_median <- sc * actuar::qburr(p = 0.5, shape1 = s1, shape2 = 1)
  stat_skewness <- (4 * sqrt(6) * (s1 + 2)) / (s1 * pi ^ (3/2)*(s1 + 3))
  stat_kurtosis <- 3 * (s1^2 + 6 * s1 + 5) / (s1 * (s1 + 1)^2)
  stat_coef_var <- (-1 * stat_mean^2) + stat_mean

  # Data Tibble
  ret <- dplyr::tibble(
    tidy_function = atb$tibble_type,
    function_call = atb$dist_with_params,
    distribution = dist_type_extractor(atb$tibble_type),
    distribution_type = atb$distribution_family_type,
    points = atb$.n,
    simulations = atb$.num_sims,
    mean = stat_mean,
    mode = stat_mode,
    median = stat_median,
    coeff_var = stat_coef_var,
    skewness = stat_skewness,
    kurtosis = stat_kurtosis,
    computed_std_skew = tidy_skewness_vec(data_tbl$y),
    computed_std_kurt = tidy_kurtosis_vec(data_tbl$y),
    ci_lo = ci_lo(data_tbl$y),
    ci_hi = ci_hi(data_tbl$y)
  )

  # Return
  return(ret)
}

Example:

> tidy_burr() |>
+   util_burr_stats_tbl() |>
+   glimpse()
Rows: 1
Columns: 16
$ tidy_function     <chr> "tidy_burr"
$ function_call     <chr> "Burr c(1, 1, 1, 1)"
$ distribution      <chr> "Burr"
$ distribution_type <chr> "continuous"
$ points            <dbl> 50
$ simulations       <dbl> 1
$ mean              <dbl> 1.5
$ mode              <dbl> 0
$ median            <dbl> 1
$ coeff_var         <dbl> -0.75
$ skewness          <dbl> 1.31969
$ kurtosis          <dbl> 9
$ computed_std_skew <dbl> 2.961009
$ computed_std_kurt <dbl> 11.49156
$ ci_lo             <dbl> 0.06874956
$ ci_hi             <dbl> 19.57948
spsanderson commented 1 year ago

Possible:

burr_stats <- function(alpha, beta, gamma, delta) {
#  Args:
#    alpha: The shape parameter of the Burr distribution.
#    beta: The scale parameter of the Burr distribution.
#    gamma: The location parameter of the Burr distribution.
#    delta: The threshold parameter of the Burr distribution.

#  Returns:
#    A list containing the mean, median, mode, variance, skewness, and kurtosis of the Burr distribution.

  mean <- gamma + beta * (alpha / (alpha + 1))
  median <- gamma + beta * (log(2) / alpha) ** (1 / alpha)
  mode <- gamma + beta * (1 / alpha) ** (1 / alpha)
  variance <- beta ** 2 * (alpha * pi ** 2 / (6 * (alpha + 1) ** 2))
  skewness <- (4 * sqrt(6) * (alpha + 1) * (alpha + 2)) / (alpha * pi ** (3 / 2) * (alpha + 3))
  kurtosis <- 3 * (alpha ** 2 + 6 * alpha + 5) / (alpha * (alpha + 1) ** 2)

  return(list(mean = mean, median = median, mode = mode, variance = variance, skewness = skewness, kurtosis = kurtosis))
}
library(TidyDensity)

x <- tidy_burr()$y

p <- util_burr_param_estimate(x)$parameter_tbl

s1 <- p$shape1
s2 <- p$shape2
r  <- p$rate
sc <- 1/r

mu <- s2 + sc * (s1 / (s1 + 1))
mn <- (2 * (1/s1) - 1)^(1/s2)
md <- ((s2 - 1)/((s1*s2) + 1))^(1/s2)
va <- -1 * mu
sk <- (4 * sqrt(6) * (s1 + 2)) / (s1 * pi ^ (3/2)*(s1 + 3))
ku <- 3 * (s1^2 + 6 * s1 + 5) / (s1 * (s1 + 1)^2)

Documentation: https://en.wikipedia.org/wiki/Burr_distribution https://www.causascientia.org/math_stat/Dists/Compendium.pdf https://en.wikipedia.org/wiki/Beta_function#Software_implementation