easystats / bayestestR

:ghost: Utilities for analyzing Bayesian models and posterior distributions
https://easystats.github.io/bayestestR/
GNU General Public License v3.0
563 stars 55 forks source link

support `rvar` columns in data.frames #673

Closed mattansb closed 3 weeks ago

mattansb commented 3 weeks ago

Closes #604

``` r
library(bayestestR)

dfx <- data.frame(mu = c(0, 0.5, 1), sigma = c(1, 0.5, 0.25))
dfx$my_rvar <- posterior::rvar_rng(rnorm, 3, mean = dfx$mu, sd = dfx$sigma)
dfx$other_rvar <- posterior::rvar_rng(rnorm, 3, mean = dfx$mu + 0.5, sd = dfx$sigma - 0.1)
dfx
#>    mu sigma     my_rvar  other_rvar
#> 1 0.0  1.00 0.01 ± 0.99 0.49 ± 0.91
#> 2 0.5  0.50 0.51 ± 0.49 1.00 ± 0.40
#> 3 1.0  0.25 1.00 ± 0.25 1.50 ± 0.15

map_estimate(dfx, rvar_col = "my_rvar")
#> MAP Estimate
#> 
#> mu   | sigma | MAP_Estimate
#> ---------------------------
#> 0.00 |  1.00 |         0.12
#> 0.50 |  0.50 |         0.52
#> 1.00 |  0.25 |         1.00

point_estimate(dfx, rvar_col = "my_rvar")
#> Point Estimate
#> 
#> mu   | sigma |   Median | Mean |  MAP
#> -------------------------------------
#> 0.00 |  1.00 | 6.21e-03 | 0.01 | 0.12
#> 0.50 |  0.50 |     0.50 | 0.51 | 0.52
#> 1.00 |  0.25 |     1.00 | 1.00 | 1.00

eti(dfx, rvar_col = "my_rvar")
#> Equal-Tailed Interval
#> 
#> mu   | sigma |       95% ETI
#> ----------------------------
#> 0.00 |  1.00 | [-1.87, 1.99]
#> 0.50 |  0.50 | [-0.44, 1.46]
#> 1.00 |  0.25 | [ 0.52, 1.48]

bci(dfx, rvar_col = "my_rvar")
#>    mu sigma   CI     CI_low  CI_high
#> 1 0.0  1.00 0.95 -1.8677618 1.999030
#> 2 0.5  0.50 0.95 -0.4308229 1.464526
#> 3 1.0  0.25 0.95  0.5241453 1.489345

hdi(dfx, rvar_col = "my_rvar")
#> Highest Density Interval
#> 
#> mu   | sigma |       95% HDI
#> ----------------------------
#> 0.00 |  1.00 | [-1.81, 2.03]
#> 0.50 |  0.50 | [-0.46, 1.43]
#> 1.00 |  0.25 | [ 0.52, 1.47]

spi(dfx, rvar_col = "my_rvar")
#> Shortest Probability Interval
#> 
#> mu   | sigma |       95% SPI
#> ----------------------------
#> 0.00 |  1.00 | [-1.78, 2.07]
#> 0.50 |  0.50 | [-0.46, 1.43]
#> 1.00 |  0.25 | [ 0.51, 1.47]

ci(dfx, rvar_col = "my_rvar")
#> Equal-Tailed Interval
#> 
#> mu   | sigma |       95% ETI
#> ----------------------------
#> 0.00 |  1.00 | [-1.87, 1.99]
#> 0.50 |  0.50 | [-0.44, 1.46]
#> 1.00 |  0.25 | [ 0.52, 1.48]

p_direction(dfx, rvar_col = "my_rvar")
#> Probability of Direction
#> 
#> mu   | sigma |     pd
#> ---------------------
#> 0.00 |  1.00 | 50.30%
#> 0.50 |  0.50 | 84.78%
#> 1.00 |  0.25 |   100%

p_map(dfx, rvar_col = "my_rvar")
#> MAP-based p-value
#> 
#> mu   | sigma | p (MAP)
#> ----------------------
#> 0.00 |  1.00 |  0.997 
#> 0.50 |  0.50 |  0.634 
#> 1.00 |  0.25 |  < .001

p_rope(dfx, rvar_col = "my_rvar", range = c(-1, 1))
#> Proportion of samples inside the ROPE [-1.00, 1.00]
#> 
#> mu   | sigma | p (ROPE)
#> -----------------------
#> 0.00 |  1.00 |    0.685
#> 0.50 |  0.50 |    0.837
#> 1.00 |  0.25 |    0.501

p_significance(dfx, rvar_col = "my_rvar", threshold = 0.5)
#> Practical Significance (threshold: 0.50)
#> 
#> mu   | sigma |   ps
#> -------------------
#> 0.00 |  1.00 | 0.31
#> 0.50 |  0.50 | 0.50
#> 1.00 |  0.25 | 0.98

rope(dfx, rvar_col = "my_rvar", range = c(-1, 0.5))
#> # Proportion of samples inside the ROPE [-1.00, 0.50]:
#> 
#>   mu | sigma | inside ROPE
#> --------------------------
#> 0.00 |  1.00 |     55.89 %
#> 0.50 |  0.50 |     49.79 %
#> 1.00 |  0.25 |      0.00 %

equivalence_test(dfx, rvar_col = "my_rvar", range = c(-1, 0.5), ci = 0.8)
#> # Test for Practical Equivalence
#> 
#>   ROPE: [-1.00 0.50]
#> 
#>   mu | sigma |        H0 | inside ROPE |      80% HDI
#> -----------------------------------------------------
#> 0.00 |  1.00 | Undecided |     66.38 % | [-1.26 1.32]
#> 0.50 |  0.50 | Undecided |     49.75 % | [-0.13 1.14]
#> 1.00 |  0.25 |  Rejected |      0.00 % | [ 0.68 1.32]

estimate_density(dfx, rvar_col = "my_rvar") |> head()
#>   mu sigma         x            y
#> 1  0     1 -3.566836 0.0007033624
#> 2  0     1 -3.560231 0.0007158896
#> 3  0     1 -3.553626 0.0007284776
#> 4  0     1 -3.547021 0.0007411670
#> 5  0     1 -3.540416 0.0007539585
#> 6  0     1 -3.533811 0.0007668695

describe_posterior(dfx, rvar_col = "my_rvar",
                   centrality = "MAP", ci_method = "hdi", ci = 0.8,
                   test = c("pd", "p_map", "rope"), rope_ci = 1, rope_range = c(-1, 0.5))
#> Summary of Posterior Distribution
#> 
#> mu   | sigma |  MAP |        80% CI | p (MAP) |     pd |          ROPE | % in ROPE
#> ----------------------------------------------------------------------------------
#> 0.00 |  1.00 | 0.12 | [-1.38, 1.15] |  0.997  | 50.30% | [-1.00, 0.50] |    53.10%
#> 0.50 |  0.50 | 0.52 | [-0.11, 1.17] |  0.634  | 84.78% | [-1.00, 0.50] |    49.75%
#> 1.00 |  0.25 | 1.00 | [ 0.70, 1.34] |  < .001 |   100% | [-1.00, 0.50] |     1.95%

bayesfactor_parameters(dfx, rvar_col = "my_rvar")
#> Warning: Prior not specified! Please specify priors (with column order matching
#>   'posterior') to get meaningful results.
#> Warning: Bayes factors might not be precise.
#>   For precise Bayes factors, sampling at least 40,000 posterior samples is
#>   recommended.
#> Bayes Factor (Savage-Dickey density ratio)
#> 
#> mu    | sigma |   BF
#> --------------------
#> 0.000 | 1.000 | 1.00
#> 0.500 | 0.500 | 1.00
#> 1.000 | 0.250 | 1.00
#> 
#> * Evidence Against The Null: 0

bayesfactor_parameters(dfx, rvar_col = "my_rvar", prior = "other_rvar")
#> Warning: Bayes factors might not be precise.
#>   For precise Bayes factors, sampling at least 40,000 posterior samples is
#>   recommended.
#> Bayes Factor (Savage-Dickey density ratio)
#> 
#> mu    | sigma |       BF
#> ------------------------
#> 0.000 | 1.000 |     1.01
#> 0.500 | 0.500 |    0.088
#> 1.000 | 0.250 | 1.34e-06
#> 
#> * Evidence Against The Null: 0

bayesfactor_restricted(dfx, rvar_col = "my_rvar",
                       hypothesis = "`x[1]` < `x[3]`")
#> Warning: Prior not specified! 
#>   Please specify priors (with column names matching 'posterior')
#>    to get meaningful results.
#> Bayes Factor (Order-Restriction)
#> 
#> Hypothesis      P(Prior) P(Posterior)   BF
#> `x[1]` < `x[3]`     0.83         0.83 1.00
#> 
#> * Bayes factors for the restricted model vs. the un-restricted model.

bayesfactor_restricted(dfx, rvar_col = "my_rvar", prior = "other_rvar",
                       hypothesis = "`x[1]` < `x[3]`")
#> Bayes Factor (Order-Restriction)
#> 
#> Hypothesis      P(Prior) P(Posterior)    BF
#> `x[1]` < `x[3]`     0.85         0.83 0.975
#> 
#> * Bayes factors for the restricted model vs. the un-restricted model.

si(dfx, rvar_col = "my_rvar", prior = "other_rvar", BF = c(1, 10))
#> Warning: Support intervals might not be precise.
#>   For precise support intervals, sampling at least 40,000 posterior
#>   samples is recommended.
#> Support Interval
#> 
#> Parameter |      BF = 1 SI |    BF = 10 SI
#> ------------------------------------------
#> x[1]      | [-3.94, -0.03] |              
#> x[2]      | [-1.44,  0.68] | [-1.44, 0.03]
#> x[3]      | [ 0.16,  1.28] | [ 0.16, 1.14]

Created on 2024-09-05 with reprex v2.1.0

2DO

mattansb commented 3 weeks ago

I'm done - read to merge if you approve @strengejacke 👍

strengejacke commented 3 weeks ago

I fixed some lintrs. There seems to be a problem with the docs, see html-check that fails:

❯ checking HTML version of manual ... NOTE
  Found the following HTML validation problems:
  bayesfactor_parameters.html:300:38 (bayesfactor_parameters.Rd:245): Warning: nested emphasis <em>
  bayesfactor_restricted.html:200:38 (bayesfactor_restricted.Rd:138): Warning: nested emphasis <em>
  si.html:234:38 (si.Rd:166): Warning: nested emphasis <em>

can you look into this? Note that you need to pull changes first...