easystats / bayestestR

:ghost: Utilities for analyzing Bayesian models and posterior distributions
https://easystats.github.io/bayestestR/
GNU General Public License v3.0
563 stars 55 forks source link

Better emmeans outputs #672

Closed mattansb closed 3 weeks ago

mattansb commented 4 weeks ago

Closes #661, #670

library(bayestestR)
library(emmeans)
#> Welcome to emmeans.
#> Caution: You lose important information if you filter this package's results.
#> See '? untidy'
library(insight)
library(rstanarm)
#> Loading required package: Rcpp
#> This is rstanarm version 2.32.1
#> - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
#> - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
#> - For execution on a local, multicore CPU with excess RAM we recommend calling
#>   options(mc.cores = parallel::detectCores())

mod <- stan_glm(mpg ~ hp + factor(cyl), data = mtcars, refresh = 0)
modp <- unupdate(mod)
#> Sampling priors, please wait...

em1 <- emmeans(mod, ~ cyl | hp, at = list(hp = c(50, 100)))
emc <- pairs(em1)

# debugonce(bayestestR:::.append_datagrid)

describe_posterior(em1, test = NULL)
#> Summary of Posterior Distribution
#> 
#> cyl  |     hp | Median |         95% CI
#> ---------------------------------------
#> 4.00 |  50.00 |  27.45 | [25.24, 29.59]
#> 6.00 |  50.00 |  21.47 | [18.08, 24.81]
#> 8.00 |  50.00 |  18.84 | [13.59, 24.44]
#> 4.00 | 100.00 |  26.22 | [24.14, 28.27]
#> 6.00 | 100.00 |  20.27 | [17.66, 22.89]
#> 8.00 | 100.00 |  17.66 | [13.85, 21.74]
point_estimate(em1)
#> Point Estimate
#> 
#> cyl  |     hp | Median |  Mean |   MAP
#> --------------------------------------
#> 4.00 |  50.00 |  27.45 | 27.41 | 27.64
#> 6.00 |  50.00 |  21.47 | 21.46 | 21.05
#> 8.00 |  50.00 |  18.84 | 18.90 | 18.39
#> 4.00 | 100.00 |  26.22 | 26.22 | 26.21
#> 6.00 | 100.00 |  20.27 | 20.26 | 20.55
#> 8.00 | 100.00 |  17.66 | 17.71 | 17.38
map_estimate(em1)
#> MAP Estimate
#> 
#> cyl  |     hp | MAP_Estimate
#> ----------------------------
#> 4.00 |  50.00 |        27.64
#> 6.00 |  50.00 |        21.05
#> 8.00 |  50.00 |        18.39
#> 4.00 | 100.00 |        26.21
#> 6.00 | 100.00 |        20.55
#> 8.00 | 100.00 |        17.38
eti(em1)
#> Equal-Tailed Interval
#> 
#> cyl  |     hp |        95% ETI
#> ------------------------------
#> 4.00 |  50.00 | [25.24, 29.59]
#> 6.00 |  50.00 | [18.08, 24.81]
#> 8.00 |  50.00 | [13.59, 24.44]
#> 4.00 | 100.00 | [24.14, 28.27]
#> 6.00 | 100.00 | [17.66, 22.89]
#> 8.00 | 100.00 | [13.85, 21.74]
hdi(em1)
#> Highest Density Interval
#> 
#> cyl  |     hp |        95% HDI
#> ------------------------------
#> 4.00 |  50.00 | [25.25, 29.59]
#> 6.00 |  50.00 | [18.21, 24.94]
#> 8.00 |  50.00 | [13.62, 24.45]
#> 4.00 | 100.00 | [24.13, 28.24]
#> 6.00 | 100.00 | [17.66, 22.89]
#> 8.00 | 100.00 | [13.69, 21.48]
ci(em1)
#> Equal-Tailed Interval
#> 
#> cyl  |     hp |        95% ETI
#> ------------------------------
#> 4.00 |  50.00 | [25.24, 29.59]
#> 6.00 |  50.00 | [18.08, 24.81]
#> 8.00 |  50.00 | [13.59, 24.44]
#> 4.00 | 100.00 | [24.14, 28.27]
#> 6.00 | 100.00 | [17.66, 22.89]
#> 8.00 | 100.00 | [13.85, 21.74]
bci(em1)
#>   cyl  hp   CI   CI_low  CI_high
#> 1   4  50 0.95 25.15104 29.54021
#> 2   6  50 0.95 18.01651 24.76483
#> 3   8  50 0.95 13.70008 24.55040
#> 4   4 100 0.95 24.14164 28.26552
#> 5   6 100 0.95 17.65731 22.88807
#> 6   8 100 0.95 13.97831 21.90116
spi(em1)
#> Shortest Probability Interval
#> 
#> cyl  |     hp |        95% SPI
#> ------------------------------
#> 4.00 |  50.00 | [25.25, 29.60]
#> 6.00 |  50.00 | [18.07, 24.80]
#> 8.00 |  50.00 | [13.10, 23.94]
#> 4.00 | 100.00 | [24.13, 28.24]
#> 6.00 | 100.00 | [17.65, 22.88]
#> 8.00 | 100.00 | [13.68, 21.48]
si(em1, prior = modp)
#> Warning: Support intervals might not be precise.
#>   For precise support intervals, sampling at least 40,000 posterior
#>   samples is recommended.
#> Support Interval
#> 
#> cyl  |     hp |      BF = 1 SI
#> ------------------------------
#> 4.00 |  50.00 | [24.58, 30.17]
#> 6.00 |  50.00 | [17.29, 25.64]
#> 8.00 |  50.00 | [13.06, 24.89]
#> 4.00 | 100.00 | [23.63, 28.85]
#> 6.00 | 100.00 | [16.96, 23.40]
#> 8.00 | 100.00 | [13.29, 21.85]
estimate_density(em1) |> head(n = 15)
#>    cyl hp        x           y
#> 1    4 50 23.45954 0.001691268
#> 2    4 50 23.46792 0.001722847
#> 3    4 50 23.47631 0.001753208
#> 4    4 50 23.48469 0.001782290
#> 5    4 50 23.49308 0.001809716
#> 6    4 50 23.50146 0.001835769
#> 7    4 50 23.50984 0.001860421
#> 8    4 50 23.51823 0.001883648
#> 9    4 50 23.52661 0.001905435
#> 10   4 50 23.53500 0.001925727
#> 11   4 50 23.54338 0.001944235
#> 12   4 50 23.55177 0.001961297
#> 13   4 50 23.56015 0.001976928
#> 14   4 50 23.56853 0.001991148
#> 15   4 50 23.57692 0.002003985

p_rope(emc)
#> Proportion of samples inside the ROPE [-0.10, 0.10]
#> 
#> contrast    |     hp | p (ROPE)
#> -------------------------------
#> cyl4 - cyl6 |  50.00 |   < .001
#> cyl4 - cyl8 |  50.00 |   < .001
#> cyl6 - cyl8 |  50.00 |   0.015 
#> cyl4 - cyl6 | 100.00 |   < .001
#> cyl4 - cyl8 | 100.00 |   < .001
#> cyl6 - cyl8 | 100.00 |   0.015
p_map(emc)
#> MAP-based p-value
#> 
#> contrast    |     hp | p (MAP)
#> ------------------------------
#> cyl4 - cyl6 |  50.00 |   0.004
#> cyl4 - cyl8 |  50.00 |   0.014
#> cyl6 - cyl8 |  50.00 |   0.414
#> cyl4 - cyl6 | 100.00 |   0.004
#> cyl4 - cyl8 | 100.00 |   0.014
#> cyl6 - cyl8 | 100.00 |   0.414
p_direction(emc)
#> Probability of Direction
#> 
#> contrast    |     hp |     pd
#> -----------------------------
#> cyl4 - cyl6 |  50.00 | 99.98%
#> cyl4 - cyl8 |  50.00 | 99.83%
#> cyl6 - cyl8 |  50.00 | 89.40%
#> cyl4 - cyl6 | 100.00 | 99.98%
#> cyl4 - cyl8 | 100.00 | 99.83%
#> cyl6 - cyl8 | 100.00 | 89.40%
p_significance(emc)
#> Practical Significance (threshold: 0.10)
#> 
#> contrast    |     hp |   ps
#> ---------------------------
#> cyl4 - cyl6 |  50.00 | 1.00
#> cyl4 - cyl8 |  50.00 | 1.00
#> cyl6 - cyl8 |  50.00 | 0.89
#> cyl4 - cyl6 | 100.00 | 1.00
#> cyl4 - cyl8 | 100.00 | 1.00
#> cyl6 - cyl8 | 100.00 | 0.89
rope(emc)
#> # Proportion of samples inside the ROPE [-0.10, 0.10]:
#> 
#> contrast    |  hp | inside ROPE
#> -------------------------------
#> cyl4 - cyl6 |  50 |      0.00 %
#> cyl4 - cyl8 |  50 |      0.00 %
#> cyl6 - cyl8 |  50 |      1.61 %
#> cyl4 - cyl6 | 100 |      0.00 %
#> cyl4 - cyl8 | 100 |      0.00 %
#> cyl6 - cyl8 | 100 |      1.61 %
equivalence_test(emc)
#> # Test for Practical Equivalence
#> 
#>   ROPE: [-0.10 0.10]
#> 
#> contrast    |  hp |        H0 | inside ROPE |       95% HDI
#> -----------------------------------------------------------
#> cyl4 - cyl6 |  50 |  Rejected |      0.00 % | [ 2.47  9.47]
#> cyl4 - cyl8 |  50 |  Rejected |      0.00 % | [ 3.57 13.51]
#> cyl6 - cyl8 |  50 | Undecided |      1.61 % | [-1.68  6.62]
#> cyl4 - cyl6 | 100 |  Rejected |      0.00 % | [ 2.47  9.47]
#> cyl4 - cyl8 | 100 |  Rejected |      0.00 % | [ 3.57 13.51]
#> cyl6 - cyl8 | 100 | Undecided |      1.61 % | [-1.68  6.62]
bayesfactor_parameters(emc, prior = modp)
#> Warning: Bayes factors might not be precise.
#>   For precise Bayes factors, sampling at least 40,000 posterior samples is
#>   recommended.
#> Bayes Factor (Savage-Dickey density ratio)
#> 
#> contrast    |      hp |    BF
#> -----------------------------
#> cyl4 - cyl6 |  50.000 |  8.23
#> cyl4 - cyl8 |  50.000 |  9.00
#> cyl6 - cyl8 |  50.000 | 0.102
#> cyl4 - cyl6 | 100.000 |  8.23
#> cyl4 - cyl8 | 100.000 |  9.00
#> cyl6 - cyl8 | 100.000 | 0.102
#> 
#> * Evidence Against The Null: 0
bayesfactor_restricted(emc, prior = modp, hypothesis = "`cyl4 - cyl6 50` < `cyl4 - cyl8 50`") # unchanged
#> Bayes Factor (Order-Restriction)
#> 
#> Hypothesis                          P(Prior) P(Posterior)   BF
#> `cyl4 - cyl6 50` < `cyl4 - cyl8 50`     0.49         0.89 1.82
#> 
#> * Bayes factors for the restricted model vs. the un-restricted model.

Created on 2024-09-03 with reprex v2.1.0

mattansb commented 4 weeks ago

This might break somethings in {see} - I'm not too familiar with what was previously working. @strengejacke @DominiqueMakowski care to point me in the right direction?

strengejacke commented 4 weeks ago

Do you have an example what might be broken in see? @IndrajeetPatil have you already submitted see?

IndrajeetPatil commented 4 weeks ago

No, not yet.

Should I wait?

mattansb commented 4 weeks ago

I'll get some non-working examples tomorrow.

strengejacke commented 4 weeks ago

No, not yet.

Should I wait?

Yeah, maybe we can already take the forthcoming bayestestR changes into account, and submit bayestestR after see.

IndrajeetPatil commented 4 weeks ago

SGTM.

mattansb commented 4 weeks ago

Alright, going off of https://easystats.github.io/see/articles/bayestestR.html, we have:

library(bayestestR)
library(emmeans)
library(insight)
library(rstanarm)

mod <- stan_glm(mpg ~ hp + factor(cyl), data = mtcars, refresh = 0)
modp <- unupdate(mod)
#> Sampling priors, please wait...

em1 <- emmeans(mod, ~ cyl | hp, at = list(hp = c(50, 100)))

Plots that work

result <- point_estimate(em1)
plot(result)

result <- rope(em1)
plot(result)


result <- bayesfactor_parameters(em1, prior = modp)
#> Warning: Bayes factors might not be precise.
#>   For precise Bayes factors, sampling at least 40,000 posterior samples is
#>   recommended.
plot(result)

Plots that don't fail but are wrong:

result <- si(em1, prior = modp)
#> Warning: Support intervals might not be precise.
#>   For precise support intervals, sampling at least 40,000 posterior
#>   samples is recommended.
plot(result)

result <- estimate_density(em1)
plot(result)

Plots that error

result <- describe_posterior(em1)
plot(result)
#> Error in `scale_y_continuous()`:
#> ! Discrete values supplied to continuous scale.
#> ℹ Example values: Distribution, Distribution, Distribution, Distribution, and
#>   Distribution
result <- p_direction(em1)
plot(result)
#> Error in do.call(rbind, by(dataplot, list(dataplot$y, dataplot$fill), : second argument must be a list
result <- p_significance(em1)
plot(result)
#> Error in do.call(rbind, by(dataplot, list(dataplot$y, dataplot$fill), : second argument must be a list
result <- hdi(em1)
plot(result)
#> Error in `ggridges::geom_ridgeline_gradient()`:
#> ! Problem while computing aesthetics.
#> ℹ Error occurred in the 1st layer.
#> Caused by error in `.data$x`:
#> ! Column `x` not found in `.data`.
result <- equivalence_test(em1)
plot(result)
#> Error in `[.emmGrid`(data, , i$Parameter, drop = FALSE): argument "i" is missing, with no default

Created on 2024-09-03 with reprex v2.1.0

mattansb commented 4 weeks ago

Yeah, maybe we can already take the forthcoming bayestestR changes into account, and submit bayestestR after see.

insight would have to be submitted first.

strengejacke commented 4 weeks ago

ok, insight was just updated one or two days ago... What would you suggest? Release see as planned, and keep this for the next update round?

strengejacke commented 4 weeks ago

I opened a PR (https://github.com/easystats/see/pull/360) for testing purposes, to see if the current changes in insight and this PR break see or not.

mattansb commented 3 weeks ago

I think we can merge to be sent to CRAN on the next round of updates (I will also make a similar PR here for marginaleffects). In the meanwhile, we can open an issue on see to fix these plot (if these ever worked in the first place)

mattansb commented 3 weeks ago

Added most of the support for marginaleffects:

library(bayestestR)
library(marginaleffects)
#> Warning: package 'marginaleffects' was built under R version 4.3.3
library(insight)
library(rstanarm)
#> Loading required package: Rcpp
#> This is rstanarm version 2.32.1
#> - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
#> - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
#> - For execution on a local, multicore CPU with excess RAM we recommend calling
#>   options(mc.cores = parallel::detectCores())

mod <- stan_glm(mpg ~ hp + factor(cyl), data = mtcars, refresh = 0)
modp <- unupdate(mod)
#> Sampling priors, please wait...

em1 <- avg_predictions(mod, variables = list("cyl" = unique, hp = c(50, 100)))

point_estimate(em1)
#> Point Estimate
#> 
#> cyl |     hp | Median |  Mean |   MAP
#> -------------------------------------
#> 4   |  50.00 |  27.41 | 27.43 | 27.34
#> 4   | 100.00 |  26.23 | 26.23 | 26.20
#> 6   |  50.00 |  21.51 | 21.52 | 21.58
#> 6   | 100.00 |  20.32 | 20.33 | 20.34
#> 8   |  50.00 |  18.88 | 18.93 | 19.12
#> 8   | 100.00 |  17.70 | 17.73 | 17.10

map_estimate(em1)
#> MAP Estimate
#> 
#> cyl |     hp | MAP_Estimate
#> ---------------------------
#> 4   |  50.00 |        27.34
#> 4   | 100.00 |        26.20
#> 6   |  50.00 |        21.58
#> 6   | 100.00 |        20.34
#> 8   |  50.00 |        19.12
#> 8   | 100.00 |        17.10

eti(em1)
#> Equal-Tailed Interval
#> 
#> cyl |     hp |        95% ETI
#> -----------------------------
#> 4   |  50.00 | [25.20, 29.69]
#> 4   | 100.00 | [24.18, 28.18]
#> 6   |  50.00 | [18.26, 24.88]
#> 6   | 100.00 | [17.80, 22.94]
#> 8   |  50.00 | [13.79, 24.29]
#> 8   | 100.00 | [14.01, 21.60]

hdi(em1)
#> Highest Density Interval
#> 
#> cyl |     hp |        95% HDI
#> -----------------------------
#> 4   |  50.00 | [25.16, 29.60]
#> 4   | 100.00 | [24.18, 28.18]
#> 6   |  50.00 | [18.34, 24.94]
#> 6   | 100.00 | [17.73, 22.83]
#> 8   |  50.00 | [14.13, 24.50]
#> 8   | 100.00 | [14.16, 21.71]

ci(em1)
#> Equal-Tailed Interval
#> 
#> cyl |     hp |        95% ETI
#> -----------------------------
#> 4   |  50.00 | [25.20, 29.69]
#> 4   | 100.00 | [24.18, 28.18]
#> 6   |  50.00 | [18.26, 24.88]
#> 6   | 100.00 | [17.80, 22.94]
#> 8   |  50.00 | [13.79, 24.29]
#> 8   | 100.00 | [14.01, 21.60]

bci(em1)
#>   cyl  hp   CI   CI_low  CI_high
#> 1   4  50 0.95 25.22676 29.74384
#> 2   4 100 0.95 24.16353 28.17122
#> 3   6  50 0.95 18.29945 24.89895
#> 4   6 100 0.95 17.81465 22.97405
#> 5   8  50 0.95 13.93223 24.39837
#> 6   8 100 0.95 14.12260 21.66458

spi(em1)
#> Shortest Probability Interval
#> 
#> cyl |     hp |        95% SPI
#> -----------------------------
#> 4   |  50.00 | [25.16, 29.60]
#> 4   | 100.00 | [24.18, 28.18]
#> 6   |  50.00 | [18.34, 24.94]
#> 6   | 100.00 | [17.72, 22.83]
#> 8   |  50.00 | [14.12, 24.50]
#> 8   | 100.00 | [14.12, 21.67]

p_rope(em1)
#> Proportion of samples inside the ROPE [-0.10, 0.10]
#> 
#> cyl |     hp | p (ROPE)
#> -----------------------
#> 4   |  50.00 |   < .001
#> 4   | 100.00 |   < .001
#> 6   |  50.00 |   < .001
#> 6   | 100.00 |   < .001
#> 8   |  50.00 |   < .001
#> 8   | 100.00 |   < .001

p_map(em1)
#> MAP-based p-value
#> 
#> cyl |     hp | p (MAP)
#> ----------------------
#> 4   |  50.00 |  < .001
#> 4   | 100.00 |  < .001
#> 6   |  50.00 |  < .001
#> 6   | 100.00 |  < .001
#> 8   |  50.00 |  < .001
#> 8   | 100.00 |  < .001

p_direction(em1)
#> Probability of Direction
#> 
#> cyl |     hp |   pd
#> -------------------
#> 4   |  50.00 | 100%
#> 4   | 100.00 | 100%
#> 6   |  50.00 | 100%
#> 6   | 100.00 | 100%
#> 8   |  50.00 | 100%
#> 8   | 100.00 | 100%

p_significance(em1)
#> Practical Significance (threshold: 0.10)
#> 
#> cyl |     hp |   ps
#> -------------------
#> 4   |  50.00 | 1.00
#> 4   | 100.00 | 1.00
#> 6   |  50.00 | 1.00
#> 6   | 100.00 | 1.00
#> 8   |  50.00 | 1.00
#> 8   | 100.00 | 1.00

rope(em1)
#> # Proportion of samples inside the ROPE [-0.10, 0.10]:
#> 
#> cyl |  hp | inside ROPE
#> -----------------------
#> 4   |  50 |      0.00 %
#> 4   | 100 |      0.00 %
#> 6   |  50 |      0.00 %
#> 6   | 100 |      0.00 %
#> 8   |  50 |      0.00 %
#> 8   | 100 |      0.00 %

equivalence_test(em1)
#> # Test for Practical Equivalence
#> 
#>   ROPE: [-0.10 0.10]
#> 
#> cyl |  hp |       H0 | inside ROPE |       95% HDI
#> --------------------------------------------------
#> 4   |  50 | Rejected |      0.00 % | [25.20 29.69]
#> 4   | 100 | Rejected |      0.00 % | [24.18 28.18]
#> 6   |  50 | Rejected |      0.00 % | [18.26 24.88]
#> 6   | 100 | Rejected |      0.00 % | [17.80 22.94]
#> 8   |  50 | Rejected |      0.00 % | [13.79 24.29]
#> 8   | 100 | Rejected |      0.00 % | [14.01 21.60]

Created on 2024-09-03 with reprex v2.1.0

To do:

codecov[bot] commented 3 weeks ago

Codecov Report

Attention: Patch coverage is 15.44715% with 104 lines in your changes missing coverage. Please review.

Project coverage is 52.19%. Comparing base (a007760) to head (f4f137c). Report is 26 commits behind head on main.

Files with missing lines Patch % Lines
R/ci.R 0.00% 14 Missing :warning:
R/spi.R 0.00% 9 Missing :warning:
R/bci.R 33.33% 6 Missing :warning:
R/equivalence_test.R 0.00% 6 Missing :warning:
R/estimate_density.R 0.00% 6 Missing :warning:
R/eti.R 0.00% 6 Missing :warning:
R/hdi.R 0.00% 6 Missing :warning:
R/map_estimate.R 0.00% 6 Missing :warning:
R/p_direction.R 0.00% 6 Missing :warning:
R/p_map.R 0.00% 6 Missing :warning:
... and 9 more
Additional details and impacted files ```diff @@ Coverage Diff @@ ## main #672 +/- ## ========================================== - Coverage 52.45% 52.19% -0.27% ========================================== Files 65 65 Lines 5309 5539 +230 ========================================== + Hits 2785 2891 +106 - Misses 2524 2648 +124 ```

:umbrella: View full report in Codecov by Sentry.
:loudspeaker: Have feedback on the report? Share it here.

mattansb commented 3 weeks ago

Alright, I'm done!

strengejacke commented 3 weeks ago

ok, so everything works except plot-methods, right?

mattansb commented 3 weeks ago

Yup. Just one last thing to change and then I'll merge.

strengejacke commented 3 weeks ago

are the latest changes in insight only required for plotting, or in general for emmeans/marginaleffects support?

mattansb commented 3 weeks ago

They are fundamental for this PR in general.