easystats / bayestestR

:ghost: Utilities for analyzing Bayesian models and posterior distributions
https://easystats.github.io/bayestestR/
GNU General Public License v3.0
563 stars 55 forks source link

Question on contr.equalprior function #644

Closed GiselaGovaart closed 6 months ago

GiselaGovaart commented 6 months ago

I have some questions regarding the contr.equalprior function, see the reprex below.

I run two brms models, m1 uses contr.equalprior and m2 uses contr.equalprior_pairs.

Question 1: What is the exact difference between contr.equalprior_pairs and contr.equalprior, and based on which criteria should I choose one or the other? Both contr.equalprior and contr.equalprior_pairs give me equal priors - just with different values. However, these different values also mean that the BFs are different, so the choice seems important.

Question 2:

In summary, my question: is it okay that the priors for different contrasts are different, if that is caused my the fact that for these contrasts I am comparing different combinations of levels of my factors, and as long as I don't compare contrasts that have different priors? And if not, is there a way to force the priors to be the same for an entire custom contrast matrix?

Question 3 I have two additional questions:

library(emmeans)
library(easystats)
#> # Attaching packages: easystats 0.7.0 (red = needs update)
#> ✖ bayestestR  0.13.1   ✔ correlation 0.8.4 
#> ✔ datawizard  0.9.1    ✔ effectsize  0.8.6 
#> ✔ insight     0.19.8   ✖ modelbased  0.8.6 
#> ✖ performance 0.10.8   ✖ parameters  0.21.4
#> ✔ report      0.5.8    ✖ see         0.8.1 
#> 
#> Restart the R-Session and update packages with `easystats::easystats_update()`.
library(brms)
#> Loading required package: Rcpp
#> Loading 'brms' package (version 2.18.0). Useful instructions
#> can be found by typing help('brms'). A more detailed introduction
#> to the package is available through vignette('brms_overview').
#> 
#> Attaching package: 'brms'
#> The following object is masked from 'package:stats':
#> 
#>     ar
library(tidyverse)
library(here)
#> here() starts at /private/var/folders/6h/zb056tz154712qv0hdv29h5w0000gn/T/RtmpzysS7l/reprex-339b529ac418-dopey-nyala
library(see)

# set-up:
project_seed = 2049
set.seed(project_seed) 

# Make df, with 12 subjects, with:
# Group (between subj factor): levels "fam" and "unfam"
# TestSpeaker (within): levels 1,2,3
# sleepState (between): levels awake, activesleep, quietsleep
# MMR: dependent variable
df = structure(list(Subj = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 
                                       3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 
                                       8L, 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L), 
                                     levels = c("Subj01","Subj02", "Subj03", "Subj04", "Subj05", "Subj06", 
                                                "Subj07", "Subj08","Subj09", "Subj10", "Subj11", "Subj12"), 
                                     class = "factor"),
                    Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                        1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                        2L, 2L, 2L), levels = c("fam", "unfam"), class = "factor"), 
                    TestSpeaker = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
                                              3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
                                              1L, 2L, 3L), levels = c("1", "2", "3"), class = "factor"),
                    MMR = c(38.3193739833394,-26.3345733319488, -36.5391489690257, 17.6727210461579, 13.0723969801114, 
                            -4.24116674572545, 21.2481968372323, 11.4709975141478, 1.82856457361663, 
                            43.7294164718069, 20.3131479540544, 0.785675781481198, -2.15902787287105, 
                            -25.4903371611785, 61.873025266565, 9.06733331354276, -18.7574284113092, 
                            6.33216574768327, -34.9097027199614, -3.96327639212982, 15.9747552016853, 
                            -38.801070417319, -6.46518128748102, -22.630432474398, 21.9291933267653, 
                            24.4946719999554, -31.2847387815119, -8.04032285881707, 14.2321054924562, 
                            8.80183458992482, 18.9550435474235, 33.2621031208136, -11.4198082197363, 
                            -14.7150081652997, 29.2510483128696, -31.1494998323233), 
                    sleepState = structure(c(2L,2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 
                                             1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 
                                             1L, 1L, 1L), levels = c("awake", "activesleep", "quietsleep"), class = "factor")),
               row.names = c(NA,-36L), class = c("tbl_df", "tbl", "data.frame"))

### MODEL 1: With  contr.equalprior set globally
options(contrasts = c("contr.equalprior", "contr.poly"))

m1 = brm(MMR ~ 1 + TestSpeaker * Group +
        sleepState * TestSpeaker +
        (1 + TestSpeaker * Group | Subj),
      data = df,
      family = gaussian(),
      prior = c(set_prior("normal(5.97, 23.34)",
                          class = "Intercept"),
                set_prior("normal(0, 23.34)",
                          class = "b"),
                set_prior("normal(0, 23.34)",
                          class = "sigma")),
      init = "random",
      control = list(
        adapt_delta = .99,
        max_treedepth = 15
      ),
      chains = 4,
      iter = 4000,
      warmup = 2000,
      thin = 1,
      algorithm = "sampling",
      cores = 4,
      seed = project_seed,
      save_pars = save_pars(all = TRUE)
)
#> Compiling Stan program...
#> Start sampling
#> Warning: There were 31 divergent transitions after warmup. See
#> https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
#> to find out why this is a problem and how to eliminate them.
#> Warning: There were 2 chains where the estimated Bayesian Fraction of Missing Information was low. See
#> https://mc-stan.org/misc/warnings.html#bfmi-low
#> Warning: Examine the pairs() plot to diagnose sampling problems
#> Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#bulk-ess
#> Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#tail-ess

### MODEL 2: With  contr.equalprior_pairs set globally
options(contrasts = c("contr.equalprior_pairs", "contr.poly"))
m2 = brm(MMR ~ 1 + TestSpeaker * Group +
        sleepState * TestSpeaker +
        (1 + TestSpeaker * Group | Subj),
      data = df,
      family = gaussian(),
      prior = c(set_prior("normal(5.97, 23.34)",
                          class = "Intercept"),
                set_prior("normal(0, 23.34)",
                          class = "b"),
                set_prior("normal(0, 23.34)",
                          class = "sigma")),
      init = "random",
      control = list(
        adapt_delta = .99,
        max_treedepth = 15
      ),
      chains = 4,
      iter = 4000,
      warmup = 2000,
      thin = 1,
      algorithm = "sampling",
      cores = 4,
      seed = project_seed,
      save_pars = save_pars(all = TRUE)
  )
#> Compiling Stan program...
#> recompiling to avoid crashing R session
#> Start sampling
#> Warning: There were 52 divergent transitions after warmup. See
#> https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
#> to find out why this is a problem and how to eliminate them.
#> Warning: There were 1 chains where the estimated Bayesian Fraction of Missing Information was low. See
#> https://mc-stan.org/misc/warnings.html#bfmi-low
#> Warning: Examine the pairs() plot to diagnose sampling problems
#> Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#bulk-ess
#> Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#tail-ess

## Question 1 -------
# The contrast matrices are different:
contrasts(df$TestSpeaker) <- contr.equalprior_pairs
contrasts(df$TestSpeaker)
#>   [,1]       [,2]
#> 1  0.0  0.5773503
#> 2 -0.5 -0.2886751
#> 3  0.5 -0.2886751
contrasts(df$TestSpeaker) <- contr.equalprior
contrasts(df$TestSpeaker)
#>         [,1]       [,2]
#> 1  0.0000000  0.8164966
#> 2 -0.7071068 -0.4082483
#> 3  0.7071068 -0.4082483

# Both contr.equalprior and contr.equalprior_pairs give me equal priors:
m_prior <- unupdate(m1, verbose=FALSE) # sample priors from model
m_prior_pairwise <-
  m_prior %>%
  emmeans(~ TestSpeaker)  %>%
  pairs()
#> NOTE: Results may be misleading due to involvement in interactions
# Priors for m1:
point_estimate(m_prior_pairwise, centr = "mean", disp = TRUE)
#> Point Estimate
#> 
#> Parameter                   | Mean |    SD
#> ------------------------------------------
#> TestSpeaker1 - TestSpeaker2 | 0.04 | 33.19
#> TestSpeaker1 - TestSpeaker3 | 0.24 | 33.47
#> TestSpeaker2 - TestSpeaker3 | 0.19 | 33.63

m_prior <- unupdate(m2, verbose=FALSE) # sample priors from model
m_prior_pairwise <-
  m_prior %>%
  emmeans(~ TestSpeaker)  %>%
  pairs()
#> NOTE: Results may be misleading due to involvement in interactions
# Priors for m2:
point_estimate(m_prior_pairwise, centr = "mean", disp = TRUE)
#> Point Estimate
#> 
#> Parameter                   |  Mean |    SD
#> -------------------------------------------
#> TestSpeaker1 - TestSpeaker2 |  0.11 | 23.58
#> TestSpeaker1 - TestSpeaker3 | -0.20 | 23.52
#> TestSpeaker2 - TestSpeaker3 | -0.32 | 23.01

## Question 2 --------
### Question 2A
# I compare some pairs of the interaction between TestSpeaker*Group. 
# create a custom contrast matrix:
MMR.emm1 = m2 %>%
  emmeans(~ Group:TestSpeaker) 
MMR.emm1 # to set the custom contrast correctly
#>  Group TestSpeaker emmean lower.HPD upper.HPD
#>  fam   1           14.095     -2.11      30.3
#>  unfam 1            0.595    -17.94      19.2
#>  fam   2           -1.025    -19.72      15.8
#>  unfam 2           12.887     -5.88      31.2
#>  fam   3            3.345    -15.58      22.2
#>  unfam 3           -6.456    -25.13      12.3
#> 
#> Results are averaged over the levels of: sleepState 
#> Point estimate displayed: median 
#> HPD interval probability: 0.95

fam1 = c(1,0,0,0,0,0)
unfam1 = c(0,1,0,0,0,0)
fam2 = c(0,0,1,0,0,0)
unfam2 = c(0,0,0,1,0,0)
fam3 = c(0,0,0,0,1,0)
unfam3 = c(0,0,0,0,0,1)

custom_contrasts <- list(
  list("unfam2-fam1" = unfam2 - fam1),
  list("unfam2-unfam3" = unfam2 - unfam3),
  list("unfam1-unfam2" = unfam1 - unfam2),
  list("fam2-fam3" = fam2-fam3),
  list("((unfam2-unfam3)-(fam2-fam3))" = ((unfam2 - unfam3) - (fam2 - fam3)))
) 

# Compute the BF for these contrasts
# For m1:
#  comparison of prior distributions
m1_prior <- unupdate(m1, verbose=FALSE) # sample priors from model
m1_prior_pairs <-
  m1_prior %>%
  emmeans(~ Group:TestSpeaker)
m1_prior_pairs = contrast(m1_prior_pairs, method = custom_contrasts)
#  comparison of posterior distributions
m1_post_pairs <-
  m1 %>%
  emmeans(~ TestSpeaker:Group) 
m1_post_pairs = contrast(m1_post_pairs, method = custom_contrasts)
# Bayes Factors (Savage-Dickey density ratio)
BF_m1 <-
  m1_post_pairs %>%
  bf_parameters(prior = m1_prior_pairs) %>%
  arrange(log_BF) # sort according to BF
#> Warning: Bayes factors might not be precise.
#>   For precise Bayes factors, sampling at least 40,000 posterior samples is
#>   recommended.
BF_m1
#> Bayes Factor (Savage-Dickey density ratio)
#> 
#> Parameter                     |    BF
#> -------------------------------------
#> unfam1-unfam2                 | 0.376
#> unfam2-unfam3                 | 0.475
#> fam2-fam3                     | 0.528
#> unfam2-fam1                   | 0.551
#> ((unfam2-unfam3)-(fam2-fam3)) | 0.840
#> 
#> * Evidence Against The Null: 0
plot(BF_m1)


# For m2:
#  comparison of prior distributions
m2_prior <- unupdate(m2, verbose=FALSE) # sample priors from model
m2_prior_pairs <-
  m2_prior %>%
  emmeans(~ Group:TestSpeaker)
m2_prior_pairs = contrast(m2_prior_pairs, method = custom_contrasts)
#  comparison of posterior distributions
m2_post_pairs <-
  m2 %>%
  emmeans(~ TestSpeaker:Group) 
m2_post_pairs = contrast(m2_post_pairs, method = custom_contrasts)
# Bayes Factors (Savage-Dickey density ratio)
BF_m2 <-
  m2_post_pairs %>%
  bf_parameters(prior = m2_prior_pairs) %>%
  arrange(log_BF) # sort according to BF
#> Warning: Bayes factors might not be precise.
#>   For precise Bayes factors, sampling at least 40,000 posterior samples is
#>   recommended.
BF_m2
#> Bayes Factor (Savage-Dickey density ratio)
#> 
#> Parameter                     |    BF
#> -------------------------------------
#> unfam1-unfam2                 | 0.503
#> unfam2-unfam3                 | 0.555
#> fam2-fam3                     | 0.661
#> unfam2-fam1                   | 0.679
#> ((unfam2-unfam3)-(fam2-fam3)) |  1.26
#> 
#> * Evidence Against The Null: 0
plot(BF_m2)


# Check priors on the contrasts:
point_estimate(m1_prior_pairs, centr = "mean", disp = TRUE)
#> Point Estimate
#> 
#> Parameter                     |     Mean |    SD
#> ------------------------------------------------
#> unfam2-fam1                   |    -0.08 | 48.58
#> unfam2-unfam3                 |    -0.37 | 40.04
#> unfam1-unfam2                 |    -0.31 | 40.55
#> fam2-fam3                     |    -0.37 | 39.75
#> ((unfam2-unfam3)-(fam2-fam3)) | 2.22e-03 | 46.38
point_estimate(m2_prior_pairs, centr = "mean", disp = TRUE)
#> Point Estimate
#> 
#> Parameter                     |  Mean |    SD
#> ---------------------------------------------
#> unfam2-fam1                   |  0.61 | 33.97
#> unfam2-unfam3                 | -0.08 | 26.04
#> unfam1-unfam2                 | -0.01 | 26.27
#> fam2-fam3                     | -0.43 | 25.77
#> ((unfam2-unfam3)-(fam2-fam3)) |  0.36 | 23.13

### Question 2B
# Contrasts for the factor sleepState:
# make custom contrast:
MMR.emm2 = m2 %>%
  emmeans(~ sleepState) 
#> NOTE: Results may be misleading due to involvement in interactions
MMR.emm2 # to set the custom contrast correctly
#>  sleepState  emmean lower.HPD upper.HPD
#>  awake        -3.95    -16.08      8.57
#>  activesleep   8.89     -5.07     21.39
#>  quietsleep    6.61     -8.93     20.95
#> 
#> Results are averaged over the levels of: TestSpeaker, Group 
#> Point estimate displayed: median 
#> HPD interval probability: 0.95
# set custom contrast sleep
custom_contrasts_sleep <- list(
  list("awake - activesleep and quietsleep" = c(1, -1/2, -1/2)),
  list("quietsleep - awake and activesleep" = c(-1/2, -1/2, 1)),
  list("quietsleep vs activesleep" =c(0, 1, -1))
)

# Compute BFs (only for model 2)
#  comparison of prior distributions
m2_prior <- unupdate(m2, verbose=FALSE) # sample priors from model
m2_prior_sleep <-
  m2_prior %>%
  emmeans(~ sleepState)
#> NOTE: Results may be misleading due to involvement in interactions
m2_prior_sleep = contrast(m2_prior_sleep, method = custom_contrasts_sleep)
#  comparison of posterior distributions
m2_post_sleep <-
  m2 %>%
  emmeans(~ sleepState) 
#> NOTE: Results may be misleading due to involvement in interactions
m2_post_sleep = contrast(m2_post_sleep, method = custom_contrasts_sleep)
# Bayes Factors (Savage-Dickey density ratio)
BF_m2_sleep <-
  m2_post_sleep %>%
  bf_parameters(prior = m2_prior_sleep) %>%
  arrange(log_BF) # sort according to BF
#> Warning: Bayes factors might not be precise.
#>   For precise Bayes factors, sampling at least 40,000 posterior samples is
#>   recommended.
BF_m2_sleep
#> Bayes Factor (Savage-Dickey density ratio)
#> 
#> Parameter                          |    BF
#> ------------------------------------------
#> quietsleep vs activesleep          | 0.400
#> quietsleep - awake and activesleep | 0.488
#> awake - activesleep and quietsleep |  1.21
#> 
#> * Evidence Against The Null: 0
plot(BF_m2_sleep)


# Check the priors:
point_estimate(m2_prior_sleep, centr = "mean", disp = TRUE)
#> Point Estimate
#> 
#> Parameter                          |  Mean |    SD
#> --------------------------------------------------
#> awake - activesleep and quietsleep | -0.30 | 20.20
#> quietsleep - awake and activesleep |  0.34 | 20.20
#> quietsleep vs activesleep          | -0.25 | 23.61

Created on 2024-03-08 with reprex v2.0.2