easystats / performance

:muscle: Models' quality and performance metrics (R2, ICC, LOO, AIC, BF, ...)
https://easystats.github.io/performance/
GNU General Public License v3.0
1.02k stars 90 forks source link

Confidence intervals for ICC, it seems that none of the `ci_method`s work (neither default, `boot` or `analytical`). #745

Closed strengejacke closed 3 months ago

strengejacke commented 3 months ago

For the confidence intervals, it seems that none of the ci_methods work (neither default, boot or analytical).

icc(glmm_beta, ci = 0.95)
icc(glmm_beta, ci = 0.95, method = "boot")
icc(glmm_beta, ci = 0.95, method = "analytical")

Originally posted by @roaldarbol in https://github.com/easystats/performance/issues/742#issuecomment-2217148678

roaldarbol commented 3 months ago

Just to add context, this was at least the case for a beta_family GLMM.

roaldarbol commented 3 months ago

I tried to bootstrap manually to better understand where the issue creeps in (the code base is a bit hard to understand as an outsider, so it was easier to just bootstrap manually). Now, warnings() surface:

#> Warning: Can't calculate model's distribution-specific variance. Results are not
#>   reliable.
#>   A reason can be that the null model could not be computed manually. Try
#>   to fit the null model manually and pass it to `null_model`.

I tracked that warning down to the .variance_distributional in {insight}, here specifically: https://github.com/easystats/insight/blob/3b39fbf911ed581fabf4a1b782cd94d968dbc684/R/compute_variances.R#L812, but I can't track down where it gets called unfortunately. The warnings get silenced here though https://github.com/easystats/performance/blob/db8ab03fcbdedcfe786b54260d27bba4c1e053dd/R/icc.R#L591-L602

Here' a reprex with my bootstrapping.

library(boot)
library(performance)
library(glmmTMB)
library(tibble)

data <- tibble::tibble(
  animal_id = factor(
    c(
      "208", "208", "209", "209", "210", "210", "214", "214", "223", "223", "228",
      "228", "211", "211", "213", "213", "217", "217", "222", "222", "234", "234",
      "241", "241", "216", "216", "230", "230", "231", "231", "240", "240", "242",
      "242", "244", "244", "218", "218", "220", "220", "225", "225", "237", "237",
      "239", "239", "245", "219", "219", "236", "251", "251", "252", "252", "253",
      "253", "254", "254"
    ),
    levels = c(
      "200", "204", "205", "206", "215", "224", "208", "209", "210", "214", "223",
      "228", "211", "213", "217", "222", "234", "241", "216", "230", "231", "240",
      "242", "244", "218", "220", "225", "237", "239", "245", "219", "236", "251",
      "252", "253", "254"
    )
  ),
  trial = c(
    1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
    1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2,
    1, 2, 1, 2, 1, 2
  ),
  activity_ratio = c(
    0.8914582931189637, 0.8251125819520664, 0.704417912435994, 0.6429797045522079,
    0.7428360820544958, 0.7416957064438143, 0.6793835171056175,
    0.7534278702070989, 0.7116537094097775, 0.7639945058212043,
    0.8272100778696002, 0.8264990913298758, 0.7445559071415172,
    0.7911376794249645, 0.8897734327223286, 0.9138347192425403,
    0.5468511738147778, 0.8022125366247213, 0.8378463077948621,
    0.8527254596944677, 0.7622314357634615, 0.7164192267756896,
    0.7119530520569298, 0.8051675370015825, 0.8042960891815846,
    0.8116219543172005, 0.7611173161024801, 0.724074262956317, 0.6073504196141449,
    0.6526196030844679, 0.8070872433720373, 0.8880053394408807,
    0.7964546709282873, 0.6984266430511521, 0.7935560206831679,
    0.7543744697032041, 0.8645921114569987, 0.8962935045894147,
    0.6999538623771479, 0.7134721886290928, 0.7462869489534781,
    0.7631409786366209, 0.6649207881654442, 0.6309354708818021,
    0.8240100146822962, 0.7855022343275443, 0.7558096416689716,
    0.7490390808034896, 0.7983524314588314, 0.820632959774302, 0.6840643964843275,
    0.7398234779596219, 0.7353938525991223, 0.7295757066367529,
    0.8446626928905503, 0.8706135671047699, 0.6868156119525347,
    0.7542702777435738
  ),
)

 get_icc <- function(data, indices, formula, family){
   d <- data[indices,] # allows boot to select sample
   mod <- glmmTMB::glmmTMB(formula = formula,
                           data = d,
                           family = family)
   t <- as.numeric(icc(mod)[1])
   return(t)
 }
 # Gaussian works
 results_gauss <- boot::boot(
   data = data, 
   statistic = get_icc,
   R = 10,
   formula = activity_ratio ~ 1 + (1 | animal_id),
   family = gaussian
 )
 results_gauss[1]
#> $t0
#> [1] 0.5940239

 # Beta-family
 results_beta <- boot::boot(
   data = data, 
   statistic = get_icc,
   R = 10,
   formula = activity_ratio ~ 1 + (1 | animal_id),
   family = beta_family
   )
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#>   reliable.
#>   A reason can be that the null model could not be computed manually. Try
#>   to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#>   reliable.
#>   A reason can be that the null model could not be computed manually. Try
#>   to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#>   reliable.
#>   A reason can be that the null model could not be computed manually. Try
#>   to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#>   reliable.
#>   A reason can be that the null model could not be computed manually. Try
#>   to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#>   reliable.
#>   A reason can be that the null model could not be computed manually. Try
#>   to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#>   reliable.
#>   A reason can be that the null model could not be computed manually. Try
#>   to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#>   reliable.
#>   A reason can be that the null model could not be computed manually. Try
#>   to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#>   reliable.
#>   A reason can be that the null model could not be computed manually. Try
#>   to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#>   reliable.
#>   A reason can be that the null model could not be computed manually. Try
#>   to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#>   reliable.
#>   A reason can be that the null model could not be computed manually. Try
#>   to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#>   reliable.
#>   A reason can be that the null model could not be computed manually. Try
#>   to fit the null model manually and pass it to `null_model`.
 warnings()
 results_beta[1]
#> $t0
#> [1] 1

Created on 2024-07-11 with reprex v2.1.0

strengejacke commented 3 months ago

The main function for bootstrapping ICCs is:

https://github.com/easystats/performance/blob/db8ab03fcbdedcfe786b54260d27bba4c1e053dd/R/icc.R#L654-L675

For mixed models, we use lme4::bootMer(), because this function can better take random effects into account:

https://github.com/easystats/performance/blob/db8ab03fcbdedcfe786b54260d27bba4c1e053dd/R/icc.R#L618-L651

We could possibly increase the tolerance here:

https://github.com/easystats/performance/blob/db8ab03fcbdedcfe786b54260d27bba4c1e053dd/R/icc.R#L605-L615

Maybe I add an option that doesn't silence the warnings and errors.

roaldarbol commented 3 months ago

Maybe applying the Gamma prior on the random effects could also help here? icc() actually already has the verbose option, but that is currently not in place there.

strengejacke commented 3 months ago

icc() actually already has the verbose option, but that is currently not in place there.

Yes, should be in there now in #747

strengejacke commented 3 months ago

It works for me, except for ci_method = NULL, where the CI for the adjusted ICC is not accurate. But I don't think I can fix this, because you can't pass any arguments to bootMer() that will be passed down to the boot-function.

library(glmmTMB)
library(performance)
set.seed(123)
a <- seq(from = 5, to = 95)
b1 <- jitter(a, factor = 20)
b2 <- jitter(a, factor = 20)
b2 <- b2 + 30
b3 <- jitter(a, factor = 20)
b3 <- b3 + 30
t_a <- rep('a', length(a))
t_b <- rep('b', length(a))

c <- as.factor(a)
d <- data.frame(id = c(c,c,c,c),
                value = c(a,b1,b2,b3),
                treatment = c(t_a, t_a, t_b, t_b))
d$value <- d$value / (max(d$value) + 0.1)

# GLMMs and GLMs with beta and Gaussian families
glmm_beta <- glmmTMB::glmmTMB(value ~ treatment + (1|id),
                        data = d,
                        family=beta_family)

options(easystats_errors = TRUE)
icc(glmm_beta, ci = 0.95, ci_method = "analytical", verbose = TRUE)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.992 [0.990, 0.994]
#>   Unadjusted ICC: 0.742 [0.691, 0.783]
icc(glmm_beta, ci = 0.95, iterations = 20, ci_method = "boot", verbose = TRUE)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.992 [0.992, 0.997]
#>   Unadjusted ICC: 0.742 [0.730, 0.774]
icc(glmm_beta, ci = 0.95, iterations = 20, ci_method = NULL, verbose = TRUE)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.992 [1.000, 1.000]
#>   Unadjusted ICC: 0.742 [0.681, 0.793]

Created on 2024-07-13 with reprex v2.1.1

roaldarbol commented 3 months ago

Good to see that the analytical version works! I also realised that I had been using method initially, and only the last couple of days used ci_method. However, I get something that falls outside of the CI with this data using both boot and NULL:

library(glmmTMB)
library(performance)
library(tibble)
set.seed(123)
d <- tibble::tibble(
  id = factor(
    rep(
      c(
        "208", "209", "210", "214", "211", "213", "217", "222", "234", "241", "216",
        "230", "231", "240", "218", "220", "225", "237", "239", "219", "251", "254"
      ),
      each = 2L
    )
  ),
  trial = rep(c(1, 2), 22),
  value = c(
    0.863877517291236, 0.8500985916499201, 0.6541010255529416, 0.7582038836339923,
    0.6922830509391013, 0.7575009666972787, 0.6430974423340704,
    0.7713382336475698, 0.7649692734749585, 0.7898476946736548, 0.883326476678558,
    0.9135168527140827, 0.3561333035780704, 0.8467377516925126,
    0.8354452445244863, 0.8798297199656977, 0.7519457327249348,
    0.7099551260927718, 0.7025379324901558, 0.8060094582307198,
    0.8128011566020357, 0.8150695559515087, 0.7533734045838847,
    0.5884078386619355, 0.627989231219052, 0.6572662375116133, 0.807705876809919,
    0.9015309272481724, 0.8853861905590428, 0.8946941586945131,
    0.7415384045839729, 0.7106215229003023, 0.7331454873414073,
    0.7562495496383957, 0.6512322762646132, 0.6516200307066495,
    0.7934525374461099, 0.7396002178749655, 0.7066377184624543,
    0.7877103462614253, 0.6517006781210346, 0.7962228291912887,
    0.6792873354123711, 0.6876905228906177
  ),
)

# GLMMs and GLMs with beta and Gaussian families
glmm_beta <- glmmTMB::glmmTMB(value ~ trial + (1|id),
                              data = d,
                              family=beta_family)

options(easystats_errors = TRUE)
icc(glmm_beta, ci = 0.95, ci_method = "analytical", verbose = TRUE)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.893 [0.803, 0.938]
#>   Unadjusted ICC: 0.761 [0.586, 0.856]

icc(glmm_beta, ci = 0.95, iterations = 20, ci_method = "boot", verbose = TRUE)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.893 [0.894, 0.994]
#>   Unadjusted ICC: 0.761 [0.612, 0.989]

icc(glmm_beta, ci = 0.95, iterations = 20, ci_method = NULL, verbose = TRUE)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.893 [1.000, 1.000]
#>   Unadjusted ICC: 0.761 [0.587, 0.960]

packageVersion('performance')
#> [1] '0.12.0.8'
packageVersion('insight')
#> [1] '0.20.1.13'

Created on 2024-07-13 with reprex v2.1.0

roaldarbol commented 3 months ago

Ah! I think the estimate is not drawn from the bootstrap. I guess it should pull the median/mean estimate produced by the bootstrap rather than just the un-bootstrapped estimate. Does that sound right?

bwiernik commented 3 months ago

Typical practice for bootstrapped CIs is to use the original estimate as the point estimate and the bootstrap distribution just for the standard error or CI. The mean/median don't always align with the MLE point estimate (this is part of the motivation behind the bias-corrected and bias-corrected-and-accelerated bootstraps). The mean or median of the bootstrap distribution are potential estimators for the point estimate, but I would recommend we retain the original estimate as the point estimate for the default output in easystats.

strengejacke commented 3 months ago

Maybe the number of iterations is too low for accurate CIs?

bwiernik commented 3 months ago

How many iterations is default?

roaldarbol commented 3 months ago

How many iterations is default?

@bwiernik Default is 100 iterations.

roaldarbol commented 3 months ago

Just realised that the default ci_method is NULL, so if left at default, it's using .do_lme4_bootmer - the one that fails. If boot is specified, it does not in fact use .do_lme4_bootmer:

The main function for bootstrapping ICCs is:

https://github.com/easystats/performance/blob/db8ab03fcbdedcfe786b54260d27bba4c1e053dd/R/icc.R#L654-L657

So .do_lme4_bootmer still currently doesn't work. It also affects r2_nakagawa, so I think there's still something amiss in there.

(boot works better, but even with 1000 iterations, the estimate is practically on the 2.5% mark - that still seems suspicious.)

library(glmmTMB)
library(performance)
library(tibble)
library(insight)
set.seed(123)
d <- tibble::tibble(
  id = factor(
    rep(
      c(
        "208", "209", "210", "214", "223", "228", "211", "213", "217", "222", "234",
        "241", "216", "230", "231", "240", "242", "244", "218", "220", "225", "237",
        "239", "219", "251", "252", "253", "254"
      ),
      each = 2L
    ),
    levels = c(
      "200", "204", "205", "206", "215", "224", "208", "209", "210", "214", "223",
      "228", "211", "213", "217", "222", "234", "241", "216", "230", "231", "240",
      "242", "244", "218", "220", "225", "237", "239", "245", "219", "236", "251",
      "252", "253", "254"
    )
  ),
  trial = rep(c(1, 2), 28),
  value = c(
    0.09498024195328074, 0.06501857725091675, 0.11815590175658126,
    0.07432374123432427, 0.20160866416653947, 0.16168961981848032,
    0.18431356919353978, 0.1729636058911192, 0.08144358847160854,
    0.03429214493815157, 0.10657404730975248, 0.06867883079629465,
    0.05921577649099549, 0.10027697162542965, 0.22948745366059534,
    0.1318564983755886, 0.11521536711005413, 0.09988251038099903,
    0.11269870982289175, 0.10065372202090746, 0.30180913110275365,
    0.2784384586997261, 0.31090363146026273, 0.201433511508709,
    0.2064625890910772, 0.24142003722960942, 0.07379095189435501,
    0.05629768825555659, 0.23164588826907823, 0.2598283402933376,
    0.0773854788008177, 0.08506098926928175, 0.05543770346039902,
    0.08545932516134344, 0.13262226256298723, 0.13835676441921468,
    0.011223979573538618, 0.12175578019653631, 0.18844306077612688,
    0.18665264740941406, 0.30528203811103827, 0.36889354953270814,
    0.08101054157438016, 0.09939626397157338, 0.23026277596112824,
    0.3138068338604861, 0.10959072828854538, 0.1190276194587162,
    0.20927572336281877, 0.1531936170056966, 0.21758853563344488,
    0.12740603572231024, 0.11646060319509406, 0.05741400972805333,
    0.1767355830736662, 0.08745443933512403
  ),
) |>
  dplyr::group_by(id)

# GLMMs and GLMs with beta and Gaussian families
glmm_beta <- glmmTMB::glmmTMB(value ~ trial + (1|id),
                              data = d,
                              family=beta_family)

options(easystats_errors = TRUE)

# See variances
insight::get_variance(glmm_beta)
#> $var.fixed
#> [1] 0.002488934
#> 
#> $var.random
#> [1] 0.3247858
#> 
#> $var.residual
#> [1] 0.08161196
#> 
#> $var.distribution
#> [1] 0.08161196
#> 
#> $var.dispersion
#> [1] 0
#> 
#> $var.intercept
#>        id 
#> 0.3247858

# Compute some ICCs
icc(glmm_beta, ci = 0.95)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.799 [1.000, 1.000]
#>   Unadjusted ICC: 0.794 [0.916, 1.000]
icc(glmm_beta, ci = 0.95, iterations = 10)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.799 [1.000, 1.000]
#>   Unadjusted ICC: 0.794 [0.948, 0.998]
icc(glmm_beta, ci = 0.95, iterations = 50)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.799 [1.000, 1.000]
#>   Unadjusted ICC: 0.794 [0.880, 1.000]
icc(glmm_beta, ci = 0.95, iterations = 100)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.799 [1.000, 1.000]
#>   Unadjusted ICC: 0.794 [0.939, 1.000]
icc(glmm_beta, ci = 0.95, iterations = 1000)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.799 [1.000, 1.000]
#>   Unadjusted ICC: 0.794 [0.928, 1.000]
icc(glmm_beta, ci = 0.95, ci_method = "boot")
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.799 [0.804, 0.972]
#>   Unadjusted ICC: 0.794 [0.803, 0.962]
icc(glmm_beta, ci = 0.95, ci_method = "boot", iterations = 10)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.799 [0.809, 0.964]
#>   Unadjusted ICC: 0.794 [0.798, 0.962]
icc(glmm_beta, ci = 0.95, ci_method = "boot", iterations = 50)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.799 [0.792, 0.977]
#>   Unadjusted ICC: 0.794 [0.787, 0.968]
icc(glmm_beta, ci = 0.95, ci_method = "boot", iterations = 100)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.799 [0.793, 0.977]
#>   Unadjusted ICC: 0.794 [0.793, 0.974]
icc(glmm_beta, ci = 0.95, ci_method = "boot", iterations = 1000)
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.799 [0.790, 0.973]
#>   Unadjusted ICC: 0.794 [0.782, 0.970]
icc(glmm_beta, ci = 0.95, ci_method = "analytical")
#> # Intraclass Correlation Coefficient
#> 
#>     Adjusted ICC: 0.799 [0.670, 0.873]
#>   Unadjusted ICC: 0.794 [0.663, 0.870]

# And some R2s
r2_nakagawa(glmm_beta, ci = 0.95)
#> # R2 for Mixed Models
#> 
#>   Conditional R2: 0.800 [1.000, 1.000]
#>      Marginal R2: 0.006 [8.647e-05, 0.084]
r2_nakagawa(glmm_beta, ci = 0.95, iterations = 1000)
#> # R2 for Mixed Models
#> 
#>   Conditional R2: 0.800 [1.000, 1.000]
#>      Marginal R2: 0.006 [1.518e-05, 0.073]

packageVersion('performance')
#> [1] '0.12.2'
packageVersion('insight')
#> [1] '0.20.2'

Created on 2024-07-22 with reprex v2.1.0