Closed strengejacke closed 3 months ago
Just to add context, this was at least the case for a beta_family
GLMM.
I tried to bootstrap manually to better understand where the issue creeps in (the code base is a bit hard to understand as an outsider, so it was easier to just bootstrap manually). Now, warnings()
surface:
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#> reliable.
#> A reason can be that the null model could not be computed manually. Try
#> to fit the null model manually and pass it to `null_model`.
I tracked that warning down to the .variance_distributional
in {insight}, here specifically: https://github.com/easystats/insight/blob/3b39fbf911ed581fabf4a1b782cd94d968dbc684/R/compute_variances.R#L812, but I can't track down where it gets called unfortunately. The warnings get silenced here though https://github.com/easystats/performance/blob/db8ab03fcbdedcfe786b54260d27bba4c1e053dd/R/icc.R#L591-L602
Here' a reprex with my bootstrapping.
library(boot)
library(performance)
library(glmmTMB)
library(tibble)
data <- tibble::tibble(
animal_id = factor(
c(
"208", "208", "209", "209", "210", "210", "214", "214", "223", "223", "228",
"228", "211", "211", "213", "213", "217", "217", "222", "222", "234", "234",
"241", "241", "216", "216", "230", "230", "231", "231", "240", "240", "242",
"242", "244", "244", "218", "218", "220", "220", "225", "225", "237", "237",
"239", "239", "245", "219", "219", "236", "251", "251", "252", "252", "253",
"253", "254", "254"
),
levels = c(
"200", "204", "205", "206", "215", "224", "208", "209", "210", "214", "223",
"228", "211", "213", "217", "222", "234", "241", "216", "230", "231", "240",
"242", "244", "218", "220", "225", "237", "239", "245", "219", "236", "251",
"252", "253", "254"
)
),
trial = c(
1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2,
1, 2, 1, 2, 1, 2
),
activity_ratio = c(
0.8914582931189637, 0.8251125819520664, 0.704417912435994, 0.6429797045522079,
0.7428360820544958, 0.7416957064438143, 0.6793835171056175,
0.7534278702070989, 0.7116537094097775, 0.7639945058212043,
0.8272100778696002, 0.8264990913298758, 0.7445559071415172,
0.7911376794249645, 0.8897734327223286, 0.9138347192425403,
0.5468511738147778, 0.8022125366247213, 0.8378463077948621,
0.8527254596944677, 0.7622314357634615, 0.7164192267756896,
0.7119530520569298, 0.8051675370015825, 0.8042960891815846,
0.8116219543172005, 0.7611173161024801, 0.724074262956317, 0.6073504196141449,
0.6526196030844679, 0.8070872433720373, 0.8880053394408807,
0.7964546709282873, 0.6984266430511521, 0.7935560206831679,
0.7543744697032041, 0.8645921114569987, 0.8962935045894147,
0.6999538623771479, 0.7134721886290928, 0.7462869489534781,
0.7631409786366209, 0.6649207881654442, 0.6309354708818021,
0.8240100146822962, 0.7855022343275443, 0.7558096416689716,
0.7490390808034896, 0.7983524314588314, 0.820632959774302, 0.6840643964843275,
0.7398234779596219, 0.7353938525991223, 0.7295757066367529,
0.8446626928905503, 0.8706135671047699, 0.6868156119525347,
0.7542702777435738
),
)
get_icc <- function(data, indices, formula, family){
d <- data[indices,] # allows boot to select sample
mod <- glmmTMB::glmmTMB(formula = formula,
data = d,
family = family)
t <- as.numeric(icc(mod)[1])
return(t)
}
# Gaussian works
results_gauss <- boot::boot(
data = data,
statistic = get_icc,
R = 10,
formula = activity_ratio ~ 1 + (1 | animal_id),
family = gaussian
)
results_gauss[1]
#> $t0
#> [1] 0.5940239
# Beta-family
results_beta <- boot::boot(
data = data,
statistic = get_icc,
R = 10,
formula = activity_ratio ~ 1 + (1 | animal_id),
family = beta_family
)
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#> reliable.
#> A reason can be that the null model could not be computed manually. Try
#> to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#> reliable.
#> A reason can be that the null model could not be computed manually. Try
#> to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#> reliable.
#> A reason can be that the null model could not be computed manually. Try
#> to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#> reliable.
#> A reason can be that the null model could not be computed manually. Try
#> to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#> reliable.
#> A reason can be that the null model could not be computed manually. Try
#> to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#> reliable.
#> A reason can be that the null model could not be computed manually. Try
#> to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#> reliable.
#> A reason can be that the null model could not be computed manually. Try
#> to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#> reliable.
#> A reason can be that the null model could not be computed manually. Try
#> to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#> reliable.
#> A reason can be that the null model could not be computed manually. Try
#> to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#> reliable.
#> A reason can be that the null model could not be computed manually. Try
#> to fit the null model manually and pass it to `null_model`.
#> Warning: Can't calculate model's distribution-specific variance. Results are not
#> reliable.
#> A reason can be that the null model could not be computed manually. Try
#> to fit the null model manually and pass it to `null_model`.
warnings()
results_beta[1]
#> $t0
#> [1] 1
Created on 2024-07-11 with reprex v2.1.0
The main function for bootstrapping ICCs is:
For mixed models, we use lme4::bootMer()
, because this function can better take random effects into account:
We could possibly increase the tolerance here:
Maybe I add an option that doesn't silence the warnings and errors.
Maybe applying the Gamma prior on the random effects could also help here?
icc()
actually already has the verbose
option, but that is currently not in place there.
icc() actually already has the verbose option, but that is currently not in place there.
Yes, should be in there now in #747
It works for me, except for ci_method = NULL
, where the CI for the adjusted ICC is not accurate. But I don't think I can fix this, because you can't pass any arguments to bootMer()
that will be passed down to the boot-function.
library(glmmTMB)
library(performance)
set.seed(123)
a <- seq(from = 5, to = 95)
b1 <- jitter(a, factor = 20)
b2 <- jitter(a, factor = 20)
b2 <- b2 + 30
b3 <- jitter(a, factor = 20)
b3 <- b3 + 30
t_a <- rep('a', length(a))
t_b <- rep('b', length(a))
c <- as.factor(a)
d <- data.frame(id = c(c,c,c,c),
value = c(a,b1,b2,b3),
treatment = c(t_a, t_a, t_b, t_b))
d$value <- d$value / (max(d$value) + 0.1)
# GLMMs and GLMs with beta and Gaussian families
glmm_beta <- glmmTMB::glmmTMB(value ~ treatment + (1|id),
data = d,
family=beta_family)
options(easystats_errors = TRUE)
icc(glmm_beta, ci = 0.95, ci_method = "analytical", verbose = TRUE)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.992 [0.990, 0.994]
#> Unadjusted ICC: 0.742 [0.691, 0.783]
icc(glmm_beta, ci = 0.95, iterations = 20, ci_method = "boot", verbose = TRUE)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.992 [0.992, 0.997]
#> Unadjusted ICC: 0.742 [0.730, 0.774]
icc(glmm_beta, ci = 0.95, iterations = 20, ci_method = NULL, verbose = TRUE)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.992 [1.000, 1.000]
#> Unadjusted ICC: 0.742 [0.681, 0.793]
Created on 2024-07-13 with reprex v2.1.1
Good to see that the analytical version works! I also realised that I had been using method
initially, and only the last couple of days used ci_method
. However, I get something that falls outside of the CI with this data using both boot
and NULL
:
library(glmmTMB)
library(performance)
library(tibble)
set.seed(123)
d <- tibble::tibble(
id = factor(
rep(
c(
"208", "209", "210", "214", "211", "213", "217", "222", "234", "241", "216",
"230", "231", "240", "218", "220", "225", "237", "239", "219", "251", "254"
),
each = 2L
)
),
trial = rep(c(1, 2), 22),
value = c(
0.863877517291236, 0.8500985916499201, 0.6541010255529416, 0.7582038836339923,
0.6922830509391013, 0.7575009666972787, 0.6430974423340704,
0.7713382336475698, 0.7649692734749585, 0.7898476946736548, 0.883326476678558,
0.9135168527140827, 0.3561333035780704, 0.8467377516925126,
0.8354452445244863, 0.8798297199656977, 0.7519457327249348,
0.7099551260927718, 0.7025379324901558, 0.8060094582307198,
0.8128011566020357, 0.8150695559515087, 0.7533734045838847,
0.5884078386619355, 0.627989231219052, 0.6572662375116133, 0.807705876809919,
0.9015309272481724, 0.8853861905590428, 0.8946941586945131,
0.7415384045839729, 0.7106215229003023, 0.7331454873414073,
0.7562495496383957, 0.6512322762646132, 0.6516200307066495,
0.7934525374461099, 0.7396002178749655, 0.7066377184624543,
0.7877103462614253, 0.6517006781210346, 0.7962228291912887,
0.6792873354123711, 0.6876905228906177
),
)
# GLMMs and GLMs with beta and Gaussian families
glmm_beta <- glmmTMB::glmmTMB(value ~ trial + (1|id),
data = d,
family=beta_family)
options(easystats_errors = TRUE)
icc(glmm_beta, ci = 0.95, ci_method = "analytical", verbose = TRUE)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.893 [0.803, 0.938]
#> Unadjusted ICC: 0.761 [0.586, 0.856]
icc(glmm_beta, ci = 0.95, iterations = 20, ci_method = "boot", verbose = TRUE)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.893 [0.894, 0.994]
#> Unadjusted ICC: 0.761 [0.612, 0.989]
icc(glmm_beta, ci = 0.95, iterations = 20, ci_method = NULL, verbose = TRUE)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.893 [1.000, 1.000]
#> Unadjusted ICC: 0.761 [0.587, 0.960]
packageVersion('performance')
#> [1] '0.12.0.8'
packageVersion('insight')
#> [1] '0.20.1.13'
Created on 2024-07-13 with reprex v2.1.0
Ah! I think the estimate is not drawn from the bootstrap. I guess it should pull the median/mean estimate produced by the bootstrap rather than just the un-bootstrapped estimate. Does that sound right?
Typical practice for bootstrapped CIs is to use the original estimate as the point estimate and the bootstrap distribution just for the standard error or CI. The mean/median don't always align with the MLE point estimate (this is part of the motivation behind the bias-corrected and bias-corrected-and-accelerated bootstraps). The mean or median of the bootstrap distribution are potential estimators for the point estimate, but I would recommend we retain the original estimate as the point estimate for the default output in easystats.
Maybe the number of iterations is too low for accurate CIs?
How many iterations is default?
How many iterations is default?
@bwiernik Default is 100 iterations.
Just realised that the default ci_method
is NULL
, so if left at default, it's using .do_lme4_bootmer
- the one that fails. If boot
is specified, it does not in fact use .do_lme4_bootmer
:
The main function for bootstrapping ICCs is:
So .do_lme4_bootmer
still currently doesn't work. It also affects r2_nakagawa
, so I think there's still something amiss in there.
(boot
works better, but even with 1000 iterations, the estimate is practically on the 2.5% mark - that still seems suspicious.)
library(glmmTMB)
library(performance)
library(tibble)
library(insight)
set.seed(123)
d <- tibble::tibble(
id = factor(
rep(
c(
"208", "209", "210", "214", "223", "228", "211", "213", "217", "222", "234",
"241", "216", "230", "231", "240", "242", "244", "218", "220", "225", "237",
"239", "219", "251", "252", "253", "254"
),
each = 2L
),
levels = c(
"200", "204", "205", "206", "215", "224", "208", "209", "210", "214", "223",
"228", "211", "213", "217", "222", "234", "241", "216", "230", "231", "240",
"242", "244", "218", "220", "225", "237", "239", "245", "219", "236", "251",
"252", "253", "254"
)
),
trial = rep(c(1, 2), 28),
value = c(
0.09498024195328074, 0.06501857725091675, 0.11815590175658126,
0.07432374123432427, 0.20160866416653947, 0.16168961981848032,
0.18431356919353978, 0.1729636058911192, 0.08144358847160854,
0.03429214493815157, 0.10657404730975248, 0.06867883079629465,
0.05921577649099549, 0.10027697162542965, 0.22948745366059534,
0.1318564983755886, 0.11521536711005413, 0.09988251038099903,
0.11269870982289175, 0.10065372202090746, 0.30180913110275365,
0.2784384586997261, 0.31090363146026273, 0.201433511508709,
0.2064625890910772, 0.24142003722960942, 0.07379095189435501,
0.05629768825555659, 0.23164588826907823, 0.2598283402933376,
0.0773854788008177, 0.08506098926928175, 0.05543770346039902,
0.08545932516134344, 0.13262226256298723, 0.13835676441921468,
0.011223979573538618, 0.12175578019653631, 0.18844306077612688,
0.18665264740941406, 0.30528203811103827, 0.36889354953270814,
0.08101054157438016, 0.09939626397157338, 0.23026277596112824,
0.3138068338604861, 0.10959072828854538, 0.1190276194587162,
0.20927572336281877, 0.1531936170056966, 0.21758853563344488,
0.12740603572231024, 0.11646060319509406, 0.05741400972805333,
0.1767355830736662, 0.08745443933512403
),
) |>
dplyr::group_by(id)
# GLMMs and GLMs with beta and Gaussian families
glmm_beta <- glmmTMB::glmmTMB(value ~ trial + (1|id),
data = d,
family=beta_family)
options(easystats_errors = TRUE)
# See variances
insight::get_variance(glmm_beta)
#> $var.fixed
#> [1] 0.002488934
#>
#> $var.random
#> [1] 0.3247858
#>
#> $var.residual
#> [1] 0.08161196
#>
#> $var.distribution
#> [1] 0.08161196
#>
#> $var.dispersion
#> [1] 0
#>
#> $var.intercept
#> id
#> 0.3247858
# Compute some ICCs
icc(glmm_beta, ci = 0.95)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.799 [1.000, 1.000]
#> Unadjusted ICC: 0.794 [0.916, 1.000]
icc(glmm_beta, ci = 0.95, iterations = 10)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.799 [1.000, 1.000]
#> Unadjusted ICC: 0.794 [0.948, 0.998]
icc(glmm_beta, ci = 0.95, iterations = 50)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.799 [1.000, 1.000]
#> Unadjusted ICC: 0.794 [0.880, 1.000]
icc(glmm_beta, ci = 0.95, iterations = 100)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.799 [1.000, 1.000]
#> Unadjusted ICC: 0.794 [0.939, 1.000]
icc(glmm_beta, ci = 0.95, iterations = 1000)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.799 [1.000, 1.000]
#> Unadjusted ICC: 0.794 [0.928, 1.000]
icc(glmm_beta, ci = 0.95, ci_method = "boot")
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.799 [0.804, 0.972]
#> Unadjusted ICC: 0.794 [0.803, 0.962]
icc(glmm_beta, ci = 0.95, ci_method = "boot", iterations = 10)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.799 [0.809, 0.964]
#> Unadjusted ICC: 0.794 [0.798, 0.962]
icc(glmm_beta, ci = 0.95, ci_method = "boot", iterations = 50)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.799 [0.792, 0.977]
#> Unadjusted ICC: 0.794 [0.787, 0.968]
icc(glmm_beta, ci = 0.95, ci_method = "boot", iterations = 100)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.799 [0.793, 0.977]
#> Unadjusted ICC: 0.794 [0.793, 0.974]
icc(glmm_beta, ci = 0.95, ci_method = "boot", iterations = 1000)
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.799 [0.790, 0.973]
#> Unadjusted ICC: 0.794 [0.782, 0.970]
icc(glmm_beta, ci = 0.95, ci_method = "analytical")
#> # Intraclass Correlation Coefficient
#>
#> Adjusted ICC: 0.799 [0.670, 0.873]
#> Unadjusted ICC: 0.794 [0.663, 0.870]
# And some R2s
r2_nakagawa(glmm_beta, ci = 0.95)
#> # R2 for Mixed Models
#>
#> Conditional R2: 0.800 [1.000, 1.000]
#> Marginal R2: 0.006 [8.647e-05, 0.084]
r2_nakagawa(glmm_beta, ci = 0.95, iterations = 1000)
#> # R2 for Mixed Models
#>
#> Conditional R2: 0.800 [1.000, 1.000]
#> Marginal R2: 0.006 [1.518e-05, 0.073]
packageVersion('performance')
#> [1] '0.12.2'
packageVersion('insight')
#> [1] '0.20.2'
Created on 2024-07-22 with reprex v2.1.0
Originally posted by @roaldarbol in https://github.com/easystats/performance/issues/742#issuecomment-2217148678