Closed strengejacke closed 5 years ago
Thanks for noticing this. I've tried first with a simulation to see if something is consistently wrong, i.e., check the following:
library("GLMMadaptive")
library("glmmTMB")
#> Warning: package 'glmmTMB' was built under R version 3.5.2
# Simulate longitudinal count data from a hurdle Poisson mixed model: a
# zero-truncated Poisson for the positive counts (with a subject-specific
# random intercept) plus a logistic model that sets observations to zero.
#
# Arguments:
#   seed   value passed to set.seed(); note that this resets the global RNG.
#   n      number of subjects; must be even (half "male", half "female").
#   K      number of measurements per subject (baseline plus K - 1 follow-ups).
#   t_max  maximum follow-up time.
#
# Returns a data frame with n * K rows and the columns id, time, sex and y.
simulate_fun <- function(seed, n = 200, K = 8, t_max = 5) {
  # gl(2, n / 2) only yields n subjects when n is even; fail early otherwise
  stopifnot(n %% 2 == 0, K >= 1)
  set.seed(seed)
  # we construct a data frame with the design:
  # everyone has a baseline measurement, and then measurements at random
  # follow-up times
  DF <- data.frame(
    id = rep(seq_len(n), each = K),
    time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
    sex = rep(gl(2, n / 2, labels = c("male", "female")), each = K)
  )
  # design matrices for the fixed and random effects non-zero part
  X <- model.matrix(~ sex + time, data = DF)
  Z <- model.matrix(~ 1, data = DF)
  # design matrix for the fixed effects zero part
  X_zi <- model.matrix(~ sex + time, data = DF)
  betas <- c(1.5, 0.05, 0.05) # fixed effects coefficients non-zero part
  gammas <- c(-1.5, -0.5, 0.5) # fixed effects coefficients zero part
  D11 <- 1 # variance of random intercepts non-zero part
  # we simulate the random effects (one intercept per subject)
  b <- cbind(rnorm(n, sd = sqrt(D11)))
  # linear predictor non-zero part
  eta_y <- as.vector(X %*% betas + rowSums(Z * b[DF$id, 1, drop = FALSE]))
  # linear predictor zero part
  eta_zi <- as.vector(X_zi %*% gammas)
  # we simulate truncated Poisson longitudinal data by inverting the CDF on
  # (P(Y = 0), 1), so that every draw is at least 1
  DF$y <- qpois(runif(n * K, ppois(0, exp(eta_y)), 1), exp(eta_y))
  # we set the zeros
  DF$y[as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))] <- 0
  DF
}
########################################################################################
# Monte Carlo comparison: fit the same hurdle Poisson mixed model with
# GLMMadaptive and with glmmTMB on M simulated datasets and keep the
# fixed-effects estimates of both model parts.
M <- 100
# one row per replicate, one column per coefficient
betas_GLMMadaptive <- matrix(0.0, nrow = M, ncol = 3)
betas_glmmTMB <- matrix(0.0, nrow = M, ncol = 3)
gammas_GLMMadaptive <- matrix(0.0, nrow = M, ncol = 3)
gammas_glmmTMB <- matrix(0.0, nrow = M, ncol = 3)
for (m in seq_len(M)) {
  # a different seed, and hence a different dataset, for every replicate
  DF_m <- simulate_fun(2019 + m)

  # GLMMadaptive fit
  m1 <- mixed_model(
    y ~ sex + time,
    random = ~ 1 | id,
    data = DF_m,
    family = hurdle.poisson(),
    zi_fixed = ~ sex + time
  )
  betas_GLMMadaptive[m, ] <- fixef(m1)
  gammas_GLMMadaptive[m, ] <- fixef(m1, sub_model = "zero_part")

  # glmmTMB fit of the same model
  m2 <- glmmTMB(
    y ~ sex + time + (1 | id),
    ziformula = ~ sex + time,
    family = truncated_poisson(),
    data = DF_m
  )
  tmb_fixef <- fixef(m2)
  betas_glmmTMB[m, ] <- tmb_fixef[["cond"]]
  gammas_glmmTMB[m, ] <- tmb_fixef[["zi"]]
}
# Bias: mean deviation of the estimates from the true value, per coefficient.
# sweep() subtracts the true coefficient vector from every row (replicate).
true_betas <- c(1.5, 0.05, 0.05)
rbind(
  Bias_GLMMadaptive = colMeans(sweep(betas_GLMMadaptive, 2, true_betas)),
  Bias_glmmTMB = colMeans(sweep(betas_glmmTMB, 2, true_betas))
)
#> [,1] [,2] [,3]
#> Bias_GLMMadaptive 0.02122751 -0.01035611 -0.0001626549
#> Bias_glmmTMB 0.02083213 -0.01037756 -0.0001601303
true_gammas <- c(-1.5, -0.5, 0.5)
rbind(
  Bias_GLMMadaptive = colMeans(sweep(gammas_GLMMadaptive, 2, true_gammas)),
  Bias_glmmTMB = colMeans(sweep(gammas_glmmTMB, 2, true_gammas))
)
#> [,1] [,2] [,3]
#> Bias_GLMMadaptive -0.002330292 0.01484655 0.0001198375
#> Bias_glmmTMB -0.002330305 0.01484668 0.0001197215
# RMSE: root of the mean squared deviation from the true value, per coefficient
rbind(
  RMSE_GLMMadaptive = sqrt(colMeans(sweep(betas_GLMMadaptive, 2, true_betas)^2)),
  RMSE_glmmTMB = sqrt(colMeans(sweep(betas_glmmTMB, 2, true_betas)^2))
)
#> [,1] [,2] [,3]
#> RMSE_GLMMadaptive 0.1039681 0.1427301 0.007961253
#> RMSE_glmmTMB 0.1040858 0.1430581 0.007960939
rbind(
  RMSE_GLMMadaptive = sqrt(colMeans(sweep(gammas_GLMMadaptive, 2, true_gammas)^2)),
  RMSE_glmmTMB = sqrt(colMeans(sweep(gammas_glmmTMB, 2, true_gammas)^2))
)
#> [,1] [,2] [,3]
#> RMSE_GLMMadaptive 0.1182866 0.1079911 0.03894126
#> RMSE_glmmTMB 0.1182863 0.1079912 0.03894105
This suggests that the two packages produce essentially the same results on simulated data.
Then I looked at the number of quadrature points. Package glmmTMB uses the Laplace approximation, which is equivalent to `nAGQ = 1`. When I set `nAGQ = 2` in `mixed_model()` (because of a small problem in the code, `nAGQ = 1` currently does not work - I'll fix it soon), I get results that are in the same direction as those from glmmTMB, i.e.,
library("GLMMadaptive")
library("glmmTMB")
#> Warning: package 'glmmTMB' was built under R version 3.5.2
# Salamanders data: the same hurdle Poisson model fitted with glmmTMB (m1) and
# with GLMMadaptive using only 2 quadrature points (m2).
data("Salamanders")
m1 <- glmmTMB(count ~ spp + mined + (1 | site), ziformula = ~ spp + mined,
family = truncated_poisson(), data = Salamanders)
m2 <- mixed_model(count ~ spp + mined, random = ~ 1 | site, zi_fixed = ~ spp + mined,
family = hurdle.poisson(), data = Salamanders, nAGQ = 2)
# fixed effects of the count (non-zero) part
fixef(m1)$cond
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> -0.06702286 -0.52092708 0.22457540 -0.19548416 0.64672238 0.60513701
#> sppDF minedno
#> 0.04602476 1.01446593
fixef(m2)
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> -0.70042204 -0.65075508 0.06425725 -0.01005135 1.05811127 0.48668936
#> sppDF minedno
#> 0.08016362 0.91360728
# fixed effects of the zero part; the component is spelled out as `zi` rather
# than relying on `$` partial matching of `z`
fixef(m1)$zi
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> 1.7555900 1.6784724 -0.4269123 1.1045525 -0.4269123 -0.6715877
#> sppDF minedno
#> -0.4269123 -2.4037967
fixef(m2, "zero_part")
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> 1.7555979 1.6785013 -0.4269270 1.1045352 -0.4269270 -0.6715963
#> sppDF minedno
#> -0.4269270 -2.4037889
whereas the differences appear when I set `nAGQ` to a higher number, e.g.,
library("GLMMadaptive")
library("glmmTMB")
#> Warning: package 'glmmTMB' was built under R version 3.5.2
# Salamanders data: the same hurdle Poisson model fitted with glmmTMB (m1) and
# with GLMMadaptive using 25 quadrature points (m2).
data("Salamanders")
m1 <- glmmTMB(count ~ spp + mined + (1 | site), ziformula = ~ spp + mined,
family = truncated_poisson(), data = Salamanders)
m2 <- mixed_model(count ~ spp + mined, random = ~ 1 | site, zi_fixed = ~ spp + mined,
family = hurdle.poisson(), data = Salamanders, nAGQ = 25)
# fixed effects of the count (non-zero) part
fixef(m1)$cond
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> -0.06702286 -0.52092708 0.22457540 -0.19548416 0.64672238 0.60513701
#> sppDF minedno
#> 0.04602476 1.01446593
fixef(m2)
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> -3.1900355 0.1976119 0.9137361 0.6538284 1.4999425 0.9589269
#> sppDF minedno
#> 0.3426018 0.7137289
# fixed effects of the zero part; the component is spelled out as `zi` rather
# than relying on `$` partial matching of `z`
fixef(m1)$zi
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> 1.7555900 1.6784724 -0.4269123 1.1045525 -0.4269123 -0.6715877
#> sppDF minedno
#> -0.4269123 -2.4037967
fixef(m2, "zero_part")
#> (Intercept) sppPR sppDM sppEC-A sppEC-L sppDES-L
#> 1.7555979 1.6785013 -0.4269270 1.1045352 -0.4269270 -0.6715963
#> sppDF minedno
#> -0.4269270 -2.4037889
This suggests to me that there could be an issue with the approximation of the integrals over the random effects in this dataset.
Ok, I see! I knew that GLMMadaptive uses a different approach to approximate the integrals, but I didn't know it makes such a difference. Maybe you could add a note to the docs, or describe this as a short example in a vignette? I guess some users may not expect such "large" differences from the different approaches either.
Here's another example with `glmer()` that seems to confirm your guess:
library(GLMMadaptive)
library(lme4)
#> Loading required package: Matrix
#>
#> Attaching package: 'lme4'
#> The following object is masked from 'package:GLMMadaptive':
#>
#> negative.binomial
library(HSAUR2)
#> Loading required package: tools
# Toenail data: compare lme4::glmer() and GLMMadaptive::mixed_model() on the
# same random-intercept logistic regression.
data("toenail")
# m1 / m2: both packages with 20 quadrature points (nAGQ = 20)
m1 <- glmer(
outcome ~ treatment * visit + (1 | patientID),
data = toenail,
family = binomial,
nAGQ = 20
)
m2 <- mixed_model(
outcome ~ treatment * visit,
random = ~ 1 | patientID,
data = toenail,
family = binomial,
nAGQ = 20
)
fixef(m1)
#> (Intercept) treatmentterbinafine
#> -0.4530317 0.1583433
#> visit treatmentterbinafine:visit
#> -0.7913051 -0.2360022
fixef(m2)
#> (Intercept) treatmentterbinafine
#> -0.4665846 0.1585886
#> visit treatmentterbinafine:visit
#> -0.7915773 -0.2361823
# m3 / m4: the same model, but nAGQ is left at each package's default
# NOTE(review): glmer()'s default is presumably the Laplace approximation
# (nAGQ = 1), while mixed_model() defaults to more points - confirm in the docs
m3 <- glmer(
outcome ~ treatment * visit + (1 | patientID),
data = toenail,
family = binomial
)
m4 <- mixed_model(
outcome ~ treatment * visit,
random = ~ 1 | patientID,
data = toenail,
family = binomial
)
fixef(m3)
#> (Intercept) treatmentterbinafine
#> -1.42518531 -0.01011846
#> visit treatmentterbinafine:visit
#> -0.80443955 -0.23512185
fixef(m4)
#> (Intercept) treatmentterbinafine
#> -0.5334504 0.1566945
#> visit treatmentterbinafine:visit
#> -0.7898613 -0.2357171
Created on 2019-01-22 by the reprex package (v0.2.1)
Maybe you can close this issue then...
Well, this has been the motivation in the first place to develop this package. Namely, that it does matter which method you use to approximate the integrals and that the adaptive Gaussian quadrature is the one that is considered the "gold-standard".
I'm comparing models fitted with your package and glmmTMB, to see how the results match or differ. Here you see two examples. In the first, the coefficients of the count component differ, while those of the zero-inflation part are identical. In the second example, the coefficients of both the count component and the zero-inflation part are (almost) identical.
Do you have any ideas where the differences in the first model come from?