discrepancy between GLMMadaptive and glmmTMB

strengejacke commented 5 years ago

I'm comparing models fitted with your package and glmmTMB, to see how results match or differ. Here you see two examples. In the first, the coefficients from the count-component differ, while the zero-inflation part are identical. In the second example, coefficients both of the count-component and zero-inflated part are (almost) identical.

Do you have any ideas where the differences in the first model come from?

library(glmmTMB)
library(GLMMadaptive)

data("Salamanders")

m1 <- glmmTMB(
  count ~ spp + mined + (1 | site), 
  ziformula = ~ spp + mined, 
  family = truncated_poisson(), 
  data = Salamanders
)

m2 <- mixed_model(
  count ~ spp + mined,
  random = ~1 | site,
  zi_fixed = ~ spp + mined, 
  family = hurdle.poisson(), 
  data = Salamanders
)

summary(m1)
#>  Family: truncated_poisson  ( log )
#> Formula:          count ~ spp + mined + (1 | site)
#> Zero inflation:         ~spp + mined
#> Data: Salamanders
#> 
#>      AIC      BIC   logLik deviance df.resid 
#>   1790.2   1866.1   -878.1   1756.2      627 
#> 
#> Random effects:
#> 
#> Conditional model:
#>  Groups Name        Variance Std.Dev.
#>  site   (Intercept) 0.05318  0.2306  
#> Number of obs: 644, groups:  site, 23
#> 
#> Conditional model:
#>             Estimate Std. Error z value Pr(>|z|)    
#> (Intercept) -0.06702    0.20605  -0.325   0.7450    
#> sppPR       -0.52093    0.27872  -1.869   0.0616 .  
#> sppDM        0.22458    0.14485   1.550   0.1210    
#> sppEC-A     -0.19548    0.20122  -0.972   0.3313    
#> sppEC-L      0.64672    0.13229   4.889 1.01e-06 ***
#> sppDES-L     0.60514    0.13027   4.645 3.39e-06 ***
#> sppDF        0.04602    0.15397   0.299   0.7650    
#> minedno      1.01447    0.18724   5.418 6.02e-08 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Zero-inflation model:
#>             Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)   1.7556     0.2833   6.196 5.78e-10 ***
#> sppPR         1.6785     0.3964   4.234 2.30e-05 ***
#> sppDM        -0.4269     0.3502  -1.219  0.22288    
#> sppEC-A       1.1046     0.3684   2.998  0.00272 ** 
#> sppEC-L      -0.4269     0.3502  -1.219  0.22288    
#> sppDES-L     -0.6716     0.3519  -1.909  0.05631 .  
#> sppDF        -0.4269     0.3502  -1.219  0.22288    
#> minedno      -2.4038     0.2089 -11.508  < 2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m2)
#> 
#> Call:
#> mixed_model(fixed = count ~ spp + mined, random = ~1 | site, 
#>     data = Salamanders, family = hurdle.poisson(), zi_fixed = ~spp + 
#>         mined)
#> 
#> Data Descriptives:
#> Number of Observations: 644
#> Number of Groups: 23 
#> 
#> Model:
#>  family: hurdle poisson
#>  link: log 
#> 
#> Fit statistics:
#>   log.Lik      AIC      BIC
#>  -1005.88 2045.759 2065.063
#> 
#> Random effects covariance matrix:
#>               StdDev
#> (Intercept) 2.603956
#> 
#> Fixed effects:
#>             Estimate Std.Err  z-value    p-value
#> (Intercept)  -3.1554  0.2253 -14.0030    < 1e-04
#> sppPR         0.2035  0.2431   0.8368 0.40271196
#> sppDM         0.9135  0.1587   5.7577    < 1e-04
#> sppEC-A       0.6263  0.1941   3.2277 0.00124803
#> sppEC-L       1.4843  0.1376  10.7857    < 1e-04
#> sppDES-L      0.9575  0.1449   6.6075    < 1e-04
#> sppDF         0.3277  0.1573   2.0832 0.03722871
#> minedno       0.7276  0.1896   3.8370 0.00012453
#> 
#> Zero-part coefficients:
#>             Estimate Std.Err  z-value   p-value
#> (Intercept)   1.7556  0.2833   6.1963   < 1e-04
#> sppPR         1.6785  0.3964   4.2341   < 1e-04
#> sppDM        -0.4269  0.3502  -1.2190 0.2228629
#> sppEC-A       1.1045  0.3684   2.9981 0.0027163
#> sppEC-L      -0.4269  0.3502  -1.2190 0.2228629
#> sppDES-L     -0.6716  0.3519  -1.9087 0.0563035
#> sppDF        -0.4269  0.3502  -1.2190 0.2228629
#> minedno      -2.4038  0.2089 -11.5077   < 1e-04
#> 
#> Integration:
#> method: adaptive Gauss-Hermite quadrature rule
#> quadrature points: 11
#> 
#> Optimization:
#> method: hybrid EM and quasi-Newton
#> converged: TRUE

library(glmmTMB)
library(GLMMadaptive)

set.seed(1234)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
t_max <- 5 # maximum follow-up time

# we constuct a data frame with the design: 
# everyone has a baseline measurment, and then measurements at random follow-up times
DF <- data.frame(id = rep(seq_len(n), each = K),
                 time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
                 sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))

# design matrices for the fixed and random effects non-zero part
X <- model.matrix(~ sex * time, data = DF)
Z <- model.matrix(~ 1, data = DF)
# design matrices for the fixed and random effects zero part
X_zi <- model.matrix(~ sex, data = DF)
Z_zi <- model.matrix(~ 1, data = DF)

betas <- c(1.5, 0.05, 0.05, -0.03) # fixed effects coefficients non-zero part
shape <- 2 # shape/size parameter of the negative binomial distribution
gammas <- c(-1.5, 0.5) # fixed effects coefficients zero part
D11 <- 0.5 # variance of random intercepts non-zero part
D22 <- 0.4 # variance of random intercepts zero part

# we simulate random effects
b <- cbind(rnorm(n, sd = sqrt(D11)), rnorm(n, sd = sqrt(D22)))
# linear predictor non-zero part
eta_y <- as.vector(X %*% betas + rowSums(Z * b[DF$id, 1, drop = FALSE]))
# linear predictor zero part
eta_zi <- as.vector(X_zi %*% gammas + rowSums(Z_zi * b[DF$id, 2, drop = FALSE]))
# we simulate negative binomial longitudinal data
DF$y <- rnbinom(n * K, size = shape, mu = exp(eta_y))
# we set the extra zeros
DF$y[as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))] <- 0

fm1 <- mixed_model(y ~ sex * time, random = ~ 1 | id, data = DF,
                   family = zi.poisson(), zi_fixed = ~ sex)

fm2 <- glmmTMB(y ~ sex * time + (1 | id), data = DF,
                   family = poisson(), ziformula = ~ sex)

summary(fm1)
#> 
#> Call:
#> mixed_model(fixed = y ~ sex * time, random = ~1 | id, data = DF, 
#>     family = zi.poisson(), zi_fixed = ~sex)
#> 
#> Data Descriptives:
#> Number of Observations: 800
#> Number of Groups: 100 
#> 
#> Model:
#>  family: zero-inflated poisson
#>  link: log 
#> 
#> Fit statistics:
#>    log.Lik      AIC     BIC
#>  -2223.937 4461.874 4480.11
#> 
#> Random effects covariance matrix:
#>                StdDev
#> (Intercept) 0.8272898
#> 
#> Fixed effects:
#>                Estimate Std.Err z-value p-value
#> (Intercept)      1.5276  0.1288 11.8605 < 1e-04
#> sexfemale        0.0174  0.1819  0.0954 0.92396
#> time            -0.0040  0.0159 -0.2542 0.79937
#> sexfemale:time   0.0004  0.0230  0.0188 0.98496
#> 
#> Zero-part coefficients:
#>             Estimate Std.Err z-value   p-value
#> (Intercept)  -1.2106  0.1411 -8.5811   < 1e-04
#> sexfemale     0.4986  0.1823  2.7348 0.0062411
#> 
#> Integration:
#> method: adaptive Gauss-Hermite quadrature rule
#> quadrature points: 11
#> 
#> Optimization:
#> method: hybrid EM and quasi-Newton
#> converged: TRUE
summary(fm2)
#>  Family: poisson  ( log )
#> Formula:          y ~ sex * time + (1 | id)
#> Zero inflation:     ~sex
#> Data: DF
#> 
#>      AIC      BIC   logLik deviance df.resid 
#>   4462.6   4495.4  -2224.3   4448.6      793 
#> 
#> Random effects:
#> 
#> Conditional model:
#>  Groups Name        Variance Std.Dev.
#>  id     (Intercept) 0.6841   0.8271  
#> Number of obs: 800, groups:  id, 100
#> 
#> Conditional model:
#>                  Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)     1.5273202  0.1287453  11.863   <2e-16 ***
#> sexfemale       0.0168426  0.1818901   0.093    0.926    
#> time           -0.0040292  0.0158893  -0.254    0.800    
#> sexfemale:time  0.0004784  0.0229638   0.021    0.983    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Zero-inflation model:
#>             Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)  -1.2108     0.1413  -8.570  < 2e-16 ***
#> sexfemale     0.4985     0.1826   2.731  0.00632 ** 
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

drizopoulos commented 5 years ago

Thanks for noticing this. I've tried first with a simulation to see if something is consistently wrong, i.e., check the following:

library("GLMMadaptive")
library("glmmTMB")
#> Warning: package 'glmmTMB' was built under R version 3.5.2

simulate_fun <- function (seed) {
    set.seed(seed)
    n <- 200 # number of subjects
    K <- 8 # number of measurements per subject
    t_max <- 5 # maximum follow-up time

    # we constuct a data frame with the design: 
    # everyone has a baseline measurment, and then measurements at random follow-up times
    DF <- data.frame(id = rep(seq_len(n), each = K),
                     time = c(replicate(n, c(0, sort(runif(K - 1, 0, t_max))))),
                     sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))

    # design matrices for the fixed and random effects non-zero part
    X <- model.matrix(~ sex + time, data = DF)
    Z <- model.matrix(~ 1, data = DF)
    # design matrices for the fixed effects zero part
    X_zi <- model.matrix(~ sex + time, data = DF)

    betas <- c(1.5, 0.05, 0.05) # fixed effects coefficients non-zero part
    shape <- 2 # shape/size parameter of the negative binomial distribution
    gammas <- c(-1.5, -0.5, 0.5) # fixed effects coefficients zero part
    D11 <- 1 # variance of random intercepts non-zero part

    # we simulate random effects
    b <- cbind(rnorm(n, sd = sqrt(D11)))
    # linear predictor non-zero part
    eta_y <- as.vector(X %*% betas + rowSums(Z * b[DF$id, 1, drop = FALSE]))
    # linear predictor zero part
    eta_zi <- as.vector(X_zi %*% gammas)
    # we simulate truncated Poisson longitudinal data
    DF$y <- qpois(runif(n * K, ppois(0, exp(eta_y)), 1), exp(eta_y))
    # we set the zeros
    DF$y[as.logical(rbinom(n * K, size = 1, prob = plogis(eta_zi)))] <- 0
    DF
}

########################################################################################

M <- 100
betas_GLMMadaptive <- betas_glmmTMB <- matrix(0.0, M, 3)
gammas_GLMMadaptive <- gammas_glmmTMB <- matrix(0.0, M, 3)

for (m in seq_len(M)) {
    DF_m <- simulate_fun(2019 + m)
    m1 <- mixed_model(y ~ sex + time, random = ~ 1 | id, data = DF_m,
                      family = hurdle.poisson(), zi_fixed = ~ sex + time)
    betas_GLMMadaptive[m, ] <- fixef(m1)
    gammas_GLMMadaptive[m, ] <- fixef(m1, sub_model = "zero_part")
    ##########
    m2 <- glmmTMB(y ~ sex + time + (1 | id), ziformula = ~ sex + time, 
                  family = truncated_poisson(), data = DF_m)
    betas_glmmTMB[m, ] <- fixef(m2)$cond
    gammas_glmmTMB[m, ] <- fixef(m2)$zi
}

# Bias
true_betas <- c(1.5, 0.05, 0.05)
rbind(Bias_GLMMadaptive = colMeans(betas_GLMMadaptive - rep(true_betas, each = M)),
      Bias_glmmTMB = colMeans(betas_glmmTMB - rep(true_betas, each = M)))
#>                         [,1]        [,2]          [,3]
#> Bias_GLMMadaptive 0.02122751 -0.01035611 -0.0001626549
#> Bias_glmmTMB      0.02083213 -0.01037756 -0.0001601303

true_gammas <- c(-1.5, -0.5, 0.5)
rbind(Bias_GLMMadaptive = colMeans(gammas_GLMMadaptive - rep(true_gammas, each = M)),
      Bias_glmmTMB = colMeans(gammas_glmmTMB - rep(true_gammas, each = M)))
#>                           [,1]       [,2]         [,3]
#> Bias_GLMMadaptive -0.002330292 0.01484655 0.0001198375
#> Bias_glmmTMB      -0.002330305 0.01484668 0.0001197215

# RMSE
rbind(RMSE_GLMMadaptive = sqrt(colMeans((betas_GLMMadaptive - rep(true_betas, each = M))^2)),
      RMSE_glmmTMB = sqrt(colMeans((betas_glmmTMB - rep(true_betas, each = M))^2)))
#>                        [,1]      [,2]        [,3]
#> RMSE_GLMMadaptive 0.1039681 0.1427301 0.007961253
#> RMSE_glmmTMB      0.1040858 0.1430581 0.007960939

rbind(RMSE_GLMMadaptive = sqrt(colMeans((gammas_GLMMadaptive - rep(true_gammas, each = M))^2)),
      RMSE_glmmTMB = sqrt(colMeans((gammas_glmmTMB - rep(true_gammas, each = M))^2)))
#>                        [,1]      [,2]       [,3]
#> RMSE_GLMMadaptive 0.1182866 0.1079911 0.03894126
#> RMSE_glmmTMB      0.1182863 0.1079912 0.03894105

This suggests that the two packages are doing the same in simulated data.

Then I looked at the number of quadrature points. Package glmmTMB does Laplace approximation, which is equivalent to nAGQ = 1. When I set nAGQ = 2 (because of a small problem in the code, currently nAGQ = 1 does not work - I'll fix it soon) to mixed_model() I get results that are in the same direction as in glmmTMB, i.e.,

library("GLMMadaptive")
library("glmmTMB")
#> Warning: package 'glmmTMB' was built under R version 3.5.2

data("Salamanders")

m1 <- glmmTMB(count ~ spp + mined + (1 | site), ziformula = ~ spp + mined, 
    family = truncated_poisson(), data = Salamanders)

m2 <- mixed_model(count ~ spp + mined, random = ~ 1 | site, zi_fixed = ~ spp + mined, 
    family = hurdle.poisson(), data = Salamanders, nAGQ = 2)

fixef(m1)$cond
#> (Intercept)       sppPR       sppDM     sppEC-A     sppEC-L    sppDES-L 
#> -0.06702286 -0.52092708  0.22457540 -0.19548416  0.64672238  0.60513701 
#>       sppDF     minedno 
#>  0.04602476  1.01446593
fixef(m2)
#> (Intercept)       sppPR       sppDM     sppEC-A     sppEC-L    sppDES-L 
#> -0.70042204 -0.65075508  0.06425725 -0.01005135  1.05811127  0.48668936 
#>       sppDF     minedno 
#>  0.08016362  0.91360728

fixef(m1)$z
#> (Intercept)       sppPR       sppDM     sppEC-A     sppEC-L    sppDES-L 
#>   1.7555900   1.6784724  -0.4269123   1.1045525  -0.4269123  -0.6715877 
#>       sppDF     minedno 
#>  -0.4269123  -2.4037967
fixef(m2, "zero_part")
#> (Intercept)       sppPR       sppDM     sppEC-A     sppEC-L    sppDES-L 
#>   1.7555979   1.6785013  -0.4269270   1.1045352  -0.4269270  -0.6715963 
#>       sppDF     minedno 
#>  -0.4269270  -2.4037889

whereas when I set nAGQ to a higher number is where the differences occur, e.g.,

library("GLMMadaptive")
library("glmmTMB")
#> Warning: package 'glmmTMB' was built under R version 3.5.2

data("Salamanders")

m1 <- glmmTMB(count ~ spp + mined + (1 | site), ziformula = ~ spp + mined, 
    family = truncated_poisson(), data = Salamanders)

m2 <- mixed_model(count ~ spp + mined, random = ~ 1 | site, zi_fixed = ~ spp + mined, 
    family = hurdle.poisson(), data = Salamanders, nAGQ = 25)

fixef(m1)$cond
#> (Intercept)       sppPR       sppDM     sppEC-A     sppEC-L    sppDES-L 
#> -0.06702286 -0.52092708  0.22457540 -0.19548416  0.64672238  0.60513701 
#>       sppDF     minedno 
#>  0.04602476  1.01446593
fixef(m2)
#> (Intercept)       sppPR       sppDM     sppEC-A     sppEC-L    sppDES-L 
#>  -3.1900355   0.1976119   0.9137361   0.6538284   1.4999425   0.9589269 
#>       sppDF     minedno 
#>   0.3426018   0.7137289

fixef(m1)$z
#> (Intercept)       sppPR       sppDM     sppEC-A     sppEC-L    sppDES-L 
#>   1.7555900   1.6784724  -0.4269123   1.1045525  -0.4269123  -0.6715877 
#>       sppDF     minedno 
#>  -0.4269123  -2.4037967
fixef(m2, "zero_part")
#> (Intercept)       sppPR       sppDM     sppEC-A     sppEC-L    sppDES-L 
#>   1.7555979   1.6785013  -0.4269270   1.1045352  -0.4269270  -0.6715963 
#>       sppDF     minedno 
#>  -0.4269270  -2.4037889

This suggests to me that there could be an issue with the approximation of the integrals over the random effects in this dataset.

strengejacke commented 5 years ago

Ok, I see! I knew that GLMMadaptive uses a different approach to approximate the integrals, but I didn't know it makes such a difference. Maybe you could add a note to the docs, or describe this as short example in a vignette? I guess some users may not be expecting such "large" difference due to different approaches, either?

Here's another example with glmer(), that seem to confirm your guess:

library(GLMMadaptive)
library(lme4)
#> Loading required package: Matrix
#> 
#> Attaching package: 'lme4'
#> The following object is masked from 'package:GLMMadaptive':
#> 
#>     negative.binomial
library(HSAUR2)
#> Loading required package: tools
data("toenail")

m1 <- glmer(
  outcome ~ treatment * visit + (1 | patientID), 
  data = toenail, 
  family = binomial,
  nAGQ = 20
)

m2 <- mixed_model(
  outcome ~ treatment * visit,
  random = ~ 1 | patientID,
  data = toenail,
  family = binomial,
  nAGQ = 20
)

fixef(m1)
#>                (Intercept)       treatmentterbinafine 
#>                 -0.4530317                  0.1583433 
#>                      visit treatmentterbinafine:visit 
#>                 -0.7913051                 -0.2360022
fixef(m2)
#>                (Intercept)       treatmentterbinafine 
#>                 -0.4665846                  0.1585886 
#>                      visit treatmentterbinafine:visit 
#>                 -0.7915773                 -0.2361823

m3 <- glmer(
  outcome ~ treatment * visit + (1 | patientID),
  data = toenail,
  family = binomial
)

m4 <- mixed_model(
  outcome ~ treatment * visit,
  random = ~ 1 | patientID,
  data = toenail,
  family = binomial
)

fixef(m3)
#>                (Intercept)       treatmentterbinafine 
#>                -1.42518531                -0.01011846 
#>                      visit treatmentterbinafine:visit 
#>                -0.80443955                -0.23512185
fixef(m4)
#>                (Intercept)       treatmentterbinafine 
#>                 -0.5334504                  0.1566945 
#>                      visit treatmentterbinafine:visit 
#>                 -0.7898613                 -0.2357171

^{Created on 2019-01-22 by the reprex package (v0.2.1)}

Maybe you can close this issue then...

drizopoulos commented 5 years ago

Well, this has been the motivation in the first place to develop this package. Namely, that it does matter which method you use to approximate the integrals and that the adaptive Gaussian quadrature is the one that is considered the "gold-standard".

drizopoulos / GLMMadaptive

discrepancy between GLMMadaptive and glmmTMB #7