wzmli / phyloglmm

GNU General Public License v3.0

4 stars 2 forks source link

Hi there,

The reason for opening this issue is regarding the emmeans estimation from a phylo_glmmTMB model. My model is basically:

Y ~ X1 + X2 + X3 + X4 + (1|species) + (1|rand1) + (1|rand3:rand2)

The model points that Y differs between the different levels of X1 (P-value = 0.00047), even accounting for the confounding effect of the remaining covariables X2, X3 and X4. Nevertheless, when estimating the emmeans for such a model, I get a really high SEs, which is quite unexpected given the very low P-value. If I do the same model but without adjusting for the phylogeny ( a regular glmmTMB), I get more reasonable SE estimates - about 4x lower. My impression is that it is related to the "phylogenetic" component of the phylo_glmmTMB that emmeans might get lost.

I am attaching the data, the used tree and a MRE as a zip file. I am also pasting the MRE below.

Any clarification on this issue is much appreciated.

Many thanks! Nicolay

#########################################################################

Reading and preparing the data

{
  data <- read.table("MRE/data2.txt")
  data$species_PM2 <- factor(data$species_PM2)
  data$X1 <- factor(data$X1, levels = c("1-10", "11-100", "101-1000", "> 1000"))

  tree_MRE <- ape::read.tree("MRE/tree_MRE.txt")
  library(phyloglmm)
  tol <- sqrt(.Machine$double.eps)
  if (any(tree_MRE$edge.length<(-1*tol))) stop("negative edge lengths")
  tree_MRE$edge.length <- pmax(0, tree_MRE$edge.length)

  ## enforced match between
  system.time(phyloZ <- phylo.to.Z(tree_MRE))
  phyloZ <- phyloZ[levels(factor(data$species_PM2)), ]
}

Model with phylogeny

MRE_m1_PHY <- phylo_glmmTMB(Y ~
                              X1 +
                              X2 +
                              X3 +
                              X4 +
                              (1|species_PM2) + (1|rand1) + (1|rand3:rand2),
                            , data = data
                            , phyloZ = phyloZ
                            , phylonm = "species_PM2"
                            , REML = FALSE
                            , family = "gaussian"
)

Model without phylogeny

library(glmmTMB)
MRE_m1_NoPHY <- glmmTMB(Y ~
                          X1 +
                          X2 +
                          X3 +
                          X4 +
                          (1|species_PM2) + (1|rand1) + (1|rand3:rand2),
                        , data = data
                        # , phyloZ = phyloZ
                        # , phylonm = "species_PM2"
                        , REML = FALSE
                        , family = "gaussian"
)

Emmeans

{
  MRE_m1_NoPHY_emmeans <- emmeans::lsmeans(MRE_m1_NoPHY, pairwise ~ X1 + X2 + X3 + X4)
  MRE_m1_PHY_emmeans <- emmeans::lsmeans(MRE_m1_PHY , pairwise ~ X1 + X2 + X3 + X4)

}
cbind(as.data.frame(MRE_m1_NoPHY_emmeans$lsmeans)[, 5:6], as.data.frame(MRE_m1_PHY_emmeans$lsmeans)[, 5:6])

Anova Type-II

car::Anova(MRE_m1_PHY)
car::Anova(MRE_m1_NoPHY)

MRE.zip

library(ape) library(phyloglmm) # Importing the data data_MRE <- read.table("https://raw.githubusercontent.com/nicolaycunha/MRE/main/data_MRE2.txt", header = TRUE) tree_MRE <- read.tree("https://raw.githubusercontent.com/nicolaycunha/MRE/main/tree_MRE2.txt") head(data_MRE) # Estimating the Phylo_Z matrix { tol <- sqrt(.Machine$double.eps) if (any(tree_MRE$edge.length<(-1*tol))) stop("negative edge lengths") tree_MRE$edge.length <- pmax(0, tree_MRE$edge.length) system.time(phyloZ_MRE <- phylo.to.Z(tree_MRE)) rr1 <- rownames(phyloZ_MRE) rr2 <- data_MRE$species identical(rr1, rr2) } # converting to factor data_MRE$species <- factor(data_MRE$species) data_MRE$LF <- factor(data_MRE$LF) data_MRE$X <- factor(data_MRE$X) # Ordering phyto Z phyloZ_MRE = phyloZ_MRE[levels(data_MRE$species), ] # setting numeric data_MRE$y <- ifelse(data_MRE$y == "yes", 1, 0) model_MRE <- phylo_glmmTMB(y ~ X + LF + (1|species) , data = data_MRE , phyloZ = phyloZ_MRE , phylonm = "species" , REML = FALSE , family = "binomial") library(car) Anova(model_MRE) library(emmeans) library(ggeffects) # calculating the lsmeans and asymmetrical SE { df <- as.data.frame(emmeans::lsmeans(model_MRE, pairwise ~ X))[1:2, ] summary((lsmeans(model_MRE, pairwise ~ X)), type = "response") df$inv_logit_mean <- arm:::invlogit(as.data.frame(emmeans::lsmeans(model_MRE, pairwise ~ X))[1:2, 3]) df$inv_logit_SE_POSITIVE <- arm:::invlogit(as.data.frame(emmeans::lsmeans(model_MRE, pairwise ~ X))[1:2, 3] + as.data.frame(emmeans::lsmeans(model_MRE, pairwise ~ X))[1:2, 4]) df$inv_logit_SE_NEGATIVE <- arm:::invlogit(as.data.frame(emmeans::lsmeans(model_MRE, pairwise ~ X))[1:2, 3] - as.data.frame(emmeans::lsmeans(model_MRE, pairwise ~ X))[1:2, 4]) } # plotting the results library(ggplot2) ggplot(df, aes(x = X, y = inv_logit_mean))+ geom_errorbar(aes(ymin = inv_logit_SE_NEGATIVE, ymax = inv_logit_SE_POSITIVE), width = 0.10, size = 0.6, colour = "black", alpha = 0.9, position = position_dodge(width = 0.5)) + geom_point(aes(), size = 3, shape = 21, fill = "black", position = position_dodge(width = 0.5)) + theme_classic() + ylab("Y") + xlab("X")