Open mattansb opened 5 months ago
Is there a safe way to find out which contrasts were used?
contrs <- list(
# makes sense:
treatment = contr.treatment,
SAS = contr.SAS,
# doens't make sense:
deviation = datawizard::contr.deviation,
sum = contr.sum,
helmert = contr.helmert,
poly = contr.poly,
equalprior = bayestestR::contr.equalprior
)
data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)
models <- lapply(contrs, function(.c) {
lm(mpg ~ cyl, data = mtcars, contrasts = list(cyl = .c))
})
lapply(models, function(i) i$contrasts)
#> $treatment
#> $treatment$cyl
#> 6 8
#> 4 0 0
#> 6 1 0
#> 8 0 1
#>
#>
#> $SAS
#> $SAS$cyl
#> 4 6
#> 4 1 0
#> 6 0 1
#> 8 0 0
#>
#>
#> $deviation
#> $deviation$cyl
#> 6 8
#> 4 -0.3333333 -0.3333333
#> 6 0.6666667 -0.3333333
#> 8 -0.3333333 0.6666667
#>
#>
#> $sum
#> $sum$cyl
#> [,1] [,2]
#> 4 1 0
#> 6 0 1
#> 8 -1 -1
#>
#>
#> $helmert
#> $helmert$cyl
#> [,1] [,2]
#> 4 -1 -1
#> 6 1 -1
#> 8 0 2
#>
#>
#> $poly
#> $poly$cyl
#> .L .Q
#> 4 -7.071068e-01 0.4082483
#> 6 -7.850462e-17 -0.8164966
#> 8 7.071068e-01 0.4082483
#>
#>
#> $equalprior
#> $equalprior$cyl
#> [,1] [,2]
#> 4 0.0000000 0.8164966
#> 6 -0.7071068 -0.4082483
#> 8 0.7071068 -0.4082483
Created on 2024-04-10 with reprex v2.1.0
Is there a safe way to find out which contrasts were used?
No, I don't think so, but it should only be done if the contrast matrix has a row of all 0s.
contrs <- list(
# makes sense:
treatment = contr.treatment,
SAS = contr.SAS,
# doens't make sense:
deviation = datawizard::contr.deviation,
sum = contr.sum,
helmert = contr.helmert,
poly = contr.poly,
equalprior = bayestestR::contr.equalprior
)
data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)
models <- lapply(contrs, function(.c) {
lm(mpg ~ cyl, data = mtcars, contrasts = list(cyl = .c))
})
all_zero_row <- function(m) {
apply(m == 0, 1, all)
}
has_zeros_row <- function(m) {
any(all_zero_row(m))
}
lapply(models, function(i) {
lapply(i$contrasts, has_zeros_row)
})
#> $treatment
#> $treatment$cyl
#> [1] TRUE
#>
#>
#> $SAS
#> $SAS$cyl
#> [1] TRUE
#>
#>
#> $deviation
#> $deviation$cyl
#> [1] FALSE
#>
#>
#> $sum
#> $sum$cyl
#> [1] FALSE
#>
#>
#> $helmert
#> $helmert$cyl
#> [1] FALSE
#>
#>
#> $poly
#> $poly$cyl
#> [1] FALSE
#>
#>
#> $equalprior
#> $equalprior$cyl
#> [1] FALSE
Created on 2024-04-10 with reprex v2.1.0
library(parameters)
data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)
mtcars$gear <- factor(mtcars$gear)
m <- lm(mpg ~ cyl + gear, data = mtcars, contrasts = list(cyl = datawizard::contr.deviation))
model_parameters(m, include_reference = TRUE)
#> Parameter | Coefficient | SE | 95% CI | t(27) | p
#> -------------------------------------------------------------------
#> (Intercept) | 19.70 | 1.18 | [ 17.28, 22.11] | 16.71 | < .001
#> cyl [6] | -6.66 | 1.63 | [-10.00, -3.31] | -4.09 | < .001
#> cyl [8] | -10.54 | 1.96 | [-14.56, -6.52] | -5.38 | < .001
#> gear [3] | 0.00 | | | |
#> gear [4] | 1.32 | 1.93 | [ -2.63, 5.28] | 0.69 | 0.498
#> gear [5] | 1.50 | 1.85 | [ -2.31, 5.31] | 0.81 | 0.426
#>
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#> using a Wald t-distribution approximation.
m <- lm(mpg ~ cyl + gear, data = mtcars)
model_parameters(m, include_reference = TRUE)
#> Parameter | Coefficient | SE | 95% CI | t(27) | p
#> -------------------------------------------------------------------
#> (Intercept) | 25.43 | 1.88 | [ 21.57, 29.29] | 13.52 | < .001
#> cyl [4] | 0.00 | | | |
#> cyl [6] | -6.66 | 1.63 | [-10.00, -3.31] | -4.09 | < .001
#> cyl [8] | -10.54 | 1.96 | [-14.56, -6.52] | -5.38 | < .001
#> gear [3] | 0.00 | | | |
#> gear [4] | 1.32 | 1.93 | [ -2.63, 5.28] | 0.69 | 0.498
#> gear [5] | 1.50 | 1.85 | [ -2.31, 5.31] | 0.81 | 0.426
#>
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#> using a Wald t-distribution approximation.
m <- lm(
mpg ~ cyl + gear,
data = mtcars,
contrasts = list(
cyl = datawizard::contr.deviation,
gear = contr.sum
)
)
model_parameters(m, include_reference = TRUE)
#> Parameter | Coefficient | SE | 95% CI | t(27) | p
#> -------------------------------------------------------------------
#> (Intercept) | 20.64 | 0.67 | [ 19.26, 22.01] | 30.76 | < .001
#> cyl [6] | -6.66 | 1.63 | [-10.00, -3.31] | -4.09 | < .001
#> cyl [8] | -10.54 | 1.96 | [-14.56, -6.52] | -5.38 | < .001
#> gear [1] | -0.94 | 1.09 | [ -3.18, 1.30] | -0.86 | 0.396
#> gear [2] | 0.38 | 1.11 | [ -1.90, 2.67] | 0.34 | 0.734
#>
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#> using a Wald t-distribution approximation.
m <- lm(
mpg ~ cyl + gear,
data = mtcars,
contrasts = list(
cyl = contr.SAS,
gear = contr.sum
)
)
model_parameters(m, include_reference = TRUE)
#> Parameter | Coefficient | SE | 95% CI | t(27) | p
#> ------------------------------------------------------------------
#> (Intercept) | 15.83 | 1.24 | [13.28, 18.37] | 12.75 | < .001
#> cyl [8] | 0.00 | | | |
#> cyl [4] | 10.54 | 1.96 | [ 6.52, 14.56] | 5.38 | < .001
#> cyl [6] | 3.89 | 1.88 | [ 0.03, 7.75] | 2.07 | 0.049
#> gear [1] | -0.94 | 1.09 | [-3.18, 1.30] | -0.86 | 0.396
#> gear [2] | 0.38 | 1.11 | [-1.90, 2.67] | 0.34 | 0.734
#>
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#> using a Wald t-distribution approximation.
m <- lm(
mpg ~ cyl + gear,
data = mtcars,
contrasts = list(
cyl = contr.SAS,
gear = contr.treatment
)
)
model_parameters(m, include_reference = TRUE)
#> Parameter | Coefficient | SE | 95% CI | t(27) | p
#> ------------------------------------------------------------------
#> (Intercept) | 14.89 | 0.92 | [13.00, 16.77] | 16.19 | < .001
#> cyl [8] | 0.00 | | | |
#> cyl [4] | 10.54 | 1.96 | [ 6.52, 14.56] | 5.38 | < .001
#> cyl [6] | 3.89 | 1.88 | [ 0.03, 7.75] | 2.07 | 0.049
#> gear [3] | 0.00 | | | |
#> gear [4] | 1.32 | 1.93 | [-2.63, 5.28] | 0.69 | 0.498
#> gear [5] | 1.50 | 1.85 | [-2.31, 5.31] | 0.81 | 0.426
#>
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
#> using a Wald t-distribution approximation.
Created on 2024-04-26 with reprex v2.1.0
This currently works for those model objects that have stored their contrasts as model$contrasts
. We need to make this more robust for other models, too - or does every package with modelling functions save their contrasts like this?
glmmTMB saves it as function...
library(glmmTMB)
data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)
mtcars$gear <- factor(mtcars$gear)
m <- glmmTMB(mpg ~ cyl + (1 | gear), data = mtcars, contrasts = list(cyl = contr.sum))
m$modelInfo$contrasts
#> $cyl
#> function (n, contrasts = TRUE, sparse = FALSE)
#> {
#> if (length(n) <= 1L) {
#> if (is.numeric(n) && length(n) == 1L && n > 1L)
#> levels <- seq_len(n)
#> else stop("not enough degrees of freedom to define contrasts")
#> }
#> else levels <- n
#> levels <- as.character(levels)
#> cont <- .Diag(levels, sparse = sparse)
#> if (contrasts) {
#> cont <- cont[, -length(levels), drop = FALSE]
#> cont[length(levels), ] <- -1
#> colnames(cont) <- NULL
#> }
#> cont
#> }
#> <bytecode: 0x000002727afd2770>
#> <environment: namespace:stats>
Created on 2024-04-26 with reprex v2.1.0
You can apply those functions (with some n>2) and then run the test:
all_zero_row <- function(m) {
apply(m == 0, 1, all)
}
has_zeros_row <- function(m) {
any(all_zero_row(m))
}
library(glmmTMB)
data("mtcars")
mtcars$cyl <- factor(mtcars$cyl)
mtcars$gear <- factor(mtcars$gear)
m <- glmmTMB(
mpg ~ cyl + gear,
data = mtcars,
contrasts = list(cyl = contr.sum, gear = contr.treatment)
)
glmmtmb_contr_foos <- m$modelInfo$contrasts
glmmtmb_contr_foos |>
lapply(\(f) f(3)) |>
lapply(has_zeros_row)
#> $cyl
#> [1] FALSE
#>
#> $gear
#> [1] TRUE
#>
include_reference = TRUE
should only add the referent when standard dummy coding is used, but for some reason it also adds a (wrong) 0 whendatawizard::contr.deviation()
is used.