stan-dev / rstanarm

rstanarm R package for Bayesian applied regression modeling
https://mc-stan.org/rstanarm
GNU General Public License v3.0
385 stars 132 forks source link

contrasts argument ignored by stan_glm #572

Open wds15 opened 2 years ago

wds15 commented 2 years ago

Summary:

The contrast argument does not get honoured with stan_glm...contrasts can only be modified via the global options.

Description:

This is a bug!

Reproducible Steps:

library(rstanarm)
#> Loading required package: Rcpp
#> This is rstanarm version 2.21.1
#> - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
#> - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
#> - For execution on a local, multicore CPU with excess RAM we recommend calling
#>   options(mc.cores = parallel::detectCores())
library(MASS)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:MASS':
#> 
#>     select
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(posterior)
#> This is posterior version 1.3.0
#> 
#> Attaching package: 'posterior'
#> The following objects are masked from 'package:stats':
#> 
#>     mad, sd, var

set.seed(3054698)

L <- 10
df <- data.frame(y=rpois(L, 10), level=1:L) %>% mutate(olevel=factor(level, ordered=TRUE))

df
#>     y level olevel
#> 1  14     1      1
#> 2   1     2      2
#> 3   4     3      3
#> 4  10     4      4
#> 5  12     5      5
#> 6   8     6      6
#> 7   9     7      7
#> 8   9     8      8
#> 9  18     9      9
#> 10 12    10     10
df$olevel
#>  [1] 1  2  3  4  5  6  7  8  9  10
#> Levels: 1 < 2 < 3 < 4 < 5 < 6 < 7 < 8 < 9 < 10

pf1 <- glm(y ~ 1 + olevel, data=df, family=poisson, contrasts=list(olevel=MASS::contr.sdif))
model.matrix(pf1)
#>    (Intercept) olevel2-1 olevel3-2 olevel4-3 olevel5-4 olevel6-5 olevel7-6
#> 1            1      -0.9      -0.8      -0.7      -0.6      -0.5      -0.4
#> 2            1       0.1      -0.8      -0.7      -0.6      -0.5      -0.4
#> 3            1       0.1       0.2      -0.7      -0.6      -0.5      -0.4
#> 4            1       0.1       0.2       0.3      -0.6      -0.5      -0.4
#> 5            1       0.1       0.2       0.3       0.4      -0.5      -0.4
#> 6            1       0.1       0.2       0.3       0.4       0.5      -0.4
#> 7            1       0.1       0.2       0.3       0.4       0.5       0.6
#> 8            1       0.1       0.2       0.3       0.4       0.5       0.6
#> 9            1       0.1       0.2       0.3       0.4       0.5       0.6
#> 10           1       0.1       0.2       0.3       0.4       0.5       0.6
#>    olevel8-7 olevel9-8 olevel10-9
#> 1       -0.3      -0.2       -0.1
#> 2       -0.3      -0.2       -0.1
#> 3       -0.3      -0.2       -0.1
#> 4       -0.3      -0.2       -0.1
#> 5       -0.3      -0.2       -0.1
#> 6       -0.3      -0.2       -0.1
#> 7       -0.3      -0.2       -0.1
#> 8        0.7      -0.2       -0.1
#> 9        0.7       0.8       -0.1
#> 10       0.7       0.8        0.9
#> attr(,"assign")
#>  [1] 0 1 1 1 1 1 1 1 1 1
#> attr(,"contrasts")
#> attr(,"contrasts")$olevel
#>     2-1  3-2  4-3  5-4  6-5  7-6  8-7  9-8 10-9
#> 1  -0.9 -0.8 -0.7 -0.6 -0.5 -0.4 -0.3 -0.2 -0.1
#> 2   0.1 -0.8 -0.7 -0.6 -0.5 -0.4 -0.3 -0.2 -0.1
#> 3   0.1  0.2 -0.7 -0.6 -0.5 -0.4 -0.3 -0.2 -0.1
#> 4   0.1  0.2  0.3 -0.6 -0.5 -0.4 -0.3 -0.2 -0.1
#> 5   0.1  0.2  0.3  0.4 -0.5 -0.4 -0.3 -0.2 -0.1
#> 6   0.1  0.2  0.3  0.4  0.5 -0.4 -0.3 -0.2 -0.1
#> 7   0.1  0.2  0.3  0.4  0.5  0.6 -0.3 -0.2 -0.1
#> 8   0.1  0.2  0.3  0.4  0.5  0.6  0.7 -0.2 -0.1
#> 9   0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8 -0.1
#> 10  0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9

options(contrasts=c(unordered="contr.treatment", ordered="contr.sdif"))

f_inter <- 0.2
total_sd <- 2
inter_sd <- sqrt(f_inter * total_sd^2)
delta_sd <- sqrt( (1-f_inter) * total_sd^2)

inter_sd
#> [1] 0.8944272
delta_sd
#> [1] 1.788854

sqrt(inter_sd^2 + delta_sd^2)
#> [1] 2

## stan_glm IGNORES contrasts argument...must work with defaults!!! TODO: REPORT!!!
spf1 <- stan_glm(y ~ 1 + olevel, data=df, family=poisson, contrasts=list(olevel=contr.helmert),
                 prior_intercept=normal(0, inter_sd, FALSE),
                 prior=normal(0, delta_sd, FALSE),
                 refresh=0,
                 prior_PD=TRUE)

## we got contr.sdif here, but we asked for helmert!!!
mm <- model.matrix(spf1)
mm
#>    (Intercept) olevel2-1 olevel3-2 olevel4-3 olevel5-4 olevel6-5 olevel7-6
#> 1            1      -0.9      -0.8      -0.7      -0.6      -0.5      -0.4
#> 2            1       0.1      -0.8      -0.7      -0.6      -0.5      -0.4
#> 3            1       0.1       0.2      -0.7      -0.6      -0.5      -0.4
#> 4            1       0.1       0.2       0.3      -0.6      -0.5      -0.4
#> 5            1       0.1       0.2       0.3       0.4      -0.5      -0.4
#> 6            1       0.1       0.2       0.3       0.4       0.5      -0.4
#> 7            1       0.1       0.2       0.3       0.4       0.5       0.6
#> 8            1       0.1       0.2       0.3       0.4       0.5       0.6
#> 9            1       0.1       0.2       0.3       0.4       0.5       0.6
#> 10           1       0.1       0.2       0.3       0.4       0.5       0.6
#>    olevel8-7 olevel9-8 olevel10-9
#> 1       -0.3      -0.2       -0.1
#> 2       -0.3      -0.2       -0.1
#> 3       -0.3      -0.2       -0.1
#> 4       -0.3      -0.2       -0.1
#> 5       -0.3      -0.2       -0.1
#> 6       -0.3      -0.2       -0.1
#> 7       -0.3      -0.2       -0.1
#> 8        0.7      -0.2       -0.1
#> 9        0.7       0.8       -0.1
#> 10       0.7       0.8        0.9
#> attr(,"assign")
#>  [1] 0 1 1 1 1 1 1 1 1 1
#> attr(,"contrasts")
#> attr(,"contrasts")$olevel
#> [1] "contr.sdif"

Created on 2022-08-23 by the reprex package (v2.0.1)

RStanARM Version:

2.21.1

R Version:

4.0.1

Operating System:

macOS Monterey