stan-dev / projpred

Projection predictive variable selection
https://mc-stan.org/projpred/
Other
109 stars 25 forks source link

Strip underscores from response category names (`as.matrix.projection()` for `brms::categorical()`) #394

Closed fweber144 closed 1 year ago

fweber144 commented 1 year ago

This PR ensures that in case of the brms::categorical() family, underscores are stripped from the response category names in the column names of as.matrix.projection() output, as done by brms.


Illustration

Before this PR:

options(mc.cores = parallel::detectCores(logical = FALSE))

set.seed(856824715)
seed_fit <- sample.int(.Machine$integer.max, 1)
seed_prj <- sample.int(.Machine$integer.max, 1)

data(VA, package = "MASS")
levels(VA$cell) <- paste0("lvl__", levels(VA$cell))

nsbj <- 3L
VA$sbj <- gl(n = nsbj,
             k = floor(nrow(VA) / nsbj),
             length = nrow(VA),
             labels = paste0("subj", seq_len(nsbj)))
VA$sbj <- as.integer(VA$sbj)

ngrp <- 8L
VA$grp <- gl(n = ngrp,
             k = floor(nrow(VA) / ngrp),
             length = nrow(VA),
             labels = paste0("gr", seq_len(ngrp)))
VA$grp <- sample(VA$grp)

bfit <- brms::brm(
  formula = cell ~ treat + age + Karn + (1 | sbj) + (1 | grp),
  data = VA,
  family = brms::categorical(),
  refresh = 0,
  seed = seed_fit
)
#> Compiling Stan program...
#> Start sampling
colnames(as.matrix(bfit))
#>  [1] "b_mulvl2_Intercept"           "b_mulvl3_Intercept"          
#>  [3] "b_mulvl4_Intercept"           "b_mulvl2_treat2"             
#>  [5] "b_mulvl2_age"                 "b_mulvl2_Karn"               
#>  [7] "b_mulvl3_treat2"              "b_mulvl3_age"                
#>  [9] "b_mulvl3_Karn"                "b_mulvl4_treat2"             
#> [11] "b_mulvl4_age"                 "b_mulvl4_Karn"               
#> [13] "sd_grp__mulvl2_Intercept"     "sd_sbj__mulvl2_Intercept"    
#> [15] "sd_grp__mulvl3_Intercept"     "sd_sbj__mulvl3_Intercept"    
#> [17] "sd_grp__mulvl4_Intercept"     "sd_sbj__mulvl4_Intercept"    
#> [19] "r_grp__mulvl2[gr1,Intercept]" "r_grp__mulvl2[gr2,Intercept]"
#> [21] "r_grp__mulvl2[gr3,Intercept]" "r_grp__mulvl2[gr4,Intercept]"
#> [23] "r_grp__mulvl2[gr5,Intercept]" "r_grp__mulvl2[gr6,Intercept]"
#> [25] "r_grp__mulvl2[gr7,Intercept]" "r_grp__mulvl2[gr8,Intercept]"
#> [27] "r_sbj__mulvl2[1,Intercept]"   "r_sbj__mulvl2[2,Intercept]"  
#> [29] "r_sbj__mulvl2[3,Intercept]"   "r_grp__mulvl3[gr1,Intercept]"
#> [31] "r_grp__mulvl3[gr2,Intercept]" "r_grp__mulvl3[gr3,Intercept]"
#> [33] "r_grp__mulvl3[gr4,Intercept]" "r_grp__mulvl3[gr5,Intercept]"
#> [35] "r_grp__mulvl3[gr6,Intercept]" "r_grp__mulvl3[gr7,Intercept]"
#> [37] "r_grp__mulvl3[gr8,Intercept]" "r_sbj__mulvl3[1,Intercept]"  
#> [39] "r_sbj__mulvl3[2,Intercept]"   "r_sbj__mulvl3[3,Intercept]"  
#> [41] "r_grp__mulvl4[gr1,Intercept]" "r_grp__mulvl4[gr2,Intercept]"
#> [43] "r_grp__mulvl4[gr3,Intercept]" "r_grp__mulvl4[gr4,Intercept]"
#> [45] "r_grp__mulvl4[gr5,Intercept]" "r_grp__mulvl4[gr6,Intercept]"
#> [47] "r_grp__mulvl4[gr7,Intercept]" "r_grp__mulvl4[gr8,Intercept]"
#> [49] "r_sbj__mulvl4[1,Intercept]"   "r_sbj__mulvl4[2,Intercept]"  
#> [51] "r_sbj__mulvl4[3,Intercept]"   "lprior"                      
#> [53] "lp__"

library(projpred)
#> This is projpred version 2.4.0.9000.
soltrms <- c("treat", "age", "(1 | sbj)", "(1 | grp)")
prj <- project(
  bfit,
  solution_terms = soltrms,
  nclusters = 2,
  seed = seed_prj
)
### Output shortened manually:
#> Warning: Inner iterations did not coverge - nlminb message: false convergence
#> (8)

#> [...]
###
prjmat <- as.matrix(prj)
#> Warning in as.matrix.projection(prj): Note that projection was performed using
#> clustering and the clusters might have different weights.
colnames(prjmat)
#>  [1] "b_mulvl__2_Intercept"                           
#>  [2] "b_mulvl__3_Intercept"                           
#>  [3] "b_mulvl__4_Intercept"                           
#>  [4] "b_mulvl__2_treat2"                              
#>  [5] "b_mulvl__3_treat2"                              
#>  [6] "b_mulvl__4_treat2"                              
#>  [7] "b_mulvl__2_age"                                 
#>  [8] "b_mulvl__3_age"                                 
#>  [9] "b_mulvl__4_age"                                 
#> [10] "sd_sbj__mulvl__2_Intercept"                     
#> [11] "sd_sbj__mulvl__3_Intercept"                     
#> [12] "sd_sbj__mulvl__4_Intercept"                     
#> [13] "cor_sbj__mulvl__2_Intercept__mulvl__3_Intercept"
#> [14] "cor_sbj__mulvl__2_Intercept__mulvl__4_Intercept"
#> [15] "cor_sbj__mulvl__3_Intercept__mulvl__4_Intercept"
#> [16] "sd_grp__mulvl__2_Intercept"                     
#> [17] "sd_grp__mulvl__3_Intercept"                     
#> [18] "sd_grp__mulvl__4_Intercept"                     
#> [19] "cor_grp__mulvl__2_Intercept__mulvl__3_Intercept"
#> [20] "cor_grp__mulvl__2_Intercept__mulvl__4_Intercept"
#> [21] "cor_grp__mulvl__3_Intercept__mulvl__4_Intercept"
#> [22] "r_sbj__mulvl__2[1,Intercept]"                   
#> [23] "r_sbj__mulvl__2[2,Intercept]"                   
#> [24] "r_sbj__mulvl__2[3,Intercept]"                   
#> [25] "r_sbj__mulvl__3[1,Intercept]"                   
#> [26] "r_sbj__mulvl__3[2,Intercept]"                   
#> [27] "r_sbj__mulvl__3[3,Intercept]"                   
#> [28] "r_sbj__mulvl__4[1,Intercept]"                   
#> [29] "r_sbj__mulvl__4[2,Intercept]"                   
#> [30] "r_sbj__mulvl__4[3,Intercept]"                   
#> [31] "r_grp__mulvl__2[gr1,Intercept]"                 
#> [32] "r_grp__mulvl__2[gr2,Intercept]"                 
#> [33] "r_grp__mulvl__2[gr3,Intercept]"                 
#> [34] "r_grp__mulvl__2[gr4,Intercept]"                 
#> [35] "r_grp__mulvl__2[gr5,Intercept]"                 
#> [36] "r_grp__mulvl__2[gr6,Intercept]"                 
#> [37] "r_grp__mulvl__2[gr7,Intercept]"                 
#> [38] "r_grp__mulvl__2[gr8,Intercept]"                 
#> [39] "r_grp__mulvl__3[gr1,Intercept]"                 
#> [40] "r_grp__mulvl__3[gr2,Intercept]"                 
#> [41] "r_grp__mulvl__3[gr3,Intercept]"                 
#> [42] "r_grp__mulvl__3[gr4,Intercept]"                 
#> [43] "r_grp__mulvl__3[gr5,Intercept]"                 
#> [44] "r_grp__mulvl__3[gr6,Intercept]"                 
#> [45] "r_grp__mulvl__3[gr7,Intercept]"                 
#> [46] "r_grp__mulvl__3[gr8,Intercept]"                 
#> [47] "r_grp__mulvl__4[gr1,Intercept]"                 
#> [48] "r_grp__mulvl__4[gr2,Intercept]"                 
#> [49] "r_grp__mulvl__4[gr3,Intercept]"                 
#> [50] "r_grp__mulvl__4[gr4,Intercept]"                 
#> [51] "r_grp__mulvl__4[gr5,Intercept]"                 
#> [52] "r_grp__mulvl__4[gr6,Intercept]"                 
#> [53] "r_grp__mulvl__4[gr7,Intercept]"                 
#> [54] "r_grp__mulvl__4[gr8,Intercept]"

Created on 2023-03-06 with reprex v2.0.2

After this PR:

options(mc.cores = parallel::detectCores(logical = FALSE))

set.seed(856824715)
seed_fit <- sample.int(.Machine$integer.max, 1)
seed_prj <- sample.int(.Machine$integer.max, 1)

data(VA, package = "MASS")
levels(VA$cell) <- paste0("lvl__", levels(VA$cell))

nsbj <- 3L
VA$sbj <- gl(n = nsbj,
             k = floor(nrow(VA) / nsbj),
             length = nrow(VA),
             labels = paste0("subj", seq_len(nsbj)))
VA$sbj <- as.integer(VA$sbj)

ngrp <- 8L
VA$grp <- gl(n = ngrp,
             k = floor(nrow(VA) / ngrp),
             length = nrow(VA),
             labels = paste0("gr", seq_len(ngrp)))
VA$grp <- sample(VA$grp)

bfit <- brms::brm(
  formula = cell ~ treat + age + Karn + (1 | sbj) + (1 | grp),
  data = VA,
  family = brms::categorical(),
  refresh = 0,
  seed = seed_fit
)
#> Compiling Stan program...
#> Start sampling
colnames(as.matrix(bfit))
#>  [1] "b_mulvl2_Intercept"           "b_mulvl3_Intercept"          
#>  [3] "b_mulvl4_Intercept"           "b_mulvl2_treat2"             
#>  [5] "b_mulvl2_age"                 "b_mulvl2_Karn"               
#>  [7] "b_mulvl3_treat2"              "b_mulvl3_age"                
#>  [9] "b_mulvl3_Karn"                "b_mulvl4_treat2"             
#> [11] "b_mulvl4_age"                 "b_mulvl4_Karn"               
#> [13] "sd_grp__mulvl2_Intercept"     "sd_sbj__mulvl2_Intercept"    
#> [15] "sd_grp__mulvl3_Intercept"     "sd_sbj__mulvl3_Intercept"    
#> [17] "sd_grp__mulvl4_Intercept"     "sd_sbj__mulvl4_Intercept"    
#> [19] "r_grp__mulvl2[gr1,Intercept]" "r_grp__mulvl2[gr2,Intercept]"
#> [21] "r_grp__mulvl2[gr3,Intercept]" "r_grp__mulvl2[gr4,Intercept]"
#> [23] "r_grp__mulvl2[gr5,Intercept]" "r_grp__mulvl2[gr6,Intercept]"
#> [25] "r_grp__mulvl2[gr7,Intercept]" "r_grp__mulvl2[gr8,Intercept]"
#> [27] "r_sbj__mulvl2[1,Intercept]"   "r_sbj__mulvl2[2,Intercept]"  
#> [29] "r_sbj__mulvl2[3,Intercept]"   "r_grp__mulvl3[gr1,Intercept]"
#> [31] "r_grp__mulvl3[gr2,Intercept]" "r_grp__mulvl3[gr3,Intercept]"
#> [33] "r_grp__mulvl3[gr4,Intercept]" "r_grp__mulvl3[gr5,Intercept]"
#> [35] "r_grp__mulvl3[gr6,Intercept]" "r_grp__mulvl3[gr7,Intercept]"
#> [37] "r_grp__mulvl3[gr8,Intercept]" "r_sbj__mulvl3[1,Intercept]"  
#> [39] "r_sbj__mulvl3[2,Intercept]"   "r_sbj__mulvl3[3,Intercept]"  
#> [41] "r_grp__mulvl4[gr1,Intercept]" "r_grp__mulvl4[gr2,Intercept]"
#> [43] "r_grp__mulvl4[gr3,Intercept]" "r_grp__mulvl4[gr4,Intercept]"
#> [45] "r_grp__mulvl4[gr5,Intercept]" "r_grp__mulvl4[gr6,Intercept]"
#> [47] "r_grp__mulvl4[gr7,Intercept]" "r_grp__mulvl4[gr8,Intercept]"
#> [49] "r_sbj__mulvl4[1,Intercept]"   "r_sbj__mulvl4[2,Intercept]"  
#> [51] "r_sbj__mulvl4[3,Intercept]"   "lprior"                      
#> [53] "lp__"

library(projpred)
#> This is projpred version 2.4.0.9000.
soltrms <- c("treat", "age", "(1 | sbj)", "(1 | grp)")
prj <- project(
  bfit,
  solution_terms = soltrms,
  nclusters = 2,
  seed = seed_prj
)
### Output shortened manually:
#> Warning: Inner iterations did not coverge - nlminb message: false convergence
#> (8)

#> [...]
###
prjmat <- as.matrix(prj)
#> Warning in as.matrix.projection(prj): Note that projection was performed using
#> clustering and the clusters might have different weights.
colnames(prjmat)
#>  [1] "b_mulvl2_Intercept"                         
#>  [2] "b_mulvl3_Intercept"                         
#>  [3] "b_mulvl4_Intercept"                         
#>  [4] "b_mulvl2_treat2"                            
#>  [5] "b_mulvl3_treat2"                            
#>  [6] "b_mulvl4_treat2"                            
#>  [7] "b_mulvl2_age"                               
#>  [8] "b_mulvl3_age"                               
#>  [9] "b_mulvl4_age"                               
#> [10] "sd_sbj__mulvl2_Intercept"                   
#> [11] "sd_sbj__mulvl3_Intercept"                   
#> [12] "sd_sbj__mulvl4_Intercept"                   
#> [13] "cor_sbj__mulvl2_Intercept__mulvl3_Intercept"
#> [14] "cor_sbj__mulvl2_Intercept__mulvl4_Intercept"
#> [15] "cor_sbj__mulvl3_Intercept__mulvl4_Intercept"
#> [16] "sd_grp__mulvl2_Intercept"                   
#> [17] "sd_grp__mulvl3_Intercept"                   
#> [18] "sd_grp__mulvl4_Intercept"                   
#> [19] "cor_grp__mulvl2_Intercept__mulvl3_Intercept"
#> [20] "cor_grp__mulvl2_Intercept__mulvl4_Intercept"
#> [21] "cor_grp__mulvl3_Intercept__mulvl4_Intercept"
#> [22] "r_sbj__mulvl2[1,Intercept]"                 
#> [23] "r_sbj__mulvl2[2,Intercept]"                 
#> [24] "r_sbj__mulvl2[3,Intercept]"                 
#> [25] "r_sbj__mulvl3[1,Intercept]"                 
#> [26] "r_sbj__mulvl3[2,Intercept]"                 
#> [27] "r_sbj__mulvl3[3,Intercept]"                 
#> [28] "r_sbj__mulvl4[1,Intercept]"                 
#> [29] "r_sbj__mulvl4[2,Intercept]"                 
#> [30] "r_sbj__mulvl4[3,Intercept]"                 
#> [31] "r_grp__mulvl2[gr1,Intercept]"               
#> [32] "r_grp__mulvl2[gr2,Intercept]"               
#> [33] "r_grp__mulvl2[gr3,Intercept]"               
#> [34] "r_grp__mulvl2[gr4,Intercept]"               
#> [35] "r_grp__mulvl2[gr5,Intercept]"               
#> [36] "r_grp__mulvl2[gr6,Intercept]"               
#> [37] "r_grp__mulvl2[gr7,Intercept]"               
#> [38] "r_grp__mulvl2[gr8,Intercept]"               
#> [39] "r_grp__mulvl3[gr1,Intercept]"               
#> [40] "r_grp__mulvl3[gr2,Intercept]"               
#> [41] "r_grp__mulvl3[gr3,Intercept]"               
#> [42] "r_grp__mulvl3[gr4,Intercept]"               
#> [43] "r_grp__mulvl3[gr5,Intercept]"               
#> [44] "r_grp__mulvl3[gr6,Intercept]"               
#> [45] "r_grp__mulvl3[gr7,Intercept]"               
#> [46] "r_grp__mulvl3[gr8,Intercept]"               
#> [47] "r_grp__mulvl4[gr1,Intercept]"               
#> [48] "r_grp__mulvl4[gr2,Intercept]"               
#> [49] "r_grp__mulvl4[gr3,Intercept]"               
#> [50] "r_grp__mulvl4[gr4,Intercept]"               
#> [51] "r_grp__mulvl4[gr5,Intercept]"               
#> [52] "r_grp__mulvl4[gr6,Intercept]"               
#> [53] "r_grp__mulvl4[gr7,Intercept]"               
#> [54] "r_grp__mulvl4[gr8,Intercept]"

Created on 2023-03-06 with reprex v2.0.2