stan-dev / projpred

Projection predictive variable selection
https://mc-stan.org/projpred/
Other
110 stars 25 forks source link

Amend #461, #463, #465: allow changing `nloo`; K-fold CV needs `cvfits`; fewer restrictions; add message #466

Closed fweber144 closed 11 months ago

fweber144 commented 11 months ago

This amends #461, #463, and #465 (i.e., the new varsel.vsel() and cv_varsel.vsel() methods) by:

These are only the most important changes. See the commit messages for details.

fweber144 commented 11 months ago

Illustration:

devtools::load_all()
dat_gauss <- data.frame(y = df_gaussian$y, df_gaussian$x)
rfit <- rstanarm::stan_glm(
  y ~ X1 + X2 + X3 + X4 + X5, family = gaussian(), data = dat_gauss,
  QR = TRUE, chains = 1, iter = 1000, refresh = 0, seed = 9876
)
cv_fits <- run_cvfun(rfit, K = 2, seed = 3462)

# varsel ------------------------------------------------------------------

vs_search <- varsel(rfit, method = "L1", nterms_max = 3, refit_prj = FALSE,
                    seed = 5555)

vs_eval <- varsel(vs_search,
                  nclusters_pred = 10,
                  seed = 6666)

# cv_varsel ---------------------------------------------------------------

## `validate_search = FALSE` ----------------------------------------------

### PSIS-LOO CV -----------------------------------------------------------

cvvs_search_valF_loo <- cv_varsel(rfit, nloo = 20, validate_search = FALSE, method = "L1",
                                  nterms_max = 3, refit_prj = FALSE, seed = 5555)

cvvs_eval_valF_loo <- cv_varsel(cvvs_search_valF_loo,
                                nclusters_pred = 10,
                                seed = 6666)

cvvs_eval_valF_loo_from_vs <- cv_varsel(vs_search,
                                        nloo = 20,
                                        validate_search = FALSE,
                                        nclusters_pred = 10,
                                        seed = 6666)

### K-fold CV -------------------------------------------------------------

cvvs_search_valF <- cv_varsel(rfit, validate_search = FALSE, method = "L1",
                              cv_method = "kfold", cvfits = cv_fits,
                              nterms_max = 3, nclusters_pred = 2, seed = 5555)

cvvs_eval_valF <- cv_varsel(cvvs_search_valF,
                            nclusters_pred = 10,
                            seed = 6666)

cvvs_eval_valF_from_vs <- cv_varsel(vs_search,
                                    validate_search = FALSE,
                                    cv_method = "kfold", cvfits = cv_fits,
                                    nclusters_pred = 10,
                                    seed = 6666)

cvvs_eval_valF_from_loo <- cv_varsel(cvvs_search_valF_loo,
                                     cv_method = "kfold", cvfits = cv_fits,
                                     nclusters_pred = 10,
                                     seed = 6666)

### PSIS-LOO CV with old K-fold CV results --------------------------------

cvvs_eval_valF_loo_from_valF <- cv_varsel(cvvs_search_valF,
                                          cv_method = "LOO", nloo = 20,
                                          nclusters_pred = 10,
                                          seed = 6666)

## `validate_search = TRUE` -----------------------------------------------

### PSIS-LOO CV -----------------------------------------------------------

cvvs_search_valT_loo <- cv_varsel(rfit, nloo = 20, method = "L1", nterms_max = 3,
                                  refit_prj = FALSE, seed = 5555)

cvvs_eval_valT_loo <- cv_varsel(cvvs_search_valT_loo,
                                nclusters_pred = 10,
                                seed = 6666)

cvvs_eval_valT_loo_from_vs <- cv_varsel(vs_search,
                                        nloo = 20,
                                        nclusters_pred = 10,
                                        seed = 6666)

cvvs_eval_valT_loo_from_valF_loo <- cv_varsel(cvvs_search_valF_loo,
                                              validate_search = TRUE,
                                              nclusters_pred = 10,
                                              seed = 6666)

cvvs_eval_valT_loo_from_valF <- cv_varsel(cvvs_search_valF,
                                          cv_method = "LOO", nloo = 20,
                                          validate_search = TRUE,
                                          nclusters_pred = 10,
                                          seed = 6666)

### K-fold CV -------------------------------------------------------------

cvvs_search_valT <- cv_varsel(rfit, method = "L1", cv_method = "kfold", cvfits = cv_fits,
                              nterms_max = 3, refit_prj = FALSE, seed = 5555)

cvvs_eval_valT <- cv_varsel(cvvs_search_valT,
                            nclusters_pred = 10,
                            seed = 6666)

cvvs_eval_valT_from_vs <- cv_varsel(vs_search,
                                    cv_method = "kfold", cvfits = cv_fits,
                                    nclusters_pred = 10,
                                    seed = 6666)

cvvs_eval_valT_from_valF_loo <- cv_varsel(cvvs_search_valF_loo,
                                          cv_method = "kfold", cvfits = cv_fits,
                                          validate_search = TRUE,
                                          nclusters_pred = 10,
                                          seed = 6666)

cvvs_eval_valT_from_valF <- cv_varsel(cvvs_search_valF,
                                      validate_search = TRUE,
                                      nclusters_pred = 10,
                                      seed = 6666)

cvvs_eval_valT_from_valT_loo <- cv_varsel(cvvs_search_valT_loo,
                                          cv_method = "kfold", cvfits = cv_fits,
                                          nclusters_pred = 10,
                                          seed = 6666)

### PSIS-LOO CV with old K-fold CV results --------------------------------

cvvs_eval_valT_loo_from_valT <- cv_varsel(cvvs_search_valT,
                                          cv_method = "LOO", nloo = 20,
                                          nclusters_pred = 10,
                                          seed = 6666)

## `validate_search = FALSE` with old `validate_search = TRUE` resu -------
## lts

cvvs_eval_valF_loo_from_valT_loo <- cv_varsel(cvvs_search_valT_loo,
                                              validate_search = FALSE,
                                              nclusters_pred = 10,
                                              seed = 6666)

cvvs_eval_valF_from_valT_loo <- cv_varsel(cvvs_search_valT_loo,
                                          cv_method = "kfold", cvfits = cv_fits,
                                          validate_search = FALSE,
                                          nclusters_pred = 10,
                                          seed = 6666)

cvvs_eval_valF_loo_from_valT <- cv_varsel(cvvs_search_valT,
                                          cv_method = "LOO", nloo = 20,
                                          validate_search = FALSE,
                                          nclusters_pred = 10,
                                          seed = 6666)

cvvs_eval_valF_from_valT <- cv_varsel(cvvs_search_valT,
                                      validate_search = FALSE,
                                      nclusters_pred = 10,
                                      seed = 6666)

# varsel() with old cv_varsel() results -----------------------------------

vs_eval_from_valF_loo <- varsel(cvvs_search_valF_loo,
                                nclusters_pred = 10,
                                seed = 6666)

vs_eval_from_valF <- varsel(cvvs_search_valF,
                            nclusters_pred = 10,
                            seed = 6666)

vs_eval_from_valT_loo <- varsel(cvvs_search_valT_loo,
                                nclusters_pred = 10,
                                seed = 6666)

vs_eval_from_valT <- varsel(cvvs_search_valT,
                            nclusters_pred = 10,
                            seed = 6666)