On my machine, the following example causes the runtime message to be displayed:
# Data --------------------------------------------------------------------
data("df_gaussian", package = "projpred")
dat <- data.frame(y = df_gaussian$y, df_gaussian$x)
dat$group <- gl(n = 8, k = floor(nrow(dat) / 8), length = nrow(dat),
                labels = paste0("gr", seq_len(8)))
set.seed(457211)
group_icpts_truth <- rnorm(nlevels(dat$group), sd = 6)
group_X1_truth <- rnorm(nlevels(dat$group), sd = 6)
icpt <- -4.2
dat$y <- icpt +
  group_icpts_truth[dat$group] +
  group_X1_truth[dat$group] * dat$X1
dat$y <- rnorm(nrow(dat), mean = dat$y, sd = 4)
# Make the dataset artificially long:
dat <- do.call(rbind, replicate(6, dat, simplify = FALSE))
# Split up into training and test (hold-out) dataset:
idcs_test <- sample.int(nrow(dat), size = nrow(dat) / 3)
dat_train <- dat[-idcs_test, , drop = FALSE]
dat_test <- dat[idcs_test, , drop = FALSE]
# Reference model fit -----------------------------------------------------
rfit_train <- rstanarm::stan_glmer(
  y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 + X12 + X13 + X14 +
    X15 + X16 + X17 + X18 + X19 + X20 + (1 | group),
  data = dat_train,
  cores = 4,
  refresh = 0,
  seed = 1140350788
)
# projpred ----------------------------------------------------------------
# With projpred at commit c7b1d2d7:
devtools::load_all(".")
options(projpred.extra_verbose = TRUE)
d_test_list <- list(
  data = dat_test[, names(dat_test) != "y"],
  offset = rep(0, nrow(dat_test)),
  weights = rep(1, nrow(dat_test)),
  y = dat_test[["y"]]
)
Sys.time()
vs <- varsel(rfit_train,
             d_test = d_test_list,
             refit_prj = FALSE,
             seed = 46782345)
Sys.time()
And the following example doesn't:
# Data --------------------------------------------------------------------
data("df_gaussian", package = "projpred")
dat <- data.frame(y = df_gaussian$y, df_gaussian$x)
dat$group <- gl(n = 8, k = floor(nrow(dat) / 8), length = nrow(dat),
                labels = paste0("gr", seq_len(8)))
set.seed(457211)
group_icpts_truth <- rnorm(nlevels(dat$group), sd = 6)
group_X1_truth <- rnorm(nlevels(dat$group), sd = 6)
icpt <- -4.2
dat$y <- icpt +
  group_icpts_truth[dat$group] +
  group_X1_truth[dat$group] * dat$X1
dat$y <- rnorm(nrow(dat), mean = dat$y, sd = 4)
# Make the dataset artificially long:
dat <- do.call(rbind, replicate(6, dat, simplify = FALSE))
# Split up into training and test (hold-out) dataset:
idcs_test <- sample.int(nrow(dat), size = nrow(dat) / 3)
dat_train <- dat[-idcs_test, , drop = FALSE]
dat_test <- dat[idcs_test, , drop = FALSE]
# Reference model fit -----------------------------------------------------
rfit_train <- rstanarm::stan_glm(
  y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 + X12 + X13 + X14 +
    X15 + X16 + X17 + X18 + X19 + X20,
  data = dat_train,
  cores = 4,
  refresh = 0,
  seed = 1140350788
)
# projpred ----------------------------------------------------------------
# With projpred at commit c7b1d2d7:
devtools::load_all(".")
options(projpred.extra_verbose = TRUE)
d_test_list <- list(
  data = dat_test[, names(dat_test) != "y"],
  offset = rep(0, nrow(dat_test)),
  weights = rep(1, nrow(dat_test)),
  y = dat_test[["y"]]
)
Sys.time()
vs <- varsel(rfit_train,
             d_test = d_test_list,
             refit_prj = FALSE,
             seed = 46782345)
Sys.time()
This refines the estimation of the runtime of the forward search that remains after the projection onto the intercept-only submodel; in particular, it allows for an interval estimate in the case of multilevel and/or additive ("smooth") terms.
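To sketch the basic idea (this is only an illustration, not the actual implementation; all numbers and the two factors fac_lower and fac_upper are made up): the runtime measured for the intercept-only projection is multiplied by the number of remaining projections and by a lower and an upper scaling factor, which yields an interval estimate when multilevel and/or additive terms are involved.
# Illustrative sketch only -- all numbers and object names below are made up
# and do not come from projpred's actual code:
time_icpt_only_s <- 0.8   # measured runtime of the intercept-only projection (seconds)
n_prjs_remaining <- 210   # assumed number of projections remaining in the forward search
fac_lower <- 1            # hypothetical scaling factor for the cheapest type of submodel
fac_upper <- 25           # hypothetical scaling factor for multilevel/additive submodels
runtime_range_s <- time_icpt_only_s * n_prjs_remaining * c(fac_lower, fac_upper)
cat(sprintf("Estimated remaining runtime: %.0f to %.0f seconds\n",
            runtime_range_s[1], runtime_range_s[2]))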
The factors used for scaling up the runtime estimate (which comes from the intercept-only projection) were derived empirically as follows:
From these microbenchmark results, we obtain the following factors (I should have assigned the microbenchmark::microbenchmark() outputs to different objects instead of working with the hard-coded times here, but I was too lazy to re-run):