Closed lbui30 closed 1 year ago
Hello @lbui30 👋
I'm not able to reproduce the error on my side, can you run the following code as a reprex and report back what you get? That would be awesome
library(pls)
#>
#> Attaching package: 'pls'
#> The following object is masked from 'package:stats':
#>
#> loadings
library(tidymodels)
library(sessioninfo)
data(meats)

# Recipe with three outcomes; every column is centered and scaled.
norm_rec <- step_normalize(
  recipe(water + fat + protein ~ ., data = meats),
  everything()
)

# Resample, then prep one copy of the recipe on each split.
set.seed(57343)
folds <- vfold_cv(meats, repeats = 10) %>%
  mutate(recipes = map(splits, prepper, recipe = norm_rec))
# Fit a PLS model on the data held in a prepped recipe and report, for each
# number of components, the cumulative proportion of variance explained for
# every outcome and for the predictors.
#
# Arguments:
#   recipe  A prepped recipe (the elements of the `recipes` column above).
#   ...     Not used by this function.
#
# Returns a tall tibble with columns `components`, `source` and `proportion`.
get_var_explained <- function(recipe, ...) {
  # Extract the predictors and outcomes into their own matrices
  y_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_outcomes())
  x_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_predictors())

  # The pls package prefers the data in a data frame where the outcome
  # and predictors are in _matrices_. To make sure this is formatted
  # properly, use the `I()` function to inhibit `data.frame()` from making
  # all the individual columns. `pls_format` should have two columns.
  pls_format <- data.frame(
    endpoints = I(y_mat),
    measurements = I(x_mat)
  )

  # Fit the model
  mod <- plsr(endpoints ~ measurements, data = pls_format)

  # Get the proportion of the predictor variance that is explained
  # by the model for different number of components.
  xve <- explvar(mod) / 100

  # To do the same for the outcome, it is more complex. This code
  # was extracted from pls:::summary.mvr.
  #
  # `$val` is a 3-d array whose first dimension has length 1 here (a single
  # estimate, "train", is requested). Remove only that dimension: a plain
  # `drop()` would also remove the outcome dimension when there is exactly
  # one outcome, turning the matrix into a vector and breaking the steps
  # below.
  r2 <- pls::R2(mod, estimate = "train", intercept = FALSE)$val
  r2 <- array(r2, dim = dim(r2)[-1], dimnames = dimnames(r2)[-1])

  explained <-
    r2 %>%
    # transpose so that components are in rows
    t() %>%
    as_tibble() %>%
    # Add the predictor proportions
    mutate(
      predictors = cumsum(xve) %>% as.vector(),
      components = seq_along(xve)
    ) %>%
    # Put into a tidy format that is tall
    pivot_longer(
      cols = c(-components),
      names_to = "source",
      values_to = "proportion"
    )

  explained
}
# Compute the explained-variance table for every prepped recipe and store the
# results as an unnamed list column.
folds <- mutate(
  folds,
  var = map(recipes, get_var_explained),
  var = unname(var)
)
folds
#> # 10-fold cross-validation repeated 10 times
#> # A tibble: 100 × 5
#> splits id id2 recipes var
#> <list> <chr> <chr> <list> <list>
#> 1 <split [193/22]> Repeat01 Fold01 <recipe> <tibble [400 × 3]>
#> 2 <split [193/22]> Repeat01 Fold02 <recipe> <tibble [400 × 3]>
#> 3 <split [193/22]> Repeat01 Fold03 <recipe> <tibble [400 × 3]>
#> 4 <split [193/22]> Repeat01 Fold04 <recipe> <tibble [400 × 3]>
#> 5 <split [193/22]> Repeat01 Fold05 <recipe> <tibble [400 × 3]>
#> 6 <split [194/21]> Repeat01 Fold06 <recipe> <tibble [400 × 3]>
#> 7 <split [194/21]> Repeat01 Fold07 <recipe> <tibble [400 × 3]>
#> 8 <split [194/21]> Repeat01 Fold08 <recipe> <tibble [400 × 3]>
#> 9 <split [194/21]> Repeat01 Fold09 <recipe> <tibble [400 × 3]>
#> 10 <split [194/21]> Repeat01 Fold10 <recipe> <tibble [400 × 3]>
#> # … with 90 more rows
sessioninfo::session_info()
#> β Session info βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
#> setting value
#> version R version 4.2.1 (2022-06-23)
#> os macOS Monterey 12.6
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/Los_Angeles
#> date 2023-02-05
#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
#>
#> β Packages βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.2.0)
#> backports 1.4.1 2021-12-13 [2] CRAN (R 4.2.0)
#> broom * 1.0.2 2022-12-15 [1] CRAN (R 4.2.0)
#> class 7.3-20 2022-01-16 [2] CRAN (R 4.2.1)
#> cli 3.6.0 2023-01-09 [1] CRAN (R 4.2.1)
#> codetools 0.2-18 2020-11-04 [2] CRAN (R 4.2.1)
#> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.0)
#> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.0)
#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.1)
#> DiceDesign 1.9 2021-02-13 [1] CRAN (R 4.2.0)
#> digest 0.6.31 2022-12-11 [1] CRAN (R 4.2.0)
#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0)
#> evaluate 0.19 2022-12-13 [1] CRAN (R 4.2.0)
#> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0)
#> fastmap 1.1.0 2021-01-25 [2] CRAN (R 4.2.0)
#> foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.0)
#> fs 1.5.2 2021-12-08 [2] CRAN (R 4.2.0)
#> furrr 0.3.1 2022-08-15 [1] CRAN (R 4.2.0)
#> future 1.30.0 2022-12-16 [1] CRAN (R 4.2.0)
#> future.apply 1.10.0 2022-11-05 [1] CRAN (R 4.2.1)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0)
#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.1)
#> globals 0.16.2 2022-11-21 [1] CRAN (R 4.2.0)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0)
#> gower 1.0.1 2022-12-22 [1] CRAN (R 4.2.0)
#> GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.2.0)
#> gtable 0.3.1 2022-09-01 [1] CRAN (R 4.2.0)
#> hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.2.0)
#> highr 0.10 2022-12-22 [1] CRAN (R 4.2.0)
#> htmltools 0.5.4 2022-12-07 [1] CRAN (R 4.2.0)
#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1)
#> ipred 0.9-13 2022-06-02 [1] CRAN (R 4.2.0)
#> iterators 1.0.14 2022-02-05 [1] CRAN (R 4.2.0)
#> knitr 1.41 2022-11-18 [1] CRAN (R 4.2.0)
#> lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.1)
#> lava 1.7.1 2023-01-06 [1] CRAN (R 4.2.1)
#> lhs 1.1.6 2022-12-17 [1] CRAN (R 4.2.0)
#> lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.0)
#> listenv 0.9.0 2022-12-16 [1] CRAN (R 4.2.0)
#> lubridate 1.9.0 2022-11-06 [1] CRAN (R 4.2.1)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0)
#> MASS 7.3-57 2022-04-22 [2] CRAN (R 4.2.1)
#> Matrix 1.5-3 2022-11-11 [1] CRAN (R 4.2.0)
#> modeldata * 1.0.1 2022-09-06 [1] CRAN (R 4.2.1)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0)
#> nnet 7.3-17 2022-01-16 [2] CRAN (R 4.2.1)
#> parallelly 1.34.0 2023-01-13 [1] CRAN (R 4.2.0)
#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0)
#> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0)
#> pls * 2.8-1 2022-07-16 [1] CRAN (R 4.2.0)
#> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.2.0)
#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.2.0)
#> R.cache 0.16.0 2022-07-21 [2] CRAN (R 4.2.0)
#> R.methodsS3 1.8.2 2022-06-13 [2] CRAN (R 4.2.0)
#> R.oo 1.25.0 2022-06-12 [2] CRAN (R 4.2.0)
#> R.utils 2.12.2 2022-11-11 [1] CRAN (R 4.2.0)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0)
#> Rcpp 1.0.9 2022-07-08 [1] CRAN (R 4.2.0)
#> recipes * 1.0.4 2023-01-11 [1] CRAN (R 4.2.0)
#> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.0)
#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0)
#> rmarkdown 2.19 2022-12-15 [1] CRAN (R 4.2.0)
#> rpart 4.1.16 2022-01-24 [2] CRAN (R 4.2.1)
#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.0)
#> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.0)
#> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.0)
#> sessioninfo * 1.2.2 2021-12-06 [2] CRAN (R 4.2.0)
#> stringi 1.7.12 2023-01-11 [1] CRAN (R 4.2.0)
#> stringr 1.5.0 2022-12-02 [1] CRAN (R 4.2.1)
#> styler 1.9.0 2023-01-15 [1] CRAN (R 4.2.0)
#> survival 3.3-1 2022-03-03 [2] CRAN (R 4.2.1)
#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0)
#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0)
#> tidyr * 1.2.1 2022-09-08 [1] CRAN (R 4.2.0)
#> tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.0)
#> timechange 0.2.0 2023-01-11 [1] CRAN (R 4.2.0)
#> timeDate 4022.108 2023-01-07 [1] CRAN (R 4.2.1)
#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0)
#> vctrs 0.5.2 2023-01-23 [1] CRAN (R 4.2.0)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0)
#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0)
#> workflowsets * 1.0.0 2022-07-12 [1] CRAN (R 4.2.0)
#> xfun 0.36 2022-12-21 [1] CRAN (R 4.2.0)
#> yaml 2.3.6 2022-10-18 [1] CRAN (R 4.2.0)
#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0)
#>
#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.2/library
#> [2] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library
#>
#> ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
Created on 2023-02-05 with reprex v2.0.2
So the code in your reprex ran fine at first, until I made it match what I had.
It's been very difficult to run reproducible examples or roll back, because if I try to debug my code or your code (using recover/browser), my R session randomly crashes (which is why this was listed as a bug). When I tried to roll back to purrr 0.3.5, the version this tutorial originally used, tidyverse would not install because it complained about a missing purrr version 1.0.1. So I am only able to create a reprex on the latest version of R, and cannot run it back on R 4.2.0 with purrr 0.3.5.
After re-installing R and RStudio two times, this is what I have so far:
library(pls)
#> Attaching package: 'pls'
#> The following object is masked from 'package:stats':
#>
#> loadings
library(tidymodels)
library(sessioninfo)
data(meats)

# Three-outcome recipe; normalize every column.
base_rec <- recipe(water + fat + protein ~ ., data = meats)
norm_rec <- step_normalize(base_rec, everything())

# Repeated 10-fold CV, with the recipe prepped separately on each split.
set.seed(57343)
folds <- vfold_cv(meats, repeats = 10)
folds <- mutate(folds, recipes = map(splits, prepper, recipe = norm_rec))
# Fit a PLS model on the data held in a prepped recipe and report, for each
# number of components, the cumulative proportion of variance explained for
# every outcome and for the predictors.
#
# Arguments:
#   recipe  A prepped recipe (the elements of the `recipes` column above).
#   ...     Not used by this function.
#
# Returns a tall tibble with columns `components`, `source` and `proportion`.
get_var_explained <- function(recipe, ...) {
  # Extract the predictors and outcomes into their own matrices
  y_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_outcomes())
  x_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_predictors())

  # The pls package prefers the data in a data frame where the outcome
  # and predictors are in _matrices_. To make sure this is formatted
  # properly, use the `I()` function to inhibit `data.frame()` from making
  # all the individual columns. `pls_format` should have two columns.
  pls_format <- data.frame(
    endpoints = I(y_mat),
    measurements = I(x_mat)
  )

  # Fit the model
  mod <- plsr(endpoints ~ measurements, data = pls_format)

  # Get the proportion of the predictor variance that is explained
  # by the model for different number of components.
  xve <- explvar(mod) / 100

  # To do the same for the outcome, it is more complex. This code
  # was extracted from pls:::summary.mvr.
  #
  # `$val` is a 3-d array whose first dimension has length 1 here (a single
  # estimate, "train", is requested). Remove only that dimension: a plain
  # `drop()` would also remove the outcome dimension when there is exactly
  # one outcome, turning the matrix into a vector and breaking the steps
  # below.
  r2 <- pls::R2(mod, estimate = "train", intercept = FALSE)$val
  r2 <- array(r2, dim = dim(r2)[-1], dimnames = dimnames(r2)[-1])

  explained <-
    r2 %>%
    # transpose so that components are in rows
    t() %>%
    as_tibble() %>%
    # Add the predictor proportions
    mutate(
      predictors = cumsum(xve) %>% as.vector(),
      components = seq_along(xve)
    ) %>%
    # Put into a tidy format that is tall
    pivot_longer(
      cols = c(-components),
      names_to = "source",
      values_to = "proportion"
    )

  explained
}
# Add the per-resample explained-variance tables as an unnamed list column.
folds <- mutate(
  folds,
  var = map(recipes, get_var_explained),
  var = unname(var)
)
folds
#> # 10-fold cross-validation repeated 10 times
#> # A tibble: 100 × 5
#> splits id id2 recipes var
#> <list> <chr> <chr> <list> <list>
#> 1 <split [193/22]> Repeat01 Fold01 <recipe> <tibble [400 × 3]>
#> 2 <split [193/22]> Repeat01 Fold02 <recipe> <tibble [400 × 3]>
#> 3 <split [193/22]> Repeat01 Fold03 <recipe> <tibble [400 × 3]>
#> 4 <split [193/22]> Repeat01 Fold04 <recipe> <tibble [400 × 3]>
#> 5 <split [193/22]> Repeat01 Fold05 <recipe> <tibble [400 × 3]>
#> 6 <split [194/21]> Repeat01 Fold06 <recipe> <tibble [400 × 3]>
#> 7 <split [194/21]> Repeat01 Fold07 <recipe> <tibble [400 × 3]>
#> 8 <split [194/21]> Repeat01 Fold08 <recipe> <tibble [400 × 3]>
#> 9 <split [194/21]> Repeat01 Fold09 <recipe> <tibble [400 × 3]>
#> 10 <split [194/21]> Repeat01 Fold10 <recipe> <tibble [400 × 3]>
#> # … with 90 more rows
sessioninfo::session_info()
#> β Session info βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
#> setting value
#> version R version 4.2.2 (2022-10-31 ucrt)
#> os Windows 11 x64 (build 22000)
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_United States.utf8
#> ctype English_United States.utf8
#> tz America/New_York
#> date 2023-02-11
#> pandoc 2.19.2 @ C:/Program Files/RStudio/bin/quarto/bin/tools/ (via rmarkdown)
#>
#> β Packages βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
#> package * version date (UTC) lib source
#> backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0)
#> broom * 1.0.3 2023-01-25 [1] CRAN (R 4.2.2)
#> class 7.3-20 2022-01-16 [2] CRAN (R 4.2.2)
#> cli 3.6.0 2023-01-09 [1] CRAN (R 4.2.2)
#> codetools 0.2-18 2020-11-04 [2] CRAN (R 4.2.2)
#> colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.2.2)
#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.2)
#> DiceDesign 1.9 2021-02-13 [1] CRAN (R 4.2.2)
#> digest 0.6.31 2022-12-11 [1] CRAN (R 4.2.2)
#> dplyr * 1.1.0 2023-01-29 [1] CRAN (R 4.2.2)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.2)
#> evaluate 0.20 2023-01-17 [1] CRAN (R 4.2.2)
#> fansi 1.0.4 2023-01-22 [1] CRAN (R 4.2.2)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.2)
#> foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.2)
#> fs 1.6.0 2023-01-23 [1] CRAN (R 4.2.2)
#> furrr 0.3.1 2022-08-15 [1] CRAN (R 4.2.2)
#> future 1.31.0 2023-02-01 [1] CRAN (R 4.2.2)
#> future.apply 1.10.0 2022-11-05 [1] CRAN (R 4.2.2)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.2)
#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.2)
#> globals 0.16.2 2022-11-21 [1] CRAN (R 4.2.2)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.2)
#> gower 1.0.1 2022-12-22 [1] CRAN (R 4.2.2)
#> GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.2.2)
#> gtable 0.3.1 2022-09-01 [1] CRAN (R 4.2.2)
#> hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.2.2)
#> htmltools 0.5.4 2022-12-07 [1] CRAN (R 4.2.2)
#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.2)
#> ipred 0.9-13 2022-06-02 [1] CRAN (R 4.2.2)
#> iterators 1.0.14 2022-02-05 [1] CRAN (R 4.2.2)
#> knitr 1.42 2023-01-25 [1] CRAN (R 4.2.2)
#> lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.2)
#> lava 1.7.1 2023-01-06 [1] CRAN (R 4.2.2)
#> lhs 1.1.6 2022-12-17 [1] CRAN (R 4.2.2)
#> lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.2)
#> listenv 0.9.0 2022-12-16 [1] CRAN (R 4.2.2)
#> lubridate 1.9.1 2023-01-24 [1] CRAN (R 4.2.2)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.2)
#> MASS 7.3-58.1 2022-08-03 [2] CRAN (R 4.2.2)
#> Matrix 1.5-1 2022-09-13 [2] CRAN (R 4.2.2)
#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.2.2)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.2)
#> nnet 7.3-18 2022-09-28 [2] CRAN (R 4.2.2)
#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.2)
#> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.2)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.2)
#> pls * 2.8-1 2022-07-16 [1] CRAN (R 4.2.2)
#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.2.2)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.2)
#> Rcpp 1.0.10 2023-01-22 [1] CRAN (R 4.2.2)
#> recipes * 1.0.4 2023-01-11 [1] CRAN (R 4.2.2)
#> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.2)
#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.2)
#> rmarkdown 2.20 2023-01-19 [1] CRAN (R 4.2.2)
#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.2)
#> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.2)
#> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.2)
#> sessioninfo * 1.2.2 2021-12-06 [1] CRAN (R 4.2.2)
#> survival 3.4-0 2022-08-09 [2] CRAN (R 4.2.2)
#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.2)
#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.2)
#> tidyr * 1.3.0 2023-01-24 [1] CRAN (R 4.2.2)
#> tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.2)
#> timechange 0.2.0 2023-01-11 [1] CRAN (R 4.2.2)
#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.2)
#> vctrs 0.5.2 2023-01-23 [1] CRAN (R 4.2.2)
#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.2)
#> workflowsets * 1.0.0 2022-07-12 [1] CRAN (R 4.2.2)
#>
#> [1] C:/Users/lechi/AppData/Local/R/win-library/4.2
#> [2] C:/Program Files/R/R-4.2.2/library
#>
#>
I was able to reproduce the error after changing the recipe to `recipe(protein ~ ., data = meats)`, as you can see below. I was able to isolate the issue to something that happens before `predictors = cumsum(xve) %>% as.vector()`.
The challenge is that I cannot figure out more, because the log output from purrr makes it extremely hard to tell which line of code errored out, which is why I raised this issue in the purrr package. If recover/browser were reliable when using purrr (fewer crashes), I could figure out what is going on; that is why I suspect it's a bug.
library(modeldata)
library(tidyverse)
library(pls)
#>
#> Attaching package: 'pls'
#> The following object is masked from 'package:stats':
#>
#> loadings
library(tidymodels)
library(sessioninfo)
data(meats)

# ====> This is the line that is different between your reprex and mine <====
# Only `protein` is an outcome here, instead of water + fat + protein.
norm_rec <- step_normalize(
  recipe(protein ~ ., data = meats),
  everything()
)

# Resample, then prep one copy of the recipe on each split.
set.seed(57343)
folds <- vfold_cv(meats, repeats = 10) %>%
  mutate(recipes = map(splits, prepper, recipe = norm_rec))
# Fit a PLS model on the data held in a prepped recipe and build a tall table
# (components, source, proportion) of the variance explained for the outcomes
# and the predictors. `...` is not used by this function.
get_var_explained <- function(recipe, ...) {
# Extract the predictors and outcomes into their own matrices
y_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_outcomes())
x_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_predictors())
# The pls package prefers the data in a data frame where the outcome
# and predictors are in _matrices_. To make sure this is formatted
# properly, use the `I()` function to inhibit `data.frame()` from making
# all the individual columns. `pls_format` should have two columns.
pls_format <- data.frame(
endpoints = I(y_mat),
measurements = I(x_mat)
)
# Fit the model
mod <- plsr(endpoints ~ measurements, data = pls_format)
# Get the proportion of the predictor variance that is explained
# by the model for different number of components.
xve <- explvar(mod)/100
# To do the same for the outcome, it is more complex. This code
# was extracted from pls:::summary.mvr.
# NOTE: this is where the single-outcome reprex goes wrong. With one outcome
# the `$val` array has dimensions [1, 1, 102], so `drop()` returns a plain
# numeric vector instead of a matrix; `t()` then gives a one-row matrix and
# the `mutate()` below fails with "`predictors` must be size 1, not 102".
explained <-
drop(pls::R2(mod, estimate = "train", intercept = FALSE)$val) %>%
# transpose so that components are in rows
t() %>%
as_tibble() %>%
# Add the predictor proportions
mutate(predictors = cumsum(xve) %>% as.vector(),
components = seq_along(xve)) %>%
# Put into a tidy format that is tall
pivot_longer(
cols = c(-components),
names_to = "source",
values_to = "proportion"
)
}
# This line is run here so that its output comes before the error
sessioninfo::session_info()
#> β Session info βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
#> setting value
#> version R version 4.2.2 (2022-10-31 ucrt)
#> os Windows 11 x64 (build 22000)
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_United States.utf8
#> ctype English_United States.utf8
#> tz America/New_York
#> date 2023-02-11
#> pandoc 2.19.2 @ C:/Program Files/RStudio/bin/quarto/bin/tools/ (via rmarkdown)
#>
#> β Packages βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.2)
#> backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0)
#> broom * 1.0.3 2023-01-25 [1] CRAN (R 4.2.2)
#> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.2.2)
#> class 7.3-20 2022-01-16 [2] CRAN (R 4.2.2)
#> cli 3.6.0 2023-01-09 [1] CRAN (R 4.2.2)
#> codetools 0.2-18 2020-11-04 [2] CRAN (R 4.2.2)
#> colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.2.2)
#> crayon 1.5.2 2022-09-29 [1] CRAN (R 4.2.2)
#> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.2)
#> dbplyr 2.3.0 2023-01-16 [1] CRAN (R 4.2.2)
#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.2)
#> DiceDesign 1.9 2021-02-13 [1] CRAN (R 4.2.2)
#> digest 0.6.31 2022-12-11 [1] CRAN (R 4.2.2)
#> dplyr * 1.1.0 2023-01-29 [1] CRAN (R 4.2.2)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.2)
#> evaluate 0.20 2023-01-17 [1] CRAN (R 4.2.2)
#> fansi 1.0.4 2023-01-22 [1] CRAN (R 4.2.2)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.2)
#> forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.2.2)
#> foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.2)
#> fs 1.6.0 2023-01-23 [1] CRAN (R 4.2.2)
#> furrr 0.3.1 2022-08-15 [1] CRAN (R 4.2.2)
#> future 1.31.0 2023-02-01 [1] CRAN (R 4.2.2)
#> future.apply 1.10.0 2022-11-05 [1] CRAN (R 4.2.2)
#> gargle 1.3.0 2023-01-30 [1] CRAN (R 4.2.2)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.2)
#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.2)
#> globals 0.16.2 2022-11-21 [1] CRAN (R 4.2.2)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.2)
#> googledrive 2.0.0 2021-07-08 [1] CRAN (R 4.2.2)
#> googlesheets4 1.0.1 2022-08-13 [1] CRAN (R 4.2.2)
#> gower 1.0.1 2022-12-22 [1] CRAN (R 4.2.2)
#> GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.2.2)
#> gtable 0.3.1 2022-09-01 [1] CRAN (R 4.2.2)
#> hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.2.2)
#> haven 2.5.1 2022-08-22 [1] CRAN (R 4.2.2)
#> hms 1.1.2 2022-08-19 [1] CRAN (R 4.2.2)
#> htmltools 0.5.4 2022-12-07 [1] CRAN (R 4.2.2)
#> httr 1.4.4 2022-08-17 [1] CRAN (R 4.2.2)
#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.2)
#> ipred 0.9-13 2022-06-02 [1] CRAN (R 4.2.2)
#> iterators 1.0.14 2022-02-05 [1] CRAN (R 4.2.2)
#> jsonlite 1.8.4 2022-12-06 [1] CRAN (R 4.2.2)
#> knitr 1.42 2023-01-25 [1] CRAN (R 4.2.2)
#> lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.2)
#> lava 1.7.1 2023-01-06 [1] CRAN (R 4.2.2)
#> lhs 1.1.6 2022-12-17 [1] CRAN (R 4.2.2)
#> lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.2)
#> listenv 0.9.0 2022-12-16 [1] CRAN (R 4.2.2)
#> lubridate 1.9.1 2023-01-24 [1] CRAN (R 4.2.2)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.2)
#> MASS 7.3-58.1 2022-08-03 [2] CRAN (R 4.2.2)
#> Matrix 1.5-1 2022-09-13 [2] CRAN (R 4.2.2)
#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.2.2)
#> modelr 0.1.10 2022-11-11 [1] CRAN (R 4.2.2)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.2)
#> nnet 7.3-18 2022-09-28 [2] CRAN (R 4.2.2)
#> parallelly 1.34.0 2023-01-13 [1] CRAN (R 4.2.2)
#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.2)
#> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.2)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.2)
#> pls * 2.8-1 2022-07-16 [1] CRAN (R 4.2.2)
#> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.2.2)
#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.2.2)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.2)
#> Rcpp 1.0.10 2023-01-22 [1] CRAN (R 4.2.2)
#> readr * 2.1.3 2022-10-01 [1] CRAN (R 4.2.2)
#> readxl 1.4.1 2022-08-17 [1] CRAN (R 4.2.2)
#> recipes * 1.0.4 2023-01-11 [1] CRAN (R 4.2.2)
#> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.2)
#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.2)
#> rmarkdown 2.20 2023-01-19 [1] CRAN (R 4.2.2)
#> rpart 4.1.19 2022-10-21 [2] CRAN (R 4.2.2)
#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.2)
#> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.2)
#> rvest 1.0.3 2022-08-19 [1] CRAN (R 4.2.2)
#> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.2)
#> sessioninfo * 1.2.2 2021-12-06 [1] CRAN (R 4.2.2)
#> stringi 1.7.12 2023-01-11 [1] CRAN (R 4.2.2)
#> stringr * 1.5.0 2022-12-02 [1] CRAN (R 4.2.2)
#> survival 3.4-0 2022-08-09 [2] CRAN (R 4.2.2)
#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.2)
#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.2)
#> tidyr * 1.3.0 2023-01-24 [1] CRAN (R 4.2.2)
#> tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.2)
#> tidyverse * 1.3.2 2022-07-18 [1] CRAN (R 4.2.2)
#> timechange 0.2.0 2023-01-11 [1] CRAN (R 4.2.2)
#> timeDate 4022.108 2023-01-07 [1] CRAN (R 4.2.2)
#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.2)
#> tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.2.2)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.2)
#> vctrs 0.5.2 2023-01-23 [1] CRAN (R 4.2.2)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.2)
#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.2)
#> workflowsets * 1.0.0 2022-07-12 [1] CRAN (R 4.2.2)
#> xfun 0.36 2022-12-21 [1] CRAN (R 4.2.2)
#> xml2 1.3.3 2021-11-30 [1] CRAN (R 4.2.2)
#> yaml 2.3.7 2023-01-23 [1] CRAN (R 4.2.2)
#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.2)
#>
#> [1] C:/Users/lechi/AppData/Local/R/win-library/4.2
#> [2] C:/Program Files/R/R-4.2.2/library
It errors out here:
# This is the call that fails in the single-outcome reprex: the error is
# raised inside get_var_explained() on the first element of `recipes`.
folds <-
folds %>%
mutate(var = map(recipes, get_var_explained),
var = unname(var))
#> Error in `mutate()`:
#> ℹ In argument: `var = map(recipes, get_var_explained)`.
#> Caused by error in `map()`:
#> ℹ In index: 1.
#> Caused by error in `mutate()`:
#> ℹ In argument: `predictors = cumsum(xve) %>% as.vector()`.
#> Caused by error:
#> ! `predictors` must be size 1, not 102.
#> Backtrace:
#> β
#> 1. ββfolds %>% ...
#> 2. ββdplyr::mutate(., var = map(recipes, get_var_explained), var = unname(var))
#> 3. ββdplyr:::mutate.data.frame(...)
#> 4. β ββdplyr:::mutate_cols(.data, dplyr_quosures(...), by)
#> 5. β ββbase::withCallingHandlers(...)
#> 6. β ββdplyr:::mutate_col(dots[[i]], data, mask, new_columns)
#> 7. β ββmask$eval_all_mutate(quo)
#> 8. β ββdplyr (local) eval()
#> 9. ββpurrr::map(recipes, get_var_explained)
#> 10. β ββpurrr:::map_("list", .x, .f, ..., .progress = .progress)
#> 11. β ββpurrr:::with_indexed_errors(...)
#> 12. β β ββbase::withCallingHandlers(...)
#> 13. β ββpurrr:::call_with_cleanup(...)
#> 14. β ββglobal .f(.x[[i]], ...)
#> 15. β ββ... %>% ...
#> 16. ββtidyr::pivot_longer(...)
#> 17. ββdplyr::mutate(., predictors = cumsum(xve) %>% as.vector(), components = seq_along(xve))
#> 18. ββdplyr:::mutate.data.frame(., predictors = cumsum(xve) %>% as.vector(), components = seq_along(xve))
#> 19. β ββdplyr:::mutate_cols(.data, dplyr_quosures(...), by)
#> 20. β ββbase::withCallingHandlers(...)
#> 21. β ββdplyr:::mutate_col(dots[[i]], data, mask, new_columns)
#> 22. β ββmask$eval_all_mutate(quo)
#> 23. β ββdplyr (local) eval()
#> 24. ββdplyr:::dplyr_internal_error(...)
#> 25. β ββrlang::abort(class = c(class, "dplyr:::internal_error"), dplyr_error_data = data)
#> 26. β ββrlang:::signal_abort(cnd, .file)
#> 27. β ββbase::signalCondition(cnd)
#> 28. ββdplyr (local) `<fn>`(`<dpl:::__>`)
#> 29. ββrlang::abort(message, class = error_class, parent = parent, call = error_call)
Created on 2023-02-11 with reprex v2.0.2
It looks like the reprex is not reading my OS correctly, though; I will edit it to fix it above: os Windows 11 x64 (build 22000)
Thank you! I have been able to reproduce it!
library(modeldata)
library(tidyverse)
library(pls)
#>
#> Attaching package: 'pls'
#> The following object is masked from 'package:stats':
#>
#> loadings
library(tidymodels)
library(sessioninfo)
data(meats)

# Single-outcome recipe (protein only); normalize every column.
base_rec <- recipe(protein ~ ., data = meats)
norm_rec <- step_normalize(base_rec, everything())

# Repeated 10-fold CV, with the recipe prepped separately on each split.
set.seed(57343)
folds <- vfold_cv(meats, repeats = 10)
folds <- mutate(folds, recipes = map(splits, prepper, recipe = norm_rec))
# Fit a PLS model on the data held in a prepped recipe and build a tall table
# (components, source, proportion) of the variance explained for the outcomes
# and the predictors. `...` is not used by this function.
get_var_explained <- function(recipe, ...) {
# Extract the predictors and outcomes into their own matrices
y_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_outcomes())
x_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_predictors())
# The pls package prefers the data in a data frame where the outcome
# and predictors are in _matrices_. To make sure this is formatted
# properly, use the `I()` function to inhibit `data.frame()` from making
# all the individual columns. `pls_format` should have two columns.
pls_format <- data.frame(
endpoints = I(y_mat),
measurements = I(x_mat)
)
# Fit the model
mod <- plsr(endpoints ~ measurements, data = pls_format)
# Get the proportion of the predictor variance that is explained
# by the model for different number of components.
xve <- explvar(mod)/100
# To do the same for the outcome, it is more complex. This code
# was extracted from pls:::summary.mvr.
# NOTE: this is where the single-outcome reprex goes wrong. With one outcome
# the `$val` array has dimensions [1, 1, 102], so `drop()` returns a plain
# numeric vector instead of a matrix; `t()` then gives a one-row matrix and
# the `mutate()` below fails with "`predictors` must be size 1, not 102".
explained <-
drop(pls::R2(mod, estimate = "train", intercept = FALSE)$val) %>%
# transpose so that components are in rows
t() %>%
as_tibble() %>%
# Add the predictor proportions
mutate(predictors = cumsum(xve) %>% as.vector(),
components = seq_along(xve)) %>%
# Put into a tidy format that is tall
pivot_longer(
cols = c(-components),
names_to = "source",
values_to = "proportion"
)
}
# This is the call that fails in the single-outcome reprex: the error is
# raised inside get_var_explained() on the first element of `recipes`.
folds <-
folds %>%
mutate(var = map(recipes, get_var_explained),
var = unname(var))
#> Error in `mutate()`:
#> ℹ In argument: `var = map(recipes, get_var_explained)`.
#> Caused by error in `map()`:
#> ℹ In index: 1.
#> Caused by error in `mutate()`:
#> ℹ In argument: `predictors = cumsum(xve) %>% as.vector()`.
#> Caused by error:
#> ! `predictors` must be size 1, not 102.
#> Backtrace:
#> β
#> 1. ββfolds %>% ...
#> 2. ββdplyr::mutate(., var = map(recipes, get_var_explained), var = unname(var))
#> 3. ββdplyr:::mutate.data.frame(...)
#> 4. β ββdplyr:::mutate_cols(.data, dplyr_quosures(...), by)
#> 5. β ββbase::withCallingHandlers(...)
#> 6. β ββdplyr:::mutate_col(dots[[i]], data, mask, new_columns)
#> 7. β ββmask$eval_all_mutate(quo)
#> 8. β ββdplyr (local) eval()
#> 9. ββpurrr::map(recipes, get_var_explained)
#> 10. β ββpurrr:::map_("list", .x, .f, ..., .progress = .progress)
#> 11. β ββpurrr:::with_indexed_errors(...)
#> 12. β β ββbase::withCallingHandlers(...)
#> 13. β ββpurrr:::call_with_cleanup(...)
#> 14. β ββglobal .f(.x[[i]], ...)
#> 15. β ββ... %>% ...
#> 16. ββtidyr::pivot_longer(...)
#> 17. ββdplyr::mutate(., predictors = cumsum(xve) %>% as.vector(), components = seq_along(xve))
#> 18. ββdplyr:::mutate.data.frame(., predictors = cumsum(xve) %>% as.vector(), components = seq_along(xve))
#> 19. β ββdplyr:::mutate_cols(.data, dplyr_quosures(...), by)
#> 20. β ββbase::withCallingHandlers(...)
#> 21. β ββdplyr:::mutate_col(dots[[i]], data, mask, new_columns)
#> 22. β ββmask$eval_all_mutate(quo)
#> 23. β ββdplyr (local) eval()
#> 24. ββdplyr:::dplyr_internal_error(...)
#> 25. β ββrlang::abort(class = c(class, "dplyr:::internal_error"), dplyr_error_data = data)
#> 26. β ββrlang:::signal_abort(cnd, .file)
#> 27. β ββbase::signalCondition(cnd)
#> 28. ββdplyr (local) `<fn>`(`<dpl:::__>`)
#> 29. ββrlang::abort(message, class = error_class, parent = parent, call = error_call)
Created on 2023-02-16 with reprex v2.0.2
I figured out the issue. The changes to purrr didn't break the tutorial outright; the code broke when the number of outcomes was reduced to 1 instead of 3 (as in the document). In that case `drop(pls::R2(mod, estimate = "train", intercept = FALSE)$val)` produces a numeric vector instead of a numeric matrix, because the array dimensions are [1, 1, 102] and `drop()` removes every dimension of length 1.
This PR fixes this edge case by using `abind::adrop()` instead of `drop()`: https://github.com/tidymodels/tidymodels.org/pull/277.
I will thus close this issue.
This issue has been automatically locked. If you believe you have found a related problem, please file a new issue (with a reprex https://reprex.tidyverse.org) and link to this issue.
Tips for a helpful bug report:
The reprex is the code that can be found on this webpage: https://www.tidymodels.org/learn/models/pls/
The problem
I'm having trouble running the demo code on that page. It looks like the `purrr::map()` function is erroring out with the message:
Reproducible example
Use the latest version of purrr and follow the instructions here: https://www.tidymodels.org/learn/models/pls/
Also, this error is super confusing. Why would an environment like a ggplot or a prep script be a list()? Why would we assume that it's not actually 16 long? Is there a fundamental assumption in purrr that lists cannot be nested?
If both the first index and the second index of any structure (including recipes) in purrr are equal, then what is the point of even using a map?
Isn't the point of nested structures in tidymodels that you can apply tidy principles to models and not just datasets?