tidymodels / recipes

Pipeable steps for feature engineering and data preprocessing to prepare for modeling
https://recipes.tidymodels.org
Other
574 stars 113 forks source link

tidy() does not work on step_zv() #410

Closed jaredlander closed 4 years ago

jaredlander commented 5 years ago

From @topepo's AML workshop, when calling tidy() on the result of step_zv() you get the following error.

Error in `$<-.data.frame`(`*tmp*`, "id", value = "zv_glgrl") : 
  replacement has 1 row, data has 0

tidy() works on other steps.

Code to reproduce below:

library(AmesHousing)
library(tidymodels)

ames <- make_ames() %>% 
    select(-matches('Qu'))

set.seed(4595)
data_split <- initial_split(data=ames, prop=0.75, strata='Sale_Price')
data_split

ames_train <- training(data_split)
ames_test <- testing(data_split)

mod_rec_zv <- recipe(
    Sale_Price ~ Longitude + Latitude + Neighborhood, 
    data = ames_train
) %>%
    step_log(Sale_Price, base = 10) %>%
    # Lump factor levels that occur in 
    # <= 5% of data as "other"
    # step_other(Neighborhood, threshold = 0.05) %>%
    # Create dummy variables for _any_ factor variables
    step_dummy(all_nominal()) %>% 
    step_zv(everything()) %>% 
    prep()

tidy(mod_rec_zv, number=3)
hlynurhallgrims commented 5 years ago

Quick note: I think this might have to do with the fact that step_zv didn't remove any predictors. The tidy method worked for those of us who used step_nzv, where there were some predictors removed.

jaredlander commented 5 years ago

That's a solid point. It should give a message saying nothing was removed rather than giving an error.

topepo commented 4 years ago

I had the error during the workshop, but something else seems to have fixed it. Can either of you reproduce it and do a reprex::reprex(si = TRUE)?

library(AmesHousing)
library(tidymodels)
#> Registered S3 method overwritten by 'xts':
#>   method     from
#>   as.zoo.xts zoo
#> ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────── tidymodels 0.0.3 ──
#> ✓ broom     0.5.2            ✓ purrr     0.3.3       
#> ✓ dials     0.0.4            ✓ recipes   0.1.7       
#> ✓ dplyr     0.8.3            ✓ rsample   0.0.5       
#> ✓ ggplot2   3.2.1.9000       ✓ tibble    2.99.99.9010
#> ✓ infer     0.5.1            ✓ yardstick 0.0.4       
#> ✓ parsnip   0.0.4.9000
#> ── Conflicts ────────────────────────────────────────────────────────────────────────────────────── tidymodels_conflicts() ──
#> x purrr::discard()    masks scales::discard()
#> x dplyr::filter()     masks stats::filter()
#> x dplyr::lag()        masks stats::lag()
#> x ggplot2::margin()   masks dials::margin()
#> x recipes::step()     masks stats::step()
#> x recipes::yj_trans() masks scales::yj_trans()

ames <- make_ames() %>% 
  dplyr::select(-matches('Qu'))

set.seed(4595)
data_split <- initial_split(data=ames, prop=0.75, strata='Sale_Price')
data_split
#> <2199/731/2930>

ames_train <- training(data_split)
ames_test <- testing(data_split)

mod_rec_zv <- recipe(
  Sale_Price ~ Longitude + Latitude + Neighborhood, 
  data = ames_train
) %>%
  step_log(Sale_Price, base = 10) %>%
  step_dummy(all_nominal()) %>% 
  step_zv(everything()) %>% 
  prep()

tidy(mod_rec_zv, number=3)
#> # A tibble: 0 x 2
#> # … with 2 variables: terms <chr>, id <chr>

Created on 2019-12-14 by the reprex package (v0.3.0)

Session info ``` r devtools::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 3.6.1 (2019-07-05) #> os macOS Mojave 10.14.6 #> system x86_64, darwin15.6.0 #> ui X11 #> language (EN) #> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz America/New_York #> date 2019-12-14 #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date lib source #> AmesHousing * 0.0.3 2017-12-17 [1] CRAN (R 3.6.0) #> assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0) #> backports 1.1.5 2019-10-02 [1] CRAN (R 3.6.0) #> base64enc 0.1-3 2015-07-28 [1] CRAN (R 3.6.0) #> bayesplot 1.7.0 2019-05-23 [1] CRAN (R 3.6.0) #> boot 1.3-22 2019-04-02 [1] CRAN (R 3.6.1) #> broom * 0.5.2 2019-04-07 [1] CRAN (R 3.6.0) #> callr 3.3.2 2019-09-22 [1] CRAN (R 3.6.0) #> class 7.3-15 2019-01-01 [1] CRAN (R 3.6.1) #> cli 2.0.0.9000 2019-12-14 [1] Github (r-lib/cli@ac2c18b) #> codetools 0.2-16 2018-12-24 [1] CRAN (R 3.6.1) #> colorspace 1.4-1 2019-03-18 [1] CRAN (R 3.6.0) #> colourpicker 1.0 2017-09-27 [1] CRAN (R 3.6.0) #> crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.0) #> crosstalk 1.0.0 2016-12-21 [1] CRAN (R 3.6.0) #> desc 1.2.0 2018-05-01 [1] CRAN (R 3.6.0) #> devtools 2.2.1.9000 2019-10-31 [1] Github (r-lib/devtools@1da672a) #> dials * 0.0.4 2019-12-02 [1] CRAN (R 3.6.1) #> DiceDesign 1.8-1 2019-07-31 [1] CRAN (R 3.6.0) #> digest 0.6.23 2019-11-23 [1] CRAN (R 3.6.1) #> dplyr * 0.8.3 2019-07-04 [1] CRAN (R 3.6.0) #> DT 0.9 2019-09-17 [1] CRAN (R 3.6.0) #> dygraphs 1.1.1.6 2018-07-11 [1] CRAN (R 3.6.0) #> ellipsis 0.3.0 2019-09-20 [1] CRAN (R 3.6.0) #> evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.0) #> fansi 0.4.0 2018-10-05 [1] CRAN (R 3.6.0) #> fastmap 1.0.1 2019-10-08 [1] CRAN (R 3.6.0) #> fs 1.3.1 2019-05-06 [1] CRAN (R 3.6.0) #> furrr 0.1.0 2018-05-16 [1] CRAN (R 3.6.0) #> future 1.15.1 2019-11-25 [1] CRAN (R 3.6.0) #> generics 0.0.2 2018-11-29 [1] CRAN (R 3.6.0) #> ggplot2 * 
3.2.1.9000 2019-12-06 [1] local #> ggridges 0.5.1 2018-09-27 [1] CRAN (R 3.6.0) #> globals 0.12.5 2019-12-07 [1] CRAN (R 3.6.0) #> glue 1.3.1 2019-03-12 [1] CRAN (R 3.6.0) #> gower 0.2.1 2019-05-14 [1] CRAN (R 3.6.0) #> gridExtra 2.3 2017-09-09 [1] CRAN (R 3.6.0) #> gtable 0.3.0 2019-03-25 [1] CRAN (R 3.6.0) #> gtools 3.8.1 2018-06-26 [1] CRAN (R 3.6.0) #> highr 0.8 2019-03-20 [1] CRAN (R 3.6.0) #> htmltools 0.4.0 2019-10-04 [1] CRAN (R 3.6.0) #> htmlwidgets 1.5.1 2019-10-08 [1] CRAN (R 3.6.0) #> httpuv 1.5.2 2019-09-11 [1] CRAN (R 3.6.0) #> igraph 1.2.4.1 2019-04-22 [1] CRAN (R 3.6.0) #> infer * 0.5.1 2019-11-19 [1] CRAN (R 3.6.0) #> inline 0.3.15 2018-05-18 [1] CRAN (R 3.6.0) #> ipred 0.9-9 2019-04-28 [1] CRAN (R 3.6.0) #> janeaustenr 0.1.5 2017-06-10 [1] CRAN (R 3.6.0) #> knitr 1.26 2019-11-12 [1] CRAN (R 3.6.0) #> later 1.0.0 2019-10-04 [1] CRAN (R 3.6.1) #> lattice 0.20-38 2018-11-04 [1] CRAN (R 3.6.1) #> lava 1.6.6 2019-08-01 [1] CRAN (R 3.6.0) #> lifecycle 0.1.0 2019-08-01 [1] CRAN (R 3.6.0) #> listenv 0.8.0 2019-12-05 [1] CRAN (R 3.6.1) #> lme4 1.1-21 2019-03-05 [1] CRAN (R 3.6.0) #> loo 2.1.0 2019-03-13 [1] CRAN (R 3.6.0) #> lubridate 1.7.4 2018-04-11 [1] CRAN (R 3.6.0) #> magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.0) #> markdown 1.1 2019-08-07 [1] CRAN (R 3.6.0) #> MASS 7.3-51.4 2019-03-31 [1] CRAN (R 3.6.1) #> Matrix 1.2-17 2019-03-22 [1] CRAN (R 3.6.1) #> matrixStats 0.55.0 2019-09-07 [1] CRAN (R 3.6.0) #> memoise 1.1.0 2017-04-21 [1] CRAN (R 3.6.0) #> mime 0.7 2019-06-11 [1] CRAN (R 3.6.0) #> miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 3.6.0) #> minqa 1.2.4 2014-10-09 [1] CRAN (R 3.6.0) #> munsell 0.5.0 2018-06-12 [1] CRAN (R 3.6.0) #> nlme 3.1-140 2019-05-12 [1] CRAN (R 3.6.1) #> nloptr 1.2.1 2018-10-03 [1] CRAN (R 3.6.0) #> nnet 7.3-12 2016-02-02 [1] CRAN (R 3.6.1) #> parsnip * 0.0.4.9000 2019-12-14 [1] local #> pillar 1.4.2.9001 2019-11-24 [1] Github (r-lib/pillar@82370d7) #> pkgbuild 1.0.6 2019-10-09 [1] CRAN (R 3.6.0) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN 
(R 3.6.0) #> pkgload 1.0.2 2018-10-29 [1] CRAN (R 3.6.0) #> plyr 1.8.5 2019-12-10 [1] CRAN (R 3.6.0) #> prettyunits 1.0.2 2015-07-13 [1] CRAN (R 3.6.0) #> pROC 1.15.3 2019-07-21 [1] CRAN (R 3.6.0) #> processx 3.4.1 2019-07-18 [1] CRAN (R 3.6.0) #> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 3.6.0) #> promises 1.1.0 2019-10-04 [1] CRAN (R 3.6.0) #> ps 1.3.0 2018-12-21 [1] CRAN (R 3.6.0) #> purrr * 0.3.3 2019-10-18 [1] CRAN (R 3.6.0) #> R6 2.4.1 2019-11-12 [1] CRAN (R 3.6.0) #> Rcpp 1.0.3 2019-11-08 [1] CRAN (R 3.6.0) #> recipes * 0.1.7 2019-09-15 [1] CRAN (R 3.6.0) #> remotes 2.1.0 2019-06-24 [1] CRAN (R 3.6.0) #> reshape2 1.4.3 2017-12-11 [1] CRAN (R 3.6.0) #> rlang 0.4.2.9000 2019-12-14 [1] Github (r-lib/rlang@ec7c1ed) #> rmarkdown 1.18 2019-11-27 [1] CRAN (R 3.6.0) #> rpart 4.1-15 2019-04-12 [1] CRAN (R 3.6.1) #> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.6.0) #> rsample * 0.0.5 2019-07-12 [1] CRAN (R 3.6.0) #> rsconnect 0.8.15 2019-07-22 [1] CRAN (R 3.6.0) #> rstan 2.19.2 2019-07-09 [1] CRAN (R 3.6.0) #> rstanarm 2.19.2 2019-10-03 [1] CRAN (R 3.6.1) #> rstantools 2.0.0 2019-09-15 [1] CRAN (R 3.6.0) #> rstudioapi 0.10 2019-03-19 [1] CRAN (R 3.6.0) #> scales * 1.1.0 2019-11-18 [1] CRAN (R 3.6.0) #> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.0) #> shiny 1.4.0 2019-10-10 [1] CRAN (R 3.6.0) #> shinyjs 1.0 2018-01-08 [1] CRAN (R 3.6.0) #> shinystan 2.5.0 2018-05-01 [1] CRAN (R 3.6.0) #> shinythemes 1.1.2 2018-11-06 [1] CRAN (R 3.6.0) #> SnowballC 0.6.0 2019-01-15 [1] CRAN (R 3.6.0) #> StanHeaders 2.19.0 2019-09-07 [1] CRAN (R 3.6.0) #> stringi 1.4.3 2019-03-12 [1] CRAN (R 3.6.0) #> stringr 1.4.0 2019-02-10 [1] CRAN (R 3.6.0) #> survival 2.40-1 2016-10-30 [1] CRAN (R 3.6.1) #> testthat 2.2.1 2019-07-25 [1] CRAN (R 3.6.0) #> threejs 0.3.1 2017-08-13 [1] CRAN (R 3.6.0) #> tibble * 2.99.99.9010 2019-12-06 [1] Github (tidyverse/tibble@f4365f7) #> tidymodels * 0.0.3 2019-10-04 [1] CRAN (R 3.6.0) #> tidyposterior 0.0.2 2018-11-15 [1] CRAN (R 3.6.0) #> tidypredict 0.4.3 
2019-09-03 [1] CRAN (R 3.6.0) #> tidyr * 1.0.0 2019-09-11 [1] CRAN (R 3.6.0) #> tidyselect 0.2.5 2018-10-11 [1] CRAN (R 3.6.0) #> tidytext 0.2.2 2019-07-29 [1] CRAN (R 3.6.0) #> timeDate 3043.102 2018-02-21 [1] CRAN (R 3.6.0) #> tokenizers 0.2.1 2018-03-29 [1] CRAN (R 3.6.0) #> usethis 1.5.1.9000 2019-12-06 [1] Github (r-lib/usethis@c7314cf) #> utf8 1.1.4 2018-05-24 [1] CRAN (R 3.6.0) #> vctrs 0.2.0.9007 2019-12-14 [1] Github (r-lib/vctrs@7228f79) #> withr 2.1.2 2018-03-15 [1] CRAN (R 3.6.0) #> xfun 0.11 2019-11-12 [1] CRAN (R 3.6.0) #> xtable 1.8-4 2019-04-21 [1] CRAN (R 3.6.0) #> xts 0.11-2 2018-11-05 [1] CRAN (R 3.6.0) #> yaml 2.2.0 2018-07-25 [1] CRAN (R 3.6.0) #> yardstick * 0.0.4 2019-08-26 [1] CRAN (R 3.6.0) #> zoo 1.8-6 2019-05-28 [1] CRAN (R 3.6.0) #> #> [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library ```
hlynurhallgrims commented 4 years ago

Yes, I ran it and still got an error.

library(AmesHousing)
library(tidymodels)
#> -- Attaching packages --------------------------------------------------------------------------------------------------- tidymodels 0.0.3 --
#> v broom     0.5.2          v purrr     0.3.3     
#> v dials     0.0.3.9002     v recipes   0.1.7.9002
#> v dplyr     0.8.3          v rsample   0.0.5     
#> v ggplot2   3.2.1          v tibble    2.1.3     
#> v infer     0.5.0          v yardstick 0.0.4     
#> v parsnip   0.0.4.9000
#> -- Conflicts ------------------------------------------------------------------------------------------------------ tidymodels_conflicts() --
#> x purrr::discard()  masks scales::discard()
#> x dplyr::filter()   masks stats::filter()
#> x dplyr::lag()      masks stats::lag()
#> x ggplot2::margin() masks dials::margin()
#> x dials::offset()   masks stats::offset()
#> x recipes::step()   masks stats::step()

ames <- make_ames() %>% 
  dplyr::select(-matches('Qu'))

set.seed(4595)
data_split <- initial_split(data=ames, prop=0.75, strata='Sale_Price')
data_split
#> <2199/731/2930>

ames_train <- training(data_split)
ames_test <- testing(data_split)

mod_rec_zv <- recipe(
  Sale_Price ~ Longitude + Latitude + Neighborhood, 
  data = ames_train
) %>%
  step_log(Sale_Price, base = 10) %>%
  step_dummy(all_nominal()) %>% 
  step_zv(everything()) %>% 
  prep()

tidy(mod_rec_zv, number=3)
#> Error in `$<-.data.frame`(`*tmp*`, "id", value = "zv_Q8mKv"): replacement has 1 row, data has 0

Created on 2019-12-16 by the reprex package (v0.3.0)

Session info ``` r devtools::session_info() #> - Session info ---------------------------------------------------------- #> setting value #> version R version 3.5.3 (2019-03-11) #> os Windows 10 x64 #> system x86_64, mingw32 #> ui RTerm #> language (EN) #> collate English_United Kingdom.1252 #> ctype English_United Kingdom.1252 #> tz Africa/Casablanca #> date 2019-12-16 #> #> - Packages -------------------------------------------------------------- #> package * version date lib #> AmesHousing * 0.0.3 2017-12-17 [1] #> assertthat 0.2.1 2019-03-21 [1] #> backports 1.1.5 2019-10-02 [1] #> base64enc 0.1-3 2015-07-28 [1] #> bayesplot 1.6.0 2018-08-02 [1] #> broom * 0.5.2 2019-04-07 [1] #> callr 3.3.2 2019-09-22 [1] #> class 7.3-15 2019-01-01 [2] #> cli 2.0.0.9000 2019-12-13 [1] #> codetools 0.2-16 2018-12-24 [2] #> colorspace 1.4-1 2019-03-18 [1] #> colourpicker 1.0 2017-09-27 [1] #> crayon 1.3.4 2017-09-16 [1] #> crosstalk 1.0.0 2016-12-21 [1] #> desc 1.2.0 2018-05-01 [1] #> devtools 2.2.1 2019-09-24 [1] #> dials * 0.0.3.9002 2019-11-16 [1] #> DiceDesign 1.8-1 2019-07-31 [1] #> digest 0.6.20 2019-07-04 [1] #> dplyr * 0.8.3 2019-07-04 [1] #> DT 0.4 2018-01-30 [1] #> dygraphs 1.1.1.6 2018-07-11 [1] #> ellipsis 0.3.0 2019-09-20 [1] #> evaluate 0.13 2019-02-12 [1] #> fansi 0.4.0 2018-10-05 [1] #> fs 1.3.1 2019-05-06 [1] #> furrr 0.1.0 2018-05-16 [1] #> future 1.12.0 2019-03-08 [1] #> generics 0.0.2 2018-11-29 [1] #> ggplot2 * 3.2.1 2019-08-10 [1] #> ggridges 0.5.0 2018-04-05 [1] #> globals 0.12.4 2018-10-11 [1] #> glue 1.3.1 2019-03-12 [1] #> gower 0.2.0 2019-03-07 [1] #> gridExtra 2.3 2017-09-09 [1] #> gtable 0.3.0 2019-03-25 [1] #> gtools 3.8.1 2018-06-26 [1] #> highr 0.8 2019-03-20 [1] #> htmltools 0.3.6 2017-04-28 [1] #> htmlwidgets 1.3 2018-09-30 [1] #> httpuv 1.5.1 2019-04-05 [1] #> igraph 1.2.4 2019-02-13 [1] #> infer * 0.5.0 2019-09-27 [1] #> inline 0.3.15 2018-05-18 [1] #> ipred 0.9-8 2018-11-05 [1] #> janeaustenr 0.1.5 2017-06-10 [1] #> knitr 1.23 2019-05-18 [1] 
#> later 0.8.0 2019-02-11 [1] #> lattice 0.20-38 2018-11-04 [2] #> lava 1.6.5 2019-02-12 [1] #> lazyeval 0.2.2 2019-03-15 [1] #> lifecycle 0.1.0 2019-08-01 [1] #> listenv 0.7.0 2018-01-21 [1] #> lme4 1.1-17 2018-04-03 [1] #> loo 2.0.0 2018-04-11 [1] #> lubridate 1.7.4 2018-04-11 [1] #> magrittr 1.5 2014-11-22 [1] #> markdown 0.9 2018-12-07 [1] #> MASS 7.3-51.1 2018-11-01 [2] #> Matrix 1.2-15 2018-11-01 [2] #> matrixStats 0.54.0 2018-07-23 [1] #> memoise 1.1.0 2017-04-21 [1] #> mime 0.6 2018-10-05 [1] #> miniUI 0.1.1.1 2018-05-18 [1] #> minqa 1.2.4 2014-10-09 [1] #> munsell 0.5.0 2018-06-12 [1] #> nlme 3.1-137 2018-04-07 [2] #> nloptr 1.0.4 2017-08-22 [1] #> nnet 7.3-12 2016-02-02 [2] #> parsnip * 0.0.4.9000 2019-11-16 [1] #> pillar 1.4.2 2019-06-29 [1] #> pkgbuild 1.0.3 2019-03-20 [1] #> pkgconfig 2.0.2 2018-08-16 [1] #> pkgload 1.0.2 2018-10-29 [1] #> plyr 1.8.4 2016-06-08 [1] #> prettyunits 1.0.2 2015-07-13 [1] #> pROC 1.15.3 2019-07-21 [1] #> processx 3.4.1 2019-07-18 [1] #> prodlim 2018.04.18 2018-04-18 [1] #> promises 1.0.1 2018-04-13 [1] #> ps 1.3.0 2018-12-21 [1] #> purrr * 0.3.3 2019-10-18 [1] #> R6 2.4.0 2019-02-14 [1] #> Rcpp 1.0.1 2019-03-17 [1] #> recipes * 0.1.7.9002 2019-12-16 [1] #> remotes 2.1.0 2019-06-24 [1] #> reshape2 1.4.3 2017-12-11 [1] #> rlang 0.4.1 2019-10-24 [1] #> rmarkdown 1.10 2018-06-11 [1] #> rpart 4.1-15 2019-04-12 [1] #> rprojroot 1.3-2 2018-01-03 [1] #> rsample * 0.0.5 2019-07-12 [1] #> rsconnect 0.8.15 2019-07-22 [1] #> rstan 2.18.2 2018-11-07 [1] #> rstanarm 2.17.4 2018-04-13 [1] #> rstantools 1.5.0 2018-04-17 [1] #> rstudioapi 0.10 2019-03-19 [1] #> scales * 1.0.0 2018-08-09 [1] #> sessioninfo 1.1.1 2018-11-05 [1] #> shiny 1.3.1 2019-04-12 [1] #> shinyjs 1.0 2018-01-08 [1] #> shinystan 2.5.0 2018-05-01 [1] #> shinythemes 1.1.1 2016-10-12 [1] #> SnowballC 0.6.0 2019-01-15 [1] #> StanHeaders 2.18.1 2019-01-28 [1] #> stringi 1.4.3 2019-03-12 [1] #> stringr 1.4.0 2019-02-10 [1] #> survival 2.43-3 2018-11-26 [2] #> testthat 2.3.0 
2019-11-05 [1] #> threejs 0.3.1 2017-08-13 [1] #> tibble * 2.1.3 2019-06-06 [1] #> tidymodels * 0.0.3 2019-10-04 [1] #> tidyposterior 0.0.2 2018-11-15 [1] #> tidypredict 0.4.3 2019-09-03 [1] #> tidyr * 1.0.0 2019-09-11 [1] #> tidyselect 0.2.5 2018-10-11 [1] #> tidytext 0.2.2 2019-07-29 [1] #> timeDate 3043.102 2018-02-21 [1] #> tokenizers 0.2.1 2018-03-29 [1] #> usethis 1.5.0 2019-04-07 [1] #> vctrs 0.2.0 2019-07-05 [1] #> withr 2.1.2 2018-03-15 [1] #> xfun 0.6 2019-04-02 [1] #> xtable 1.8-3 2018-08-29 [1] #> xts 0.11-0 2018-07-16 [1] #> yaml 2.2.0 2018-07-25 [1] #> yardstick * 0.0.4 2019-08-26 [1] #> zeallot 0.1.0 2018-01-28 [1] #> zoo 1.8-3 2018-07-16 [1] #> source #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.0) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> Github (r-lib/cli@ac2c18b) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> Github (tidymodels/dials@fcf4cb5) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.0) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.2) #> CRAN (R 3.5.0) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> Github (tidymodels/parsnip@4230ef8) #> CRAN (R 
3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.0) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.0) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> Github (tidymodels/recipes@29b3b2d) #> CRAN (R 3.5.3) #> CRAN (R 3.5.0) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.2) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.0) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.2) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> #> [1] C:/Users/r09hlha/Documents/R/win-library/3.5 #> [2] C:/Program Files/Microsoft/R Open/R-3.5.3/library ```
topepo commented 4 years ago

Can you run devtools::install_dev("tibble") and try again?

hlynurhallgrims commented 4 years ago

Yes! That does the trick. vctrs and cli were also updated during the tibble installation, if that matters.

library(AmesHousing)
library(tidymodels)
#> -- Attaching packages --------------------------------------------------------------------------------------------------- tidymodels 0.0.3 --
#> v broom     0.5.2            v purrr     0.3.3       
#> v dials     0.0.3.9002       v recipes   0.1.7.9002  
#> v dplyr     0.8.3            v rsample   0.0.5       
#> v ggplot2   3.2.1            v tibble    2.99.99.9010
#> v infer     0.5.0            v yardstick 0.0.4       
#> v parsnip   0.0.4.9000
#> -- Conflicts ------------------------------------------------------------------------------------------------------ tidymodels_conflicts() --
#> x purrr::discard()  masks scales::discard()
#> x dplyr::filter()   masks stats::filter()
#> x dplyr::lag()      masks stats::lag()
#> x ggplot2::margin() masks dials::margin()
#> x dials::offset()   masks stats::offset()
#> x recipes::step()   masks stats::step()

ames <- make_ames() %>% 
  dplyr::select(-matches('Qu'))

set.seed(4595)
data_split <- initial_split(data=ames, prop=0.75, strata='Sale_Price')
data_split
#> <2199/731/2930>

ames_train <- training(data_split)
ames_test <- testing(data_split)

mod_rec_zv <- recipe(
  Sale_Price ~ Longitude + Latitude + Neighborhood, 
  data = ames_train
) %>%
  step_log(Sale_Price, base = 10) %>%
  step_dummy(all_nominal()) %>% 
  step_zv(everything()) %>% 
  prep()

tidy(mod_rec_zv, number=3)
#> # A tibble: 0 x 2
#> # ... with 2 variables: terms <chr>, id <chr>

Created on 2019-12-17 by the reprex package (v0.3.0)

Session info ``` r devtools::session_info() #> - Session info ---------------------------------------------------------- #> setting value #> version R version 3.5.3 (2019-03-11) #> os Windows 10 x64 #> system x86_64, mingw32 #> ui RTerm #> language (EN) #> collate English_United Kingdom.1252 #> ctype English_United Kingdom.1252 #> tz Africa/Casablanca #> date 2019-12-17 #> #> - Packages -------------------------------------------------------------- #> package * version date lib #> AmesHousing * 0.0.3 2017-12-17 [1] #> assertthat 0.2.1 2019-03-21 [1] #> backports 1.1.5 2019-10-02 [1] #> base64enc 0.1-3 2015-07-28 [1] #> bayesplot 1.6.0 2018-08-02 [1] #> broom * 0.5.2 2019-04-07 [1] #> callr 3.3.2 2019-09-22 [1] #> class 7.3-15 2019-01-01 [2] #> cli 2.0.0.9000 2019-12-17 [1] #> codetools 0.2-16 2018-12-24 [2] #> colorspace 1.4-1 2019-03-18 [1] #> colourpicker 1.0 2017-09-27 [1] #> crayon 1.3.4 2017-09-16 [1] #> crosstalk 1.0.0 2016-12-21 [1] #> desc 1.2.0 2018-05-01 [1] #> devtools 2.2.1 2019-09-24 [1] #> dials * 0.0.3.9002 2019-11-16 [1] #> DiceDesign 1.8-1 2019-07-31 [1] #> digest 0.6.20 2019-07-04 [1] #> dplyr * 0.8.3 2019-07-04 [1] #> DT 0.4 2018-01-30 [1] #> dygraphs 1.1.1.6 2018-07-11 [1] #> ellipsis 0.3.0 2019-09-20 [1] #> evaluate 0.13 2019-02-12 [1] #> fansi 0.4.0 2018-10-05 [1] #> fs 1.3.1 2019-05-06 [1] #> furrr 0.1.0 2018-05-16 [1] #> future 1.12.0 2019-03-08 [1] #> generics 0.0.2 2018-11-29 [1] #> ggplot2 * 3.2.1 2019-08-10 [1] #> ggridges 0.5.0 2018-04-05 [1] #> globals 0.12.4 2018-10-11 [1] #> glue 1.3.1 2019-03-12 [1] #> gower 0.2.0 2019-03-07 [1] #> gridExtra 2.3 2017-09-09 [1] #> gtable 0.3.0 2019-03-25 [1] #> gtools 3.8.1 2018-06-26 [1] #> highr 0.8 2019-03-20 [1] #> htmltools 0.3.6 2017-04-28 [1] #> htmlwidgets 1.3 2018-09-30 [1] #> httpuv 1.5.1 2019-04-05 [1] #> igraph 1.2.4 2019-02-13 [1] #> infer * 0.5.0 2019-09-27 [1] #> inline 0.3.15 2018-05-18 [1] #> ipred 0.9-8 2018-11-05 [1] #> janeaustenr 0.1.5 2017-06-10 [1] #> knitr 1.23 2019-05-18 [1] 
#> later 0.8.0 2019-02-11 [1] #> lattice 0.20-38 2018-11-04 [2] #> lava 1.6.5 2019-02-12 [1] #> lazyeval 0.2.2 2019-03-15 [1] #> lifecycle 0.1.0 2019-08-01 [1] #> listenv 0.7.0 2018-01-21 [1] #> lme4 1.1-17 2018-04-03 [1] #> loo 2.0.0 2018-04-11 [1] #> lubridate 1.7.4 2018-04-11 [1] #> magrittr 1.5 2014-11-22 [1] #> markdown 0.9 2018-12-07 [1] #> MASS 7.3-51.1 2018-11-01 [2] #> Matrix 1.2-15 2018-11-01 [2] #> matrixStats 0.54.0 2018-07-23 [1] #> memoise 1.1.0 2017-04-21 [1] #> mime 0.6 2018-10-05 [1] #> miniUI 0.1.1.1 2018-05-18 [1] #> minqa 1.2.4 2014-10-09 [1] #> munsell 0.5.0 2018-06-12 [1] #> nlme 3.1-137 2018-04-07 [2] #> nloptr 1.0.4 2017-08-22 [1] #> nnet 7.3-12 2016-02-02 [2] #> parsnip * 0.0.4.9000 2019-11-16 [1] #> pillar 1.4.2.9001 2019-12-16 [1] #> pkgbuild 1.0.3 2019-03-20 [1] #> pkgconfig 2.0.2 2018-08-16 [1] #> pkgload 1.0.2 2018-10-29 [1] #> plyr 1.8.4 2016-06-08 [1] #> prettyunits 1.0.2 2015-07-13 [1] #> pROC 1.15.3 2019-07-21 [1] #> processx 3.4.1 2019-07-18 [1] #> prodlim 2018.04.18 2018-04-18 [1] #> promises 1.0.1 2018-04-13 [1] #> ps 1.3.0 2018-12-21 [1] #> purrr * 0.3.3 2019-10-18 [1] #> R6 2.4.0 2019-02-14 [1] #> Rcpp 1.0.1 2019-03-17 [1] #> recipes * 0.1.7.9002 2019-12-16 [1] #> remotes 2.1.0 2019-06-24 [1] #> reshape2 1.4.3 2017-12-11 [1] #> rlang 0.4.2 2019-11-23 [1] #> rmarkdown 1.10 2018-06-11 [1] #> rpart 4.1-15 2019-04-12 [1] #> rprojroot 1.3-2 2018-01-03 [1] #> rsample * 0.0.5 2019-07-12 [1] #> rsconnect 0.8.15 2019-07-22 [1] #> rstan 2.18.2 2018-11-07 [1] #> rstanarm 2.17.4 2018-04-13 [1] #> rstantools 1.5.0 2018-04-17 [1] #> rstudioapi 0.10 2019-03-19 [1] #> scales * 1.0.0 2018-08-09 [1] #> sessioninfo 1.1.1 2018-11-05 [1] #> shiny 1.3.1 2019-04-12 [1] #> shinyjs 1.0 2018-01-08 [1] #> shinystan 2.5.0 2018-05-01 [1] #> shinythemes 1.1.1 2016-10-12 [1] #> SnowballC 0.6.0 2019-01-15 [1] #> StanHeaders 2.18.1 2019-01-28 [1] #> stringi 1.4.3 2019-03-12 [1] #> stringr 1.4.0 2019-02-10 [1] #> survival 2.43-3 2018-11-26 [2] #> testthat 
2.3.0 2019-11-05 [1] #> threejs 0.3.1 2017-08-13 [1] #> tibble * 2.99.99.9010 2019-12-17 [1] #> tidymodels * 0.0.3 2019-10-04 [1] #> tidyposterior 0.0.2 2018-11-15 [1] #> tidypredict 0.4.3 2019-09-03 [1] #> tidyr * 1.0.0 2019-09-11 [1] #> tidyselect 0.2.5 2018-10-11 [1] #> tidytext 0.2.2 2019-07-29 [1] #> timeDate 3043.102 2018-02-21 [1] #> tokenizers 0.2.1 2018-03-29 [1] #> usethis 1.5.0 2019-04-07 [1] #> utf8 1.1.4 2018-05-24 [1] #> vctrs 0.2.0.9007 2019-12-17 [1] #> withr 2.1.2 2018-03-15 [1] #> xfun 0.6 2019-04-02 [1] #> xtable 1.8-3 2018-08-29 [1] #> xts 0.11-0 2018-07-16 [1] #> yaml 2.2.0 2018-07-25 [1] #> yardstick * 0.0.4 2019-08-26 [1] #> zoo 1.8-3 2018-07-16 [1] #> source #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.0) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> Github (r-lib/cli@c1786b5) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> Github (tidymodels/dials@fcf4cb5) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.0) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.2) #> CRAN (R 3.5.0) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> Github 
(tidymodels/parsnip@4230ef8) #> Github (r-lib/pillar@82370d7) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.0) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.0) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> Github (tidymodels/recipes@29b3b2d) #> CRAN (R 3.5.3) #> CRAN (R 3.5.0) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.1) #> CRAN (R 3.5.2) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> Github (tidyverse/tibble@f4365f7) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.0) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> Github (r-lib/vctrs@7228f79) #> CRAN (R 3.5.1) #> CRAN (R 3.5.3) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> CRAN (R 3.5.2) #> CRAN (R 3.5.3) #> CRAN (R 3.5.1) #> #> [1] C:/Users/r09hlha/Documents/R/win-library/3.5 #> [2] C:/Program Files/Microsoft/R Open/R-3.5.3/library ```
topepo commented 4 years ago

I installed the CRAN versions of tibble, vctrs, and rlang and the error still occurs with the devel version of recipes.

I'm going to leave recipes as-is for the time being and will verify that the new release of these (probably tibble) solves the issue. We need to submit recipes this week by CRAN mandate.

topepo commented 4 years ago

It looks like tibble solved it:

library(AmesHousing)
library(tidymodels)
#> ── Attaching packages ───────────────────────────────────────────────────────────── tidymodels 0.1.0 ──
#> ✓ broom     0.5.4          ✓ recipes   0.1.12    
#> ✓ dials     0.0.6          ✓ rsample   0.0.6     
#> ✓ dplyr     0.8.5          ✓ tibble    3.0.1     
#> ✓ ggplot2   3.3.0          ✓ tune      0.1.0     
#> ✓ infer     0.5.1          ✓ workflows 0.1.0     
#> ✓ parsnip   0.1.0.9001     ✓ yardstick 0.0.5     
#> ✓ purrr     0.3.4
#> Warning: package 'rsample' was built under R version 3.6.2
#> Warning: package 'tibble' was built under R version 3.6.2
#> ── Conflicts ──────────────────────────────────────────────────────────────── tidymodels_conflicts() ──
#> x purrr::discard()  masks scales::discard()
#> x dplyr::filter()   masks stats::filter()
#> x dplyr::lag()      masks stats::lag()
#> x ggplot2::margin() masks dials::margin()
#> x recipes::step()   masks stats::step()

ames <- make_ames() %>% 
  select(-matches('Qu'))

set.seed(4595)
data_split <- initial_split(data=ames, prop=0.75, strata='Sale_Price')
data_split
#> <Training/Validation/Total>
#> <2199/731/2930>

ames_train <- training(data_split)
ames_test <- testing(data_split)

mod_rec_zv <- recipe(
  Sale_Price ~ Longitude + Latitude + Neighborhood, 
  data = ames_train
) %>%
  step_log(Sale_Price, base = 10) %>%
  # Lump factor levels that occur in 
  # <= 5% of data as "other"
  # step_other(Neighborhood, threshold = 0.05) %>%
  # Create dummy variables for _any_ factor variables
  step_dummy(all_nominal()) %>% 
  step_zv(everything()) %>% 
  prep()

tidy(mod_rec_zv, number=3)
#> # A tibble: 0 x 2
#> # … with 2 variables: terms <chr>, id <chr>

Created on 2020-05-01 by the reprex package (v0.3.0)

github-actions[bot] commented 3 years ago

This issue has been automatically locked. If you believe you have found a related problem, please file a new issue (with a reprex https://reprex.tidyverse.org) and link to this issue.