tidymodels / tidyclust

A tidy unified interface to clustering models
https://tidyclust.tidymodels.org/
Other
108 stars 14 forks source link

Empty .notes with tune_cluster #74

Closed cgoo4 closed 2 years ago

cgoo4 commented 2 years ago

Really great to see clustering in tidymodels!

When tuning the following example, all models fail, but I'm not getting anything in .notes.

library(tidyverse)
library(tidymodels)
library(tidyclust)

data <- tribble(
  ~var1, ~var2,
  -10.7309753827869, 7.31314782728295,
  0.158313453988934, -1.61716134057181,
  2.56976828347436, -0.0304718731434479,
  2.52510547176991, 2.25837680812956,
  -0.545994998160519, -1.65459969854165,
  4.24718480285034, 2.63196351710467,
  -2.58277023154306, 0.071240121563463,
  1.76791313026014, -0.0820188617066647,
  0.700250002891148, -0.28659873913792,
  2.05609909308338, -0.944805582445498,
  0.341405837432006, -1.32977660565642,
  -2.50062436356793, -1.90326151166205,
  -1.0388232093446, -0.36389143468345,
  -1.00255850689283, -1.6047655508424,
  1.63335997713595, 1.3094039086307,
  2.8965669433778, 2.53970798590398,
  1.9755753941663, 1.05180385078969,
  0.949450225797159, 0.0715029785385012,
  -3.11293609366517, -1.20094733897835,
  0.433470080004349, 1.44883471157841,
  1.80155162083184, 1.74231103529767,
  -2.0330697607949, -2.238240115786,
  0.226699387711105, -1.56773486777401,
  0.705202066774586, 1.41309135840208,
  -2.0229531809923, -3.80825276177795,
  1.54212334904791, -0.33715090285275,
  2.98458548773627, 2.88958633008886,
  -0.551832844262862, -2.05139067509532,
  2.82225740379044, 2.21534428308637,
  -2.74329702499752, -3.54358226626749,
  -0.468043631321413, -1.56406035159058,
  0.466410395768423, -0.926812424657721,
  -3.46941317956229, 0.0992081867745776
)

# Reprex: tune the number of clusters of a k-means workflow across
# cross-validation folds of `data`.

# k-means specification with num_clusters marked for tuning; the "stats"
# engine is given algorithm = "Lloyd".
kmeans_spec <- k_means(num_clusters = tune()) |> 
  set_engine("stats", algorithm = "Lloyd")

# One-sided formula: only var1 and var2 are used and there is no outcome.
kmeans_rec <- recipe(~ var1 + var2, data = data)

# Combine the recipe and the model specification into a single workflow.
kmeans_wflow <- workflow(kmeans_rec, kmeans_spec)

# Resamples of `data` with v = 5 folds.
kmeans_cv <- vfold_cv(data, v = 5)

# Regular grid of candidate num_clusters values, built with levels = 6.
kmeans_grid <- grid_regular(num_clusters(), levels = 6)

# Tune the workflow over the grid and the resamples, scoring with tot_wss,
# tot_sse and sse_ratio. The control object sets save_pred = TRUE and uses
# base identity() as the extract function.
kmeans_res <- tune_cluster(
  kmeans_wflow,
  resamples = kmeans_cv,
  grid = kmeans_grid,
  control = control_grid(save_pred = TRUE, extract = identity),
  metrics = cluster_metric_set(tot_wss, tot_sse, sse_ratio)
) 
#> Warning: All models failed. See the `.notes` column.

collect_notes(kmeans_res)
#> # A tibble: 0 × 2
#> # … with 2 variables: id <chr>, .notes <???>

Created on 2022-08-29 with reprex v2.0.2

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.2.1 (2022-06-23) #> os macOS Big Sur ... 10.16 #> system x86_64, darwin17.0 #> ui X11 #> language (EN) #> collate en_GB.UTF-8 #> ctype en_GB.UTF-8 #> tz Europe/London #> date 2022-08-29 #> pandoc 2.18 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date (UTC) lib source #> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.0) #> backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0) #> broom * 1.0.0 2022-07-01 [1] CRAN (R 4.2.0) #> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.2.0) #> class 7.3-20 2022-01-16 [1] CRAN (R 4.2.1) #> cli 3.3.0 2022-04-25 [1] CRAN (R 4.2.0) #> codetools 0.2-18 2020-11-04 [1] CRAN (R 4.2.1) #> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.0) #> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.2.0) #> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.0) #> dbplyr 2.2.1 2022-06-27 [1] CRAN (R 4.2.0) #> dials * 1.0.0 2022-06-14 [1] CRAN (R 4.2.0) #> DiceDesign 1.9 2021-02-13 [1] CRAN (R 4.2.0) #> digest 0.6.29 2021-12-01 [1] CRAN (R 4.2.0) #> dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.2.0) #> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0) #> evaluate 0.16 2022-08-09 [1] CRAN (R 4.2.0) #> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0) #> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.0) #> flexclust 1.4-1 2022-04-08 [1] CRAN (R 4.2.0) #> forcats * 0.5.2 2022-08-19 [1] CRAN (R 4.2.1) #> foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.0) #> fs 1.5.2 2021-12-08 [1] CRAN (R 4.2.0) #> furrr 0.3.1 2022-08-15 [1] CRAN (R 4.2.0) #> future 1.27.0 2022-07-22 [1] CRAN (R 4.2.1) #> future.apply 1.9.0 2022-04-25 [1] CRAN (R 4.2.0) #> gargle 1.2.0 2021-07-02 [1] CRAN (R 4.2.0) #> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.1) #> ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.2.0) #> globals 0.16.0 2022-08-05 [1] CRAN (R 4.2.1) #> 
glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0) #> googledrive 2.0.0 2021-07-08 [1] CRAN (R 4.2.0) #> googlesheets4 1.0.1 2022-08-13 [1] CRAN (R 4.2.1) #> gower 1.0.0 2022-02-03 [1] CRAN (R 4.2.0) #> GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.2.0) #> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.2.0) #> hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.2.0) #> haven 2.5.1 2022-08-22 [1] CRAN (R 4.2.1) #> highr 0.9 2021-04-16 [1] CRAN (R 4.2.0) #> hms 1.1.2 2022-08-19 [1] CRAN (R 4.2.1) #> htmltools 0.5.3 2022-07-18 [1] CRAN (R 4.2.1) #> httr 1.4.4 2022-08-17 [1] CRAN (R 4.2.0) #> infer * 1.0.3 2022-08-22 [1] CRAN (R 4.2.1) #> ipred 0.9-13 2022-06-02 [1] CRAN (R 4.2.0) #> iterators 1.0.14 2022-02-05 [1] CRAN (R 4.2.0) #> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.2.0) #> knitr 1.40 2022-08-24 [1] CRAN (R 4.2.1) #> lattice 0.20-45 2021-09-22 [1] CRAN (R 4.2.1) #> lava 1.6.10 2021-09-02 [1] CRAN (R 4.2.0) #> lhs 1.1.5 2022-03-22 [1] CRAN (R 4.2.0) #> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.2.0) #> listenv 0.8.0 2019-12-05 [1] CRAN (R 4.2.0) #> lubridate 1.8.0.9000 2022-06-09 [1] Github (tidyverse/lubridate@0bb49b2) #> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0) #> MASS 7.3-58.1 2022-08-03 [1] CRAN (R 4.2.0) #> Matrix 1.4-1 2022-03-23 [1] CRAN (R 4.2.1) #> modeldata * 1.0.0 2022-07-01 [1] CRAN (R 4.2.0) #> modelr 0.1.9 2022-08-19 [1] CRAN (R 4.2.1) #> modeltools 0.2-23 2020-03-05 [1] CRAN (R 4.2.0) #> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0) #> nnet 7.3-17 2022-01-16 [1] CRAN (R 4.2.1) #> parallelly 1.32.1 2022-07-21 [1] CRAN (R 4.2.1) #> parsnip * 1.0.1.9000 2022-08-28 [1] Github (tidymodels/parsnip@e1eb30a) #> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.1) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0) #> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.2.0) #> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.2.0) #> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.2.1) #> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.2.0) #> R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.2.0) #> R.utils 2.12.0 2022-06-28 [1] CRAN (R 4.2.0) #> R6 
2.5.1 2021-08-19 [1] CRAN (R 4.2.0) #> Rcpp 1.0.9 2022-07-08 [1] CRAN (R 4.2.1) #> readr * 2.1.2 2022-01-30 [1] CRAN (R 4.2.0) #> readxl 1.4.1 2022-08-17 [1] CRAN (R 4.2.0) #> recipes * 1.0.1 2022-07-07 [1] CRAN (R 4.2.1) #> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.0) #> rlang 1.0.4 2022-07-12 [1] CRAN (R 4.2.0) #> rmarkdown 2.16 2022-08-24 [1] CRAN (R 4.2.1) #> rpart 4.1.16 2022-01-24 [1] CRAN (R 4.2.1) #> rsample * 1.1.0 2022-08-08 [1] CRAN (R 4.2.1) #> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.1) #> rvest 1.0.3 2022-08-19 [1] CRAN (R 4.2.1) #> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.1) #> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0) #> stringi 1.7.8 2022-07-11 [1] CRAN (R 4.2.0) #> stringr * 1.4.1 2022-08-20 [1] CRAN (R 4.2.0) #> styler 1.7.0 2022-03-13 [1] CRAN (R 4.2.0) #> survival 3.4-0 2022-08-09 [1] CRAN (R 4.2.0) #> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.1) #> tidyclust * 0.0.0.9000 2022-08-28 [1] Github (EmilHvitfeldt/tidyclust@71a245a) #> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) #> tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.2.0) #> tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.2.0) #> tidyverse * 1.3.2 2022-07-18 [1] CRAN (R 4.2.1) #> timeDate 4021.104 2022-07-19 [1] CRAN (R 4.2.0) #> tune * 1.0.0 2022-07-07 [1] CRAN (R 4.2.1) #> tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.2.0) #> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0) #> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.2.0) #> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0) #> workflows * 1.0.0.9000 2022-08-28 [1] Github (tidymodels/workflows@099a735) #> workflowsets * 1.0.0 2022-07-12 [1] CRAN (R 4.2.0) #> xfun 0.32 2022-08-10 [1] CRAN (R 4.2.1) #> xml2 1.3.3 2021-11-30 [1] CRAN (R 4.2.0) #> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.2.0) #> yardstick * 1.0.0 2022-06-06 [1] CRAN (R 4.2.0) #> #> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library #> #> ────────────────────────────────────────────────────────────────────────────── ```
EmilHvitfeldt commented 2 years ago

Hello @cgoo4 👋

I'm not able to reproduce this using the most recent version of tidyclust. Can you update with remotes::install_github("emilhvitfeldt/tidyclust") and try again?

library(tidymodels)
library(tidyclust)
#> 
#> Attaching package: 'tidyclust'
#> The following object is masked from 'package:parsnip':
#> 
#>     prepare_data

data <- tribble(
  ~var1, ~var2,
  -10.7309753827869, 7.31314782728295,
  0.158313453988934, -1.61716134057181,
  2.56976828347436, -0.0304718731434479,
  2.52510547176991, 2.25837680812956,
  -0.545994998160519, -1.65459969854165,
  4.24718480285034, 2.63196351710467,
  -2.58277023154306, 0.071240121563463,
  1.76791313026014, -0.0820188617066647,
  0.700250002891148, -0.28659873913792,
  2.05609909308338, -0.944805582445498,
  0.341405837432006, -1.32977660565642,
  -2.50062436356793, -1.90326151166205,
  -1.0388232093446, -0.36389143468345,
  -1.00255850689283, -1.6047655508424,
  1.63335997713595, 1.3094039086307,
  2.8965669433778, 2.53970798590398,
  1.9755753941663, 1.05180385078969,
  0.949450225797159, 0.0715029785385012,
  -3.11293609366517, -1.20094733897835,
  0.433470080004349, 1.44883471157841,
  1.80155162083184, 1.74231103529767,
  -2.0330697607949, -2.238240115786,
  0.226699387711105, -1.56773486777401,
  0.705202066774586, 1.41309135840208,
  -2.0229531809923, -3.80825276177795,
  1.54212334904791, -0.33715090285275,
  2.98458548773627, 2.88958633008886,
  -0.551832844262862, -2.05139067509532,
  2.82225740379044, 2.21534428308637,
  -2.74329702499752, -3.54358226626749,
  -0.468043631321413, -1.56406035159058,
  0.466410395768423, -0.926812424657721,
  -3.46941317956229, 0.0992081867745776
)

# Reprex: tune the number of clusters of a k-means workflow across
# cross-validation folds of `data`.

# k-means specification with num_clusters marked for tuning; the "stats"
# engine is given algorithm = "Lloyd".
kmeans_spec <- k_means(num_clusters = tune()) |> 
  set_engine("stats", algorithm = "Lloyd")

# One-sided formula: only var1 and var2 are used and there is no outcome.
kmeans_rec <- recipe(~ var1 + var2, data = data)

# Combine the recipe and the model specification into a single workflow.
kmeans_wflow <- workflow(kmeans_rec, kmeans_spec)

# Resamples of `data` with v = 5 folds.
kmeans_cv <- vfold_cv(data, v = 5)

# Regular grid of candidate num_clusters values, built with levels = 6.
kmeans_grid <- grid_regular(num_clusters(), levels = 6)

# Tune the workflow over the grid and the resamples, scoring with tot_wss,
# tot_sse and sse_ratio. The control object sets save_pred = TRUE and uses
# base identity() as the extract function.
kmeans_res <- tune_cluster(
  kmeans_wflow,
  resamples = kmeans_cv,
  grid = kmeans_grid,
  control = control_grid(save_pred = TRUE, extract = identity),
  metrics = cluster_metric_set(tot_wss, tot_sse, sse_ratio)
) 

collect_notes(kmeans_res)
#> # A tibble: 0 × 4
#> # … with 4 variables: id <chr>, location <chr>, type <chr>, note <chr>

Created on 2022-08-30 by the reprex package (v2.0.1)

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.2.1 (2022-06-23) #> os macOS Monterey 12.2.1 #> system aarch64, darwin20 #> ui X11 #> language (EN) #> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz America/Los_Angeles #> date 2022-08-30 #> pandoc 2.18 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date (UTC) lib source #> assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.2.0) #> backports 1.4.1 2021-12-13 [2] CRAN (R 4.2.0) #> broom * 1.0.0 2022-07-01 [2] CRAN (R 4.2.0) #> class 7.3-20 2022-01-16 [2] CRAN (R 4.2.1) #> cli 3.3.0 2022-04-25 [1] CRAN (R 4.2.0) #> codetools 0.2-18 2020-11-04 [2] CRAN (R 4.2.1) #> colorspace 2.0-3 2022-02-21 [2] CRAN (R 4.2.0) #> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.2.0) #> DBI 1.1.3 2022-06-18 [2] CRAN (R 4.2.0) #> dials * 1.0.0 2022-06-14 [2] CRAN (R 4.2.0) #> DiceDesign 1.9 2021-02-13 [2] CRAN (R 4.2.0) #> digest 0.6.29 2021-12-01 [2] CRAN (R 4.2.0) #> dplyr * 1.0.9 2022-04-28 [2] CRAN (R 4.2.0) #> ellipsis 0.3.2 2021-04-29 [2] CRAN (R 4.2.0) #> evaluate 0.16 2022-08-09 [1] CRAN (R 4.2.0) #> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0) #> fastmap 1.1.0 2021-01-25 [2] CRAN (R 4.2.0) #> flexclust 1.4-1 2022-04-08 [2] CRAN (R 4.2.0) #> foreach 1.5.2 2022-02-02 [2] CRAN (R 4.2.0) #> fs 1.5.2 2021-12-08 [2] CRAN (R 4.2.0) #> furrr 0.3.1 2022-08-15 [1] CRAN (R 4.2.0) #> future 1.27.0 2022-07-22 [2] CRAN (R 4.2.0) #> future.apply 1.9.0 2022-04-25 [2] CRAN (R 4.2.0) #> generics 0.1.3 2022-07-05 [2] CRAN (R 4.2.0) #> ggplot2 * 3.3.6 2022-05-03 [2] CRAN (R 4.2.0) #> globals 0.16.1 2022-08-28 [1] CRAN (R 4.2.0) #> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0) #> gower 1.0.0 2022-02-03 [2] CRAN (R 4.2.0) #> GPfit 1.0-8 2019-02-08 [2] CRAN (R 4.2.0) #> gtable 0.3.0 2019-03-25 [2] CRAN (R 4.2.0) #> 
hardhat 1.2.0 2022-06-30 [2] CRAN (R 4.2.1) #> highr 0.9 2021-04-16 [2] CRAN (R 4.2.0) #> htmltools 0.5.3 2022-07-18 [2] CRAN (R 4.2.0) #> infer * 1.0.2 2022-06-10 [2] CRAN (R 4.2.0) #> ipred 0.9-13 2022-06-02 [2] CRAN (R 4.2.0) #> iterators 1.0.14 2022-02-05 [2] CRAN (R 4.2.0) #> knitr 1.39.7 2022-08-15 [1] Github (yihui/knitr@5481c3f) #> lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.1) #> lava 1.6.10 2021-09-02 [2] CRAN (R 4.2.0) #> lhs 1.1.5 2022-03-22 [2] CRAN (R 4.2.0) #> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.2.0) #> listenv 0.8.0 2019-12-05 [2] CRAN (R 4.2.0) #> lubridate 1.8.0 2021-10-07 [2] CRAN (R 4.2.0) #> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0) #> MASS 7.3-57 2022-04-22 [2] CRAN (R 4.2.1) #> Matrix 1.4-1 2022-03-23 [2] CRAN (R 4.2.1) #> modeldata * 1.0.0 2022-07-01 [2] CRAN (R 4.2.0) #> modeltools 0.2-23 2020-03-05 [2] CRAN (R 4.2.0) #> munsell 0.5.0 2018-06-12 [2] CRAN (R 4.2.0) #> nnet 7.3-17 2022-01-16 [2] CRAN (R 4.2.1) #> parallelly 1.32.1 2022-07-21 [2] CRAN (R 4.2.0) #> parsnip * 1.0.1.9000 2022-08-30 [1] Github (tidymodels/parsnip@e1eb30a) #> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.0) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0) #> prodlim 2019.11.13 2019-11-17 [2] CRAN (R 4.2.0) #> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.2.0) #> R.cache 0.16.0 2022-07-21 [2] CRAN (R 4.2.0) #> R.methodsS3 1.8.2 2022-06-13 [2] CRAN (R 4.2.0) #> R.oo 1.25.0 2022-06-12 [2] CRAN (R 4.2.0) #> R.utils 2.12.0 2022-06-28 [2] CRAN (R 4.2.0) #> R6 2.5.1 2021-08-19 [2] CRAN (R 4.2.0) #> Rcpp 1.0.9 2022-07-08 [2] CRAN (R 4.2.0) #> RcppZiggurat 0.1.6 2020-10-20 [2] CRAN (R 4.2.0) #> recipes * 1.0.1.9000 2022-08-30 [1] Github (tidymodels/recipes@46d0019) #> reprex 2.0.1 2021-08-05 [2] CRAN (R 4.2.0) #> Rfast 2.0.6 2022-02-16 [2] CRAN (R 4.2.0) #> rlang 1.0.4 2022-07-12 [1] CRAN (R 4.2.0) #> rmarkdown 2.14 2022-04-25 [2] CRAN (R 4.2.0) #> rpart 4.1.16 2022-01-24 [2] CRAN (R 4.2.1) #> rsample * 1.1.0 2022-08-08 [1] CRAN (R 4.2.0) #> rstudioapi 0.13 2020-11-12 [2] CRAN (R 
4.2.0) #> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.0) #> sessioninfo 1.2.2 2021-12-06 [2] CRAN (R 4.2.0) #> stringi 1.7.8 2022-07-11 [2] CRAN (R 4.2.1) #> stringr 1.4.0 2019-02-10 [2] CRAN (R 4.2.0) #> styler 1.7.0 2022-03-13 [2] CRAN (R 4.2.0) #> survival 3.3-1 2022-03-03 [2] CRAN (R 4.2.1) #> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) #> tidyclust * 0.0.0.9000 2022-08-30 [1] Github (emilhvitfeldt/tidyclust@71a245a) #> tidymodels * 1.0.0 2022-07-13 [2] CRAN (R 4.2.1) #> tidyr * 1.2.0 2022-02-01 [2] CRAN (R 4.2.0) #> tidyselect 1.1.2 2022-02-21 [2] CRAN (R 4.2.0) #> timeDate 4021.104 2022-07-19 [2] CRAN (R 4.2.0) #> tune * 1.0.0 2022-07-07 [2] CRAN (R 4.2.1) #> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0) #> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.2.0) #> withr 2.5.0 2022-03-03 [2] CRAN (R 4.2.0) #> workflows * 1.0.0.9000 2022-08-30 [1] Github (tidymodels/workflows@099a735) #> workflowsets * 1.0.0 2022-07-12 [2] CRAN (R 4.2.1) #> xfun 0.32 2022-08-10 [1] CRAN (R 4.2.0) #> yaml 2.3.5 2022-02-21 [2] CRAN (R 4.2.0) #> yardstick * 1.0.0 2022-06-06 [2] CRAN (R 4.2.0) #> #> [1] /Users/emilhvitfeldt/Library/R/arm64/4.2/library #> [2] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library #> #> ────────────────────────────────────────────────────────────────────────────── ```
cgoo4 commented 2 years ago

Hi @EmilHvitfeldt - I've updated again with remotes::install_github("emilhvitfeldt/tidyclust") and re-run, but get the same outcome (includes session info):

library(tidyverse)
library(tidymodels)
library(tidyclust)

data <- tribble(
  ~var1, ~var2,
  -10.7309753827869, 7.31314782728295,
  0.158313453988934, -1.61716134057181,
  2.56976828347436, -0.0304718731434479,
  2.52510547176991, 2.25837680812956,
  -0.545994998160519, -1.65459969854165,
  4.24718480285034, 2.63196351710467,
  -2.58277023154306, 0.071240121563463,
  1.76791313026014, -0.0820188617066647,
  0.700250002891148, -0.28659873913792,
  2.05609909308338, -0.944805582445498,
  0.341405837432006, -1.32977660565642,
  -2.50062436356793, -1.90326151166205,
  -1.0388232093446, -0.36389143468345,
  -1.00255850689283, -1.6047655508424,
  1.63335997713595, 1.3094039086307,
  2.8965669433778, 2.53970798590398,
  1.9755753941663, 1.05180385078969,
  0.949450225797159, 0.0715029785385012,
  -3.11293609366517, -1.20094733897835,
  0.433470080004349, 1.44883471157841,
  1.80155162083184, 1.74231103529767,
  -2.0330697607949, -2.238240115786,
  0.226699387711105, -1.56773486777401,
  0.705202066774586, 1.41309135840208,
  -2.0229531809923, -3.80825276177795,
  1.54212334904791, -0.33715090285275,
  2.98458548773627, 2.88958633008886,
  -0.551832844262862, -2.05139067509532,
  2.82225740379044, 2.21534428308637,
  -2.74329702499752, -3.54358226626749,
  -0.468043631321413, -1.56406035159058,
  0.466410395768423, -0.926812424657721,
  -3.46941317956229, 0.0992081867745776
)

# Reprex: tune the number of clusters of a k-means workflow across
# cross-validation folds of `data`.

# k-means specification with num_clusters marked for tuning; the "stats"
# engine is given algorithm = "Lloyd".
kmeans_spec <- k_means(num_clusters = tune()) |> 
  set_engine("stats", algorithm = "Lloyd")

# One-sided formula: only var1 and var2 are used and there is no outcome.
kmeans_rec <- recipe(~ var1 + var2, data = data)

# Combine the recipe and the model specification into a single workflow.
kmeans_wflow <- workflow(kmeans_rec, kmeans_spec)

# Resamples of `data` with v = 5 folds.
kmeans_cv <- vfold_cv(data, v = 5)

# Regular grid of candidate num_clusters values, built with levels = 6.
kmeans_grid <- grid_regular(num_clusters(), levels = 6)

# Tune the workflow over the grid and the resamples, scoring with tot_wss,
# tot_sse and sse_ratio. The control object sets save_pred = TRUE and uses
# base identity() as the extract function.
kmeans_res <- tune_cluster(
  kmeans_wflow,
  resamples = kmeans_cv,
  grid = kmeans_grid,
  control = control_grid(save_pred = TRUE, extract = identity),
  metrics = cluster_metric_set(tot_wss, tot_sse, sse_ratio)
) 
#> Warning: All models failed. See the `.notes` column.

collect_notes(kmeans_res)
#> # A tibble: 0 × 2
#> # … with 2 variables: id <chr>, .notes <???>

Created on 2022-08-31 with reprex v2.0.2

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.2.1 (2022-06-23) #> os macOS Big Sur ... 10.16 #> system x86_64, darwin17.0 #> ui X11 #> language (EN) #> collate en_GB.UTF-8 #> ctype en_GB.UTF-8 #> tz Europe/London #> date 2022-08-31 #> pandoc 2.18 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date (UTC) lib source #> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.0) #> backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0) #> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.1) #> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.2.0) #> class 7.3-20 2022-01-16 [1] CRAN (R 4.2.1) #> cli 3.3.0 2022-04-25 [1] CRAN (R 4.2.0) #> codetools 0.2-18 2020-11-04 [1] CRAN (R 4.2.1) #> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.0) #> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.2.0) #> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.0) #> dbplyr 2.2.1 2022-06-27 [1] CRAN (R 4.2.0) #> dials * 1.0.0 2022-06-14 [1] CRAN (R 4.2.0) #> DiceDesign 1.9 2021-02-13 [1] CRAN (R 4.2.0) #> digest 0.6.29 2021-12-01 [1] CRAN (R 4.2.0) #> dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.2.0) #> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0) #> evaluate 0.16 2022-08-09 [1] CRAN (R 4.2.0) #> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0) #> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.0) #> flexclust 1.4-1 2022-04-08 [1] CRAN (R 4.2.0) #> forcats * 0.5.2 2022-08-19 [1] CRAN (R 4.2.1) #> foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.0) #> fs 1.5.2 2021-12-08 [1] CRAN (R 4.2.0) #> furrr 0.3.1 2022-08-15 [1] CRAN (R 4.2.0) #> future 1.27.0 2022-07-22 [1] CRAN (R 4.2.1) #> future.apply 1.9.0 2022-04-25 [1] CRAN (R 4.2.0) #> gargle 1.2.0 2021-07-02 [1] CRAN (R 4.2.0) #> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.1) #> ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.2.0) #> globals 0.16.1 2022-08-28 [1] CRAN (R 4.2.1) #> 
glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0) #> googledrive 2.0.0 2021-07-08 [1] CRAN (R 4.2.0) #> googlesheets4 1.0.1 2022-08-13 [1] CRAN (R 4.2.1) #> gower 1.0.0 2022-02-03 [1] CRAN (R 4.2.0) #> GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.2.0) #> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.2.0) #> hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.2.0) #> haven 2.5.1 2022-08-22 [1] CRAN (R 4.2.1) #> highr 0.9 2021-04-16 [1] CRAN (R 4.2.0) #> hms 1.1.2 2022-08-19 [1] CRAN (R 4.2.1) #> htmltools 0.5.3 2022-07-18 [1] CRAN (R 4.2.1) #> httr 1.4.4 2022-08-17 [1] CRAN (R 4.2.0) #> infer * 1.0.3 2022-08-22 [1] CRAN (R 4.2.1) #> ipred 0.9-13 2022-06-02 [1] CRAN (R 4.2.0) #> iterators 1.0.14 2022-02-05 [1] CRAN (R 4.2.0) #> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.2.0) #> knitr 1.40 2022-08-24 [1] CRAN (R 4.2.1) #> lattice 0.20-45 2021-09-22 [1] CRAN (R 4.2.1) #> lava 1.6.10 2021-09-02 [1] CRAN (R 4.2.0) #> lhs 1.1.5 2022-03-22 [1] CRAN (R 4.2.0) #> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.2.0) #> listenv 0.8.0 2019-12-05 [1] CRAN (R 4.2.0) #> lubridate 1.8.0.9000 2022-06-09 [1] Github (tidyverse/lubridate@0bb49b2) #> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0) #> MASS 7.3-58.1 2022-08-03 [1] CRAN (R 4.2.0) #> Matrix 1.4-1 2022-03-23 [1] CRAN (R 4.2.1) #> modeldata * 1.0.0 2022-07-01 [1] CRAN (R 4.2.0) #> modelr 0.1.9 2022-08-19 [1] CRAN (R 4.2.1) #> modeltools 0.2-23 2020-03-05 [1] CRAN (R 4.2.0) #> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0) #> nnet 7.3-17 2022-01-16 [1] CRAN (R 4.2.1) #> parallelly 1.32.1 2022-07-21 [1] CRAN (R 4.2.1) #> parsnip * 1.0.1.9000 2022-08-28 [1] Github (tidymodels/parsnip@e1eb30a) #> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.1) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0) #> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.2.0) #> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.2.0) #> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.2.1) #> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.2.0) #> R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.2.0) #> R.utils 2.12.0 2022-06-28 [1] CRAN (R 4.2.0) #> R6 
2.5.1 2021-08-19 [1] CRAN (R 4.2.0) #> Rcpp 1.0.9 2022-07-08 [1] CRAN (R 4.2.1) #> readr * 2.1.2 2022-01-30 [1] CRAN (R 4.2.0) #> readxl 1.4.1 2022-08-17 [1] CRAN (R 4.2.0) #> recipes * 1.0.1 2022-07-07 [1] CRAN (R 4.2.1) #> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.0) #> rlang 1.0.4 2022-07-12 [1] CRAN (R 4.2.0) #> rmarkdown 2.16 2022-08-24 [1] CRAN (R 4.2.1) #> rpart 4.1.16 2022-01-24 [1] CRAN (R 4.2.1) #> rsample * 1.1.0 2022-08-08 [1] CRAN (R 4.2.1) #> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.1) #> rvest 1.0.3 2022-08-19 [1] CRAN (R 4.2.1) #> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.1) #> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0) #> stringi 1.7.8 2022-07-11 [1] CRAN (R 4.2.0) #> stringr * 1.4.1 2022-08-20 [1] CRAN (R 4.2.0) #> styler 1.7.0 2022-03-13 [1] CRAN (R 4.2.0) #> survival 3.4-0 2022-08-09 [1] CRAN (R 4.2.0) #> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.1) #> tidyclust * 0.0.0.9000 2022-08-31 [1] Github (emilhvitfeldt/tidyclust@e08174b) #> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) #> tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.2.0) #> tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.2.0) #> tidyverse * 1.3.2 2022-07-18 [1] CRAN (R 4.2.1) #> timeDate 4021.104 2022-07-19 [1] CRAN (R 4.2.0) #> tune * 1.0.0 2022-07-07 [1] CRAN (R 4.2.1) #> tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.2.0) #> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0) #> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.2.0) #> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0) #> workflows * 1.0.0.9000 2022-08-28 [1] Github (tidymodels/workflows@099a735) #> workflowsets * 1.0.0 2022-07-12 [1] CRAN (R 4.2.0) #> xfun 0.32 2022-08-10 [1] CRAN (R 4.2.1) #> xml2 1.3.3 2021-11-30 [1] CRAN (R 4.2.0) #> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.2.0) #> yardstick * 1.0.0 2022-06-06 [1] CRAN (R 4.2.0) #> #> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library #> #> ────────────────────────────────────────────────────────────────────────────── ```
stevensmallberg commented 2 years ago

Hi @EmilHvitfeldt, I've run into the same issue. I originally encountered the error working with my own [proprietary] data. As part of my troubleshooting, I created a fresh R project, installed the most recent version of tidyclust, copied the tuning and metrics vignette verbatim, and the error still appeared.

I hope this is helpful for additional troubleshooting (e.g. if some supporting package version discrepancy is at play here). Thanks.

Also, ditto the praise for tidyclust. Great package and great talk at conf! 🏆

library(parsnip)
library(workflows)

library(tidyclust)
#> 
#> Attaching package: 'tidyclust'
#> The following object is masked from 'package:parsnip':
#> 
#>     prepare_data
library(tidyverse)
library(tidymodels)

# Load the penguins data set from the modeldata package.
data("penguins", package = "modeldata")

# Remove rows that contain missing values.
penguins <- penguins %>%
  drop_na()

# Resamples of the cleaned data with v = 5 folds.
penguins_cv <- vfold_cv(penguins, v = 5)

# k-means specification with num_clusters marked for tuning; no engine is
# set explicitly here.
kmeans_spec <- k_means(num_clusters = tune()) 

# One-sided formula: only the two bill measurements are used and there is no
# outcome.
penguins_rec <- recipe(~ bill_length_mm + bill_depth_mm, 
                       data = penguins)

# Combine the recipe and the model specification into a single workflow.
kmeans_wflow <- workflow(penguins_rec, kmeans_spec)

# Regular grid of candidate num_clusters values, built with levels = 10.
clust_num_grid <- grid_regular(num_clusters(), 
                               levels = 10)

clust_num_grid
#> # A tibble: 10 × 1
#>    num_clusters
#>           <int>
#>  1            1
#>  2            2
#>  3            3
#>  4            4
#>  5            5
#>  6            6
#>  7            7
#>  8            8
#>  9            9
#> 10           10

# Tune the k-means workflow over clust_num_grid and the penguins resamples,
# scoring with tot_wss, tot_sse and sse_ratio. The control object sets
# save_pred = TRUE and uses base identity() as the extract function.
res <- tune_cluster(
  kmeans_wflow,
  resamples = penguins_cv,
  grid = clust_num_grid,
  control = control_grid(save_pred = TRUE, extract = identity),
  metrics = cluster_metric_set(tot_wss, tot_sse, sse_ratio)
) 
#> Warning: All models failed. See the `.notes` column.

res
#> # Tuning results
#> # 5-fold cross-validation 
#> # A tibble: 5 × 6
#>   splits           id    .metrics .notes .extracts .predictions
#>   <list>           <chr> <list>   <list> <list>    <list>      
#> 1 <split [266/67]> Fold1 <NULL>   <NULL> <NULL>    <NULL>      
#> 2 <split [266/67]> Fold2 <NULL>   <NULL> <NULL>    <NULL>      
#> 3 <split [266/67]> Fold3 <NULL>   <NULL> <NULL>    <NULL>      
#> 4 <split [267/66]> Fold4 <NULL>   <NULL> <NULL>    <NULL>      
#> 5 <split [267/66]> Fold5 <NULL>   <NULL> <NULL>    <NULL>

res_metrics <- res %>% collect_metrics()
#> Error in `estimate_tune_results()`:
#> ! All of the models failed. See the .notes column.
res_metrics
#> Error in eval(expr, envir, enclos): object 'res_metrics' not found

Created on 2022-08-31 by the reprex package (v2.0.1)

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.2.0 (2022-04-22) #> os macOS Big Sur/Monterey 10.16 #> system x86_64, darwin17.0 #> ui X11 #> language (EN) #> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz America/Los_Angeles #> date 2022-08-31 #> pandoc 2.18 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date (UTC) lib source #> assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.2.0) #> backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0) #> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) #> cellranger 1.1.0 2016-07-27 [2] CRAN (R 4.2.0) #> class 7.3-20 2022-01-16 [2] CRAN (R 4.2.0) #> cli 3.3.0 2022-04-25 [1] CRAN (R 4.2.0) #> codetools 0.2-18 2020-11-04 [2] CRAN (R 4.2.0) #> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.0) #> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.2.0) #> DBI 1.1.2 2021-12-20 [2] CRAN (R 4.2.0) #> dbplyr 2.2.1 2022-06-27 [2] CRAN (R 4.2.0) #> dials * 1.0.0 2022-06-14 [1] CRAN (R 4.2.0) #> DiceDesign 1.9 2021-02-13 [1] CRAN (R 4.2.0) #> digest 0.6.29 2021-12-01 [1] CRAN (R 4.2.0) #> dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.2.0) #> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0) #> evaluate 0.16 2022-08-09 [1] CRAN (R 4.2.0) #> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0) #> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.0) #> forcats * 0.5.2 2022-08-19 [1] CRAN (R 4.2.0) #> foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.0) #> fs 1.5.2 2021-12-08 [1] CRAN (R 4.2.0) #> furrr 0.3.1 2022-08-15 [1] CRAN (R 4.2.0) #> future 1.27.0 2022-07-22 [1] CRAN (R 4.2.0) #> future.apply 1.9.0 2022-04-25 [1] CRAN (R 4.2.0) #> gargle 1.2.0 2021-07-02 [2] CRAN (R 4.2.0) #> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0) #> ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.2.0) #> globals 0.16.1 2022-08-28 [1] CRAN (R 4.2.0) #> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0) 
#> googledrive 2.0.0 2021-07-08 [2] CRAN (R 4.2.0) #> googlesheets4 1.0.0 2021-07-21 [2] CRAN (R 4.2.0) #> gower 1.0.0 2022-02-03 [1] CRAN (R 4.2.0) #> GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.2.0) #> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.2.0) #> hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.2.0) #> haven 2.5.0 2022-04-15 [2] CRAN (R 4.2.0) #> highr 0.9 2021-04-16 [1] CRAN (R 4.2.0) #> hms 1.1.1 2021-09-26 [2] CRAN (R 4.2.0) #> htmltools 0.5.3 2022-07-18 [1] CRAN (R 4.2.0) #> httr 1.4.4 2022-08-17 [1] CRAN (R 4.2.0) #> infer * 1.0.3 2022-08-22 [1] CRAN (R 4.2.0) #> ipred 0.9-13 2022-06-02 [1] CRAN (R 4.2.0) #> iterators 1.0.14 2022-02-05 [1] CRAN (R 4.2.0) #> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.2.0) #> knitr 1.40 2022-08-24 [1] CRAN (R 4.2.0) #> lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.0) #> lava 1.6.10 2021-09-02 [1] CRAN (R 4.2.0) #> lhs 1.1.5 2022-03-22 [1] CRAN (R 4.2.0) #> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.2.0) #> listenv 0.8.0 2019-12-05 [1] CRAN (R 4.2.0) #> lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.2.0) #> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0) #> MASS 7.3-56 2022-03-23 [2] CRAN (R 4.2.0) #> Matrix 1.4-1 2022-03-23 [2] CRAN (R 4.2.0) #> modeldata * 1.0.0 2022-07-01 [1] CRAN (R 4.2.0) #> modelr 0.1.8 2020-05-19 [2] CRAN (R 4.2.0) #> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0) #> nnet 7.3-17 2022-01-16 [2] CRAN (R 4.2.0) #> parallelly 1.32.1 2022-07-21 [1] CRAN (R 4.2.0) #> parsnip * 1.0.1.9000 2022-08-31 [1] Github (tidymodels/parsnip@fdde60a) #> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.0) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0) #> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.2.0) #> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.2.0) #> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0) #> Rcpp 1.0.9 2022-07-08 [1] CRAN (R 4.2.0) #> readr * 2.1.2 2022-01-30 [2] CRAN (R 4.2.0) #> readxl 1.4.0 2022-03-28 [2] CRAN (R 4.2.0) #> recipes * 1.0.1 2022-07-07 [1] CRAN (R 4.2.0) #> reprex 2.0.1 2021-08-05 [2] CRAN (R 4.2.0) #> rlang 1.0.5 2022-08-31 [1] CRAN (R 4.2.0) #> 
rmarkdown 2.16 2022-08-24 [1] CRAN (R 4.2.0) #> rpart 4.1.16 2022-01-24 [2] CRAN (R 4.2.0) #> rsample * 1.1.0 2022-08-08 [1] CRAN (R 4.2.0) #> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.0) #> rvest 1.0.2 2021-10-16 [2] CRAN (R 4.2.0) #> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.0) #> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0) #> stringi 1.7.8 2022-07-11 [1] CRAN (R 4.2.0) #> stringr * 1.4.1 2022-08-20 [1] CRAN (R 4.2.0) #> survival 3.3-1 2022-03-03 [2] CRAN (R 4.2.0) #> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) #> tidyclust * 0.0.0.9000 2022-08-31 [1] Github (emilhvitfeldt/tidyclust@e08174b) #> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) #> tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.2.0) #> tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.2.0) #> tidyverse * 1.3.2 2022-07-18 [2] CRAN (R 4.2.0) #> timeDate 4021.104 2022-07-19 [1] CRAN (R 4.2.0) #> tune * 1.0.0 2022-07-07 [1] CRAN (R 4.2.0) #> tzdb 0.3.0 2022-03-28 [2] CRAN (R 4.2.0) #> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0) #> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.2.0) #> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0) #> workflows * 1.0.0.9000 2022-08-31 [1] Github (tidymodels/workflows@099a735) #> workflowsets * 1.0.0 2022-07-12 [1] CRAN (R 4.2.0) #> xfun 0.32 2022-08-10 [1] CRAN (R 4.2.0) #> xml2 1.3.3 2021-11-30 [1] CRAN (R 4.2.0) #> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.2.0) #> yardstick * 1.0.0 2022-06-06 [1] CRAN (R 4.2.0) #> #> [1] /Users/steven.smallberg/Documents/sandbox/renv/library/R-4.2/x86_64-apple-darwin17.0 #> [2] /Library/Frameworks/R.framework/Versions/4.2/Resources/library #> #> ────────────────────────────────────────────────────────────────────────────── ```
EmilHvitfeldt commented 2 years ago

Hello @cgoo4 & @stevensmallberg! Can one of you run the following code exactly as written and report back? I'm trying to narrow down the issue.

library(tidyclust)
library(recipes)
library(workflows)

# Diagnostic script: fit the k-means workflow once with a fixed
# num_clusters = 3, without calling tune_cluster().

# Load the penguins data set from the modeldata package.
data("penguins", package = "modeldata")

# Remove rows that contain missing values.
penguins <- penguins %>%
  tidyr::drop_na()

# k-means specification with the number of clusters fixed at 3 (no tune()).
kmeans_spec <- k_means(num_clusters = 3) 

# One-sided formula: only the two bill measurements are used and there is no
# outcome.
penguins_rec <- recipe(~ bill_length_mm + bill_depth_mm, 
                       data = penguins)

# Combine the recipe and the model specification into a single workflow.
kmeans_wflow <- workflow(penguins_rec, kmeans_spec)

# Fit the workflow directly on the full penguins data.
fit(kmeans_wflow, data = penguins)
stevensmallberg commented 2 years ago

@EmilHvitfeldt Works just fine when the num_clusters argument of k_means() is supplied explicitly.

I should've said originally — the workflow laid out in the kmeans vignette worked for me, including on my own data, without any errors.

library(tidyclust)
library(recipes)
#> Loading required package: dplyr
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> 
#> Attaching package: 'recipes'
#> The following object is masked from 'package:stats':
#> 
#>     step
library(workflows)

data("penguins", package = "modeldata")

penguins <- penguins %>%
  tidyr::drop_na()

kmeans_spec <- k_means(num_clusters = 3) 

penguins_rec <- recipe(~ bill_length_mm + bill_depth_mm, 
                       data = penguins)

kmeans_wflow <- workflow(penguins_rec, kmeans_spec)

fit(kmeans_wflow, data = penguins)
#> ══ Workflow [trained] ══════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: k_means()
#> 
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 0 Recipe Steps
#> 
#> ── Model ───────────────────────────────────────────────────────────────────────
#> K-means clustering with 3 clusters of sizes 112, 136, 85
#> 
#> Cluster means:
#>   bill_length_mm bill_depth_mm
#> 1       45.50982      15.68304
#> 2       38.42426      18.27794
#> 3       50.90353      17.33647
#> 
#> Clustering vector:
#>   [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#>  [38] 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2
#>  [75] 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 1 2
#> [112] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 3
#> [149] 1 3 1 1 1 1 1 1 1 3 1 1 1 3 1 3 1 3 3 1 1 1 1 1 1 1 3 1 1 1 3 3 3 1 1 1 3
#> [186] 1 3 1 3 3 1 1 3 1 1 1 1 1 3 1 1 1 1 1 3 1 1 1 3 1 3 3 1 3 1 1 1 1 1 3 1 3
#> [223] 1 1 3 3 1 3 1 3 1 3 1 3 1 3 1 3 1 3 3 1 1 3 1 3 1 3 1 1 3 1 1 3 3 1 3 1 3
#> [260] 3 1 1 3 1 3 1 3 3 1 3 1 1 3 1 3 1 3 1 3 1 3 3 3 1 3 1 3 1 3 1 3 3 3 1 3 2
#> [297] 3 1 3 3 1 1 3 1 3 3 1 3 1 3 3 3 3 3 3 1 3 1 3 1 3 1 3 3 1 3 1 1 3 1 3 3 3
#> 
#> Within cluster sum of squares by cluster:
#> [1] 742.0970 904.9838 617.9859
#>  (between_SS / total_SS =  79.8 %)
#> 
#> Available components:
#> 
#> [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
#> [6] "betweenss"    "size"         "iter"         "ifault"

Created on 2022-08-31 by the reprex package (v2.0.1)

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.2.0 (2022-04-22) #> os macOS Big Sur/Monterey 10.16 #> system x86_64, darwin17.0 #> ui X11 #> language (EN) #> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz America/Los_Angeles #> date 2022-08-31 #> pandoc 2.18 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date (UTC) lib source #> assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.2.0) #> class 7.3-20 2022-01-16 [2] CRAN (R 4.2.0) #> cli 3.3.0 2022-04-25 [1] CRAN (R 4.2.0) #> codetools 0.2-18 2020-11-04 [2] CRAN (R 4.2.0) #> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.0) #> DBI 1.1.2 2021-12-20 [2] CRAN (R 4.2.0) #> dials 1.0.0 2022-06-14 [1] CRAN (R 4.2.0) #> DiceDesign 1.9 2021-02-13 [1] CRAN (R 4.2.0) #> digest 0.6.29 2021-12-01 [1] CRAN (R 4.2.0) #> dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.2.0) #> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0) #> evaluate 0.16 2022-08-09 [1] CRAN (R 4.2.0) #> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0) #> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.0) #> foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.0) #> fs 1.5.2 2021-12-08 [1] CRAN (R 4.2.0) #> furrr 0.3.1 2022-08-15 [1] CRAN (R 4.2.0) #> future 1.27.0 2022-07-22 [1] CRAN (R 4.2.0) #> future.apply 1.9.0 2022-04-25 [1] CRAN (R 4.2.0) #> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0) #> ggplot2 3.3.6 2022-05-03 [1] CRAN (R 4.2.0) #> globals 0.16.1 2022-08-28 [1] CRAN (R 4.2.0) #> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0) #> gower 1.0.0 2022-02-03 [1] CRAN (R 4.2.0) #> GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.2.0) #> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.2.0) #> hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.2.0) #> highr 0.9 2021-04-16 [1] CRAN (R 4.2.0) #> htmltools 0.5.3 2022-07-18 [1] CRAN (R 4.2.0) #> ipred 0.9-13 2022-06-02 [1] CRAN (R 4.2.0) #> iterators 
1.0.14 2022-02-05 [1] CRAN (R 4.2.0) #> knitr 1.40 2022-08-24 [1] CRAN (R 4.2.0) #> lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.0) #> lava 1.6.10 2021-09-02 [1] CRAN (R 4.2.0) #> lhs 1.1.5 2022-03-22 [1] CRAN (R 4.2.0) #> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.2.0) #> listenv 0.8.0 2019-12-05 [1] CRAN (R 4.2.0) #> lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.2.0) #> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0) #> MASS 7.3-56 2022-03-23 [2] CRAN (R 4.2.0) #> Matrix 1.4-1 2022-03-23 [2] CRAN (R 4.2.0) #> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0) #> nnet 7.3-17 2022-01-16 [2] CRAN (R 4.2.0) #> parallelly 1.32.1 2022-07-21 [1] CRAN (R 4.2.0) #> parsnip 1.0.1.9000 2022-08-31 [1] Github (tidymodels/parsnip@fdde60a) #> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.0) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0) #> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.2.0) #> purrr 0.3.4 2020-04-17 [1] CRAN (R 4.2.0) #> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0) #> Rcpp 1.0.9 2022-07-08 [1] CRAN (R 4.2.0) #> recipes * 1.0.1 2022-07-07 [1] CRAN (R 4.2.0) #> reprex 2.0.1 2021-08-05 [2] CRAN (R 4.2.0) #> rlang 1.0.5 2022-08-31 [1] CRAN (R 4.2.0) #> rmarkdown 2.16 2022-08-24 [1] CRAN (R 4.2.0) #> rpart 4.1.16 2022-01-24 [2] CRAN (R 4.2.0) #> rsample 1.1.0 2022-08-08 [1] CRAN (R 4.2.0) #> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.0) #> scales 1.2.1 2022-08-20 [1] CRAN (R 4.2.0) #> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0) #> stringi 1.7.8 2022-07-11 [1] CRAN (R 4.2.0) #> stringr 1.4.1 2022-08-20 [1] CRAN (R 4.2.0) #> survival 3.3-1 2022-03-03 [2] CRAN (R 4.2.0) #> tibble 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) #> tidyclust * 0.0.0.9000 2022-08-31 [1] Github (emilhvitfeldt/tidyclust@e08174b) #> tidyr 1.2.0 2022-02-01 [1] CRAN (R 4.2.0) #> tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.2.0) #> timeDate 4021.104 2022-07-19 [1] CRAN (R 4.2.0) #> tune 1.0.0 2022-07-07 [1] CRAN (R 4.2.0) #> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0) #> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.2.0) #> withr 2.5.0 2022-03-03 
[1] CRAN (R 4.2.0) #> workflows * 1.0.0.9000 2022-08-31 [1] Github (tidymodels/workflows@099a735) #> xfun 0.32 2022-08-10 [1] CRAN (R 4.2.0) #> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.2.0) #> yardstick 1.0.0 2022-06-06 [1] CRAN (R 4.2.0) #> #> [1] /Users/steven.smallberg/Documents/sandbox/renv/library/R-4.2/x86_64-apple-darwin17.0 #> [2] /Library/Frameworks/R.framework/Versions/4.2/Resources/library #> #> ────────────────────────────────────────────────────────────────────────────── ```
cgoo4 commented 2 years ago

@EmilHvitfeldt Ditto per @stevensmallberg for me. The k-means vignette (with num_clusters specified explicitly) works. The Tuning Cluster Models vignette returns "All models failed." I also tried the dev version of tune for tune::control_grid() with the same outcome.

If, instead of using a workflow, I include the spec and preprocessor in tune_cluster() and use control_cluster(), then I get a different error per below:

library(tidyclust)
library(tidyverse)
library(tidymodels)

data("penguins", package = "modeldata")

penguins <- penguins %>%
  drop_na()

penguins_cv <- vfold_cv(penguins, v = 5)

kmeans_spec <- k_means(num_clusters = tune()) 

penguins_rec <- recipe(~ bill_length_mm + bill_depth_mm, 
                       data = penguins)

# kmeans_wflow <- workflow(penguins_rec, kmeans_spec)

clust_num_grid <- grid_regular(num_clusters(), 
                               levels = 10)

clust_num_grid
#> # A tibble: 10 × 1
#>    num_clusters
#>           <int>
#>  1            1
#>  2            2
#>  3            3
#>  4            4
#>  5            5
#>  6            6
#>  7            7
#>  8            8
#>  9            9
#> 10           10

res <- tune_cluster(
  kmeans_spec,
  preprocessor = penguins_rec,
  resamples = penguins_cv,
  grid = clust_num_grid,
  control = control_cluster(),
  metrics = cluster_metric_set(tot_wss, tot_sse, sse_ratio)
) 
#> Error in allow && is_par: invalid 'x' type in 'x && y'

res
#> Error in eval(expr, envir, enclos): object 'res' not found

Created on 2022-09-01 with reprex v2.0.2

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.2.1 (2022-06-23) #> os macOS Big Sur ... 10.16 #> system x86_64, darwin17.0 #> ui X11 #> language (EN) #> collate en_GB.UTF-8 #> ctype en_GB.UTF-8 #> tz Europe/London #> date 2022-09-01 #> pandoc 2.18 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date (UTC) lib source #> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.0) #> backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0) #> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.1) #> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.2.0) #> class 7.3-20 2022-01-16 [1] CRAN (R 4.2.1) #> cli 3.3.0 2022-04-25 [1] CRAN (R 4.2.0) #> codetools 0.2-18 2020-11-04 [1] CRAN (R 4.2.1) #> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.0) #> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.2.0) #> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.0) #> dbplyr 2.2.1 2022-06-27 [1] CRAN (R 4.2.0) #> dials * 1.0.0 2022-06-14 [1] CRAN (R 4.2.0) #> DiceDesign 1.9 2021-02-13 [1] CRAN (R 4.2.0) #> digest 0.6.29 2021-12-01 [1] CRAN (R 4.2.0) #> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.1) #> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0) #> evaluate 0.16 2022-08-09 [1] CRAN (R 4.2.0) #> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0) #> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.0) #> forcats * 0.5.2 2022-08-19 [1] CRAN (R 4.2.1) #> foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.0) #> fs 1.5.2 2021-12-08 [1] CRAN (R 4.2.0) #> furrr 0.3.1 2022-08-15 [1] CRAN (R 4.2.0) #> future 1.27.0 2022-07-22 [1] CRAN (R 4.2.1) #> future.apply 1.9.0 2022-04-25 [1] CRAN (R 4.2.0) #> gargle 1.2.0 2021-07-02 [1] CRAN (R 4.2.0) #> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.1) #> ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.2.0) #> globals 0.16.1 2022-08-28 [1] CRAN (R 4.2.1) #> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0) #> 
googledrive 2.0.0 2021-07-08 [1] CRAN (R 4.2.0) #> googlesheets4 1.0.1 2022-08-13 [1] CRAN (R 4.2.1) #> gower 1.0.0 2022-02-03 [1] CRAN (R 4.2.0) #> GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.2.0) #> gtable 0.3.1 2022-09-01 [1] CRAN (R 4.2.1) #> hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.2.0) #> haven 2.5.1 2022-08-22 [1] CRAN (R 4.2.1) #> highr 0.9 2021-04-16 [1] CRAN (R 4.2.0) #> hms 1.1.2 2022-08-19 [1] CRAN (R 4.2.1) #> htmltools 0.5.3 2022-07-18 [1] CRAN (R 4.2.1) #> httr 1.4.4 2022-08-17 [1] CRAN (R 4.2.0) #> infer * 1.0.3 2022-08-22 [1] CRAN (R 4.2.1) #> ipred 0.9-13 2022-06-02 [1] CRAN (R 4.2.0) #> iterators 1.0.14 2022-02-05 [1] CRAN (R 4.2.0) #> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.2.0) #> knitr 1.40 2022-08-24 [1] CRAN (R 4.2.1) #> lattice 0.20-45 2021-09-22 [1] CRAN (R 4.2.1) #> lava 1.6.10 2021-09-02 [1] CRAN (R 4.2.0) #> lhs 1.1.5 2022-03-22 [1] CRAN (R 4.2.0) #> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.2.0) #> listenv 0.8.0 2019-12-05 [1] CRAN (R 4.2.0) #> lubridate 1.8.0.9000 2022-06-09 [1] Github (tidyverse/lubridate@0bb49b2) #> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0) #> MASS 7.3-58.1 2022-08-03 [1] CRAN (R 4.2.0) #> Matrix 1.4-1 2022-03-23 [1] CRAN (R 4.2.1) #> modeldata * 1.0.0 2022-07-01 [1] CRAN (R 4.2.0) #> modelr 0.1.9 2022-08-19 [1] CRAN (R 4.2.1) #> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0) #> nnet 7.3-17 2022-01-16 [1] CRAN (R 4.2.1) #> parallelly 1.32.1 2022-07-21 [1] CRAN (R 4.2.1) #> parsnip * 1.0.1.9000 2022-09-01 [1] Github (tidymodels/parsnip@fdde60a) #> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.1) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0) #> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.2.0) #> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.2.0) #> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.2.1) #> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.2.0) #> R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.2.0) #> R.utils 2.12.0 2022-06-28 [1] CRAN (R 4.2.0) #> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0) #> Rcpp 1.0.9 2022-07-08 [1] CRAN (R 4.2.1) #> readr * 2.1.2 
2022-01-30 [1] CRAN (R 4.2.0) #> readxl 1.4.1 2022-08-17 [1] CRAN (R 4.2.0) #> recipes * 1.0.1 2022-07-07 [1] CRAN (R 4.2.1) #> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.0) #> rlang 1.0.5 2022-08-31 [1] CRAN (R 4.2.1) #> rmarkdown 2.16 2022-08-24 [1] CRAN (R 4.2.1) #> rpart 4.1.16 2022-01-24 [1] CRAN (R 4.2.1) #> rsample * 1.1.0 2022-08-08 [1] CRAN (R 4.2.1) #> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.1) #> rvest 1.0.3 2022-08-19 [1] CRAN (R 4.2.1) #> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.1) #> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0) #> stringi 1.7.8 2022-07-11 [1] CRAN (R 4.2.0) #> stringr * 1.4.1 2022-08-20 [1] CRAN (R 4.2.0) #> styler 1.7.0 2022-03-13 [1] CRAN (R 4.2.0) #> survival 3.4-0 2022-08-09 [1] CRAN (R 4.2.0) #> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.1) #> tidyclust * 0.0.0.9000 2022-08-31 [1] Github (emilhvitfeldt/tidyclust@e08174b) #> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) #> tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.2.0) #> tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.2.0) #> tidyverse * 1.3.2 2022-07-18 [1] CRAN (R 4.2.1) #> timeDate 4021.104 2022-07-19 [1] CRAN (R 4.2.0) #> tune * 1.0.0.9000 2022-09-01 [1] Github (tidymodels/tune@5b4ba2c) #> tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.2.0) #> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0) #> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.2.0) #> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0) #> workflows * 1.0.0.9000 2022-09-01 [1] Github (tidymodels/workflows@eb2fa38) #> workflowsets * 1.0.0 2022-07-12 [1] CRAN (R 4.2.0) #> xfun 0.32 2022-08-10 [1] CRAN (R 4.2.1) #> xml2 1.3.3 2021-11-30 [1] CRAN (R 4.2.0) #> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.2.0) #> yardstick * 1.0.0 2022-06-06 [1] CRAN (R 4.2.0) #> #> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library #> #> ────────────────────────────────────────────────────────────────────────────── ```
EmilHvitfeldt commented 2 years ago

@stevensmallberg helped me figure this one out, and it turns out it was an installation issue. To fix this problem, please install the {flexclust} and {Rfast} packages. Installing the most recent version of tidyclust by running devtools::install_github("EmilHvitfeldt/tidyclust") should force these packages to be installed.

github-actions[bot] commented 1 year ago

This issue has been automatically locked. If you believe you have found a related problem, please file a new issue (with a reprex: https://reprex.tidyverse.org) and link to this issue.