tidymodels / tidyclust

A tidy unified interface to clustering models
https://tidyclust.tidymodels.org/
Other
108 stars 14 forks source link

Tuning k-means fails when running in parallel #182

Open KazBarker opened 5 months ago

KazBarker commented 5 months ago

The problem

I'm having trouble tuning k-means clustering in parallel. Everything works as expected when running sequentially, but when running in parallel the following warning is returned:

Warning: All models failed. See the `.notes` column.

No error is thrown so the code continues running, but the returned data frame is empty.

Reproducible example

# Reproducible example: tune `num_clusters` for a k-means workflow with
# tune_cluster() while a doParallel backend is registered.
# Lines starting with `#>` are output captured by reprex, not script code.
library(doParallel)
#> Loading required package: foreach
#> Loading required package: iterators
#> Loading required package: parallel
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.3.3
#> Warning: package 'ggplot2' was built under R version 4.3.3
#> Warning: package 'tidyr' was built under R version 4.3.3
#> Warning: package 'readr' was built under R version 4.3.3
#> Warning: package 'dplyr' was built under R version 4.3.3
#> Warning: package 'stringr' was built under R version 4.3.3
#> Warning: package 'lubridate' was built under R version 4.3.3
library(tidymodels)
#> Warning: package 'dials' was built under R version 4.3.3
#> Warning: package 'scales' was built under R version 4.3.3
#> Warning: package 'parsnip' was built under R version 4.3.3
#> Warning: package 'recipes' was built under R version 4.3.3
#> Warning: package 'workflows' was built under R version 4.3.3
library(tidyclust)
#> Warning: package 'tidyclust' was built under R version 4.3.3
#> 
#> Attaching package: 'tidyclust'
#> The following objects are masked from 'package:parsnip':
#> 
#>     knit_engine_docs, list_md_problems
library(tune)

# Parallel backend ----
# Start a PSOCK cluster sized by parallel::detectCores(logical = FALSE) and
# register it as the foreach backend used for the rest of the script.
# NOTE(review): PSOCK workers are separate R sessions, so packages attached
# above are not automatically attached on the workers -- possibly related to
# the failure reported below; TODO confirm.
# NOTE(review): the cluster is never stopped in the lines shown; consider
# parallel::stopCluster(my_parallel) once tuning has finished.
my_parallel <- parallel::makePSOCKcluster(parallel::detectCores(logical = FALSE))
doParallel::registerDoParallel(my_parallel)

# Seed the RNG in this session before the resamples are created.
set.seed(123)

# Data and preprocessing ----
# Penguins data with incomplete rows dropped; the one-sided formula selects
# two predictors and no outcome.
penguin_data <- modeldata::penguins %>% drop_na()
data_form <- ~bill_length_mm + bill_depth_mm

# Recipe that normalizes every predictor named in the formula.
penguin_recipe <- recipe(data_form, data = penguin_data) %>% 
  step_normalize(all_predictors())

# Model and workflow ----
# k-means specification ('stats' engine) with the number of clusters left as
# a tuning parameter via tune().
kmeans_spec <- k_means(num_clusters = tune(),
                       engine = 'stats',
                       mode = 'partition')

# Bundle the recipe and the model specification into one workflow.
kmeans_wkfl <- workflow() %>% 
  add_recipe(penguin_recipe) %>% 
  add_model(kmeans_spec)

# Resamples and grid ----
# Two-fold cross-validation; candidate values for num_clusters are 1 to 10.
cl_resamples <- vfold_cv(penguin_data, v = 2)
cl_grid <- tibble(num_clusters = 1:10)

# Tuning ----
# Evaluate every grid value on every resample, scoring with sse_within_total.
# With the backend registered above, this call emits the warning shown below.
tuning_result <- tune_cluster(kmeans_wkfl,
                              resamples = cl_resamples,
                              grid = cl_grid,
                              metrics = cluster_metric_set(sse_within_total))
#> Warning: All models failed. See the `.notes` column.
# The warning points at `.notes`, but the captured output shows NULL for both
# resamples, so no failure message is recorded.
tuning_result$.notes
#> [[1]]
#> NULL
#> 
#> [[2]]
#> NULL

Created on 2024-04-26 with reprex v2.1.0

Session info

``` r
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.3.2 (2023-10-31 ucrt)
#>  os       Windows 11 x64 (build 22631)
#>  system   x86_64, mingw32
#>  ui       RTerm
#>  language (EN)
#>  collate  English_United States.utf8
#>  ctype    English_United States.utf8
#>  tz       America/New_York
#>  date     2024-04-26
#>  pandoc   3.1.7 @ C:/Users/Kbark/AppData/Local/Pandoc/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package      * version    date (UTC) lib source
#>  backports      1.4.1      2021-12-13 [1] CRAN (R 4.3.0)
#>  broom        * 1.0.5      2023-06-09 [1] CRAN (R 4.3.1)
#>  class          7.3-22     2023-05-03 [2] CRAN (R 4.3.2)
#>  cli            3.6.2      2023-12-11 [1] CRAN (R 4.3.3)
#>  codetools      0.2-19     2023-02-01 [2] CRAN (R 4.3.2)
#>  colorspace     2.1-0      2023-01-23 [1] CRAN (R 4.3.1)
#>  data.table     1.15.2     2024-02-29 [1] CRAN (R 4.3.3)
#>  dials        * 1.2.1      2024-02-22 [1] CRAN (R 4.3.3)
#>  DiceDesign     1.10       2023-12-07 [1] CRAN (R 4.3.2)
#>  digest         0.6.35     2024-03-11 [1] CRAN (R 4.3.3)
#>  doParallel   * 1.0.17     2022-02-07 [1] CRAN (R 4.3.2)
#>  dplyr        * 1.1.4      2023-11-17 [1] CRAN (R 4.3.3)
#>  evaluate       0.23       2023-11-01 [1] CRAN (R 4.3.3)
#>  fansi          1.0.6      2023-12-08 [1] CRAN (R 4.3.3)
#>  fastmap        1.1.1      2023-02-24 [1] CRAN (R 4.3.1)
#>  forcats      * 1.0.0      2023-01-29 [1] CRAN (R 4.3.1)
#>  foreach      * 1.5.2      2022-02-02 [1] CRAN (R 4.3.2)
#>  fs             1.6.3      2023-07-20 [1] CRAN (R 4.3.1)
#>  furrr          0.3.1      2022-08-15 [1] CRAN (R 4.3.2)
#>  future         1.33.1     2023-12-22 [1] CRAN (R 4.3.2)
#>  future.apply   1.11.1     2023-12-21 [1] CRAN (R 4.3.2)
#>  generics       0.1.3      2022-07-05 [1] CRAN (R 4.3.1)
#>  ggplot2      * 3.5.0      2024-02-23 [1] CRAN (R 4.3.3)
#>  globals        0.16.3     2024-03-08 [1] CRAN (R 4.3.3)
#>  glue           1.7.0      2024-01-09 [1] CRAN (R 4.3.3)
#>  gower          1.0.1      2022-12-22 [1] CRAN (R 4.3.1)
#>  GPfit          1.0-8      2019-02-08 [1] CRAN (R 4.3.2)
#>  gtable         0.3.4      2023-08-21 [1] CRAN (R 4.3.1)
#>  hardhat        1.3.1      2024-02-02 [1] CRAN (R 4.3.2)
#>  hms            1.1.3      2023-03-21 [1] CRAN (R 4.3.1)
#>  htmltools      0.5.7      2023-11-03 [1] CRAN (R 4.3.3)
#>  infer        * 1.0.6      2024-01-31 [1] CRAN (R 4.3.2)
#>  ipred          0.9-14     2023-03-09 [1] CRAN (R 4.3.2)
#>  iterators    * 1.0.14     2022-02-05 [1] CRAN (R 4.3.2)
#>  knitr          1.45       2023-10-30 [1] CRAN (R 4.3.3)
#>  lattice        0.21-9     2023-10-01 [2] CRAN (R 4.3.2)
#>  lava           1.8.0      2024-03-05 [1] CRAN (R 4.3.3)
#>  lhs            1.1.6      2022-12-17 [1] CRAN (R 4.3.2)
#>  lifecycle      1.0.4      2023-11-07 [1] CRAN (R 4.3.3)
#>  listenv        0.9.1      2024-01-29 [1] CRAN (R 4.3.2)
#>  lubridate    * 1.9.3      2023-09-27 [1] CRAN (R 4.3.3)
#>  magrittr       2.0.3      2022-03-30 [1] CRAN (R 4.3.1)
#>  MASS           7.3-60     2023-05-04 [2] CRAN (R 4.3.2)
#>  Matrix         1.6-5      2024-01-11 [1] CRAN (R 4.3.3)
#>  modeldata    * 1.3.0      2024-01-21 [1] CRAN (R 4.3.2)
#>  modelenv       0.1.1      2023-03-08 [1] CRAN (R 4.3.2)
#>  munsell        0.5.0      2018-06-12 [1] CRAN (R 4.3.1)
#>  nnet           7.3-19     2023-05-03 [2] CRAN (R 4.3.2)
#>  parallelly     1.37.1     2024-02-29 [1] CRAN (R 4.3.3)
#>  parsnip      * 1.2.0      2024-02-16 [1] CRAN (R 4.3.3)
#>  pillar         1.9.0      2023-03-22 [1] CRAN (R 4.3.1)
#>  pkgconfig      2.0.3      2019-09-22 [1] CRAN (R 4.3.1)
#>  prodlim        2023.08.28 2023-08-28 [1] CRAN (R 4.3.2)
#>  purrr        * 1.0.2      2023-08-10 [1] CRAN (R 4.3.1)
#>  R.cache        0.16.0     2022-07-21 [1] CRAN (R 4.3.3)
#>  R.methodsS3    1.8.2      2022-06-13 [1] CRAN (R 4.3.1)
#>  R.oo           1.26.0     2024-01-24 [1] CRAN (R 4.3.2)
#>  R.utils        2.12.3     2023-11-18 [1] CRAN (R 4.3.3)
#>  R6             2.5.1      2021-08-19 [1] CRAN (R 4.3.1)
#>  Rcpp           1.0.12     2024-01-09 [1] CRAN (R 4.3.3)
#>  readr        * 2.1.5      2024-01-10 [1] CRAN (R 4.3.3)
#>  recipes      * 1.0.10     2024-02-18 [1] CRAN (R 4.3.3)
#>  reprex         2.1.0      2024-01-11 [1] CRAN (R 4.3.3)
#>  rlang          1.1.3      2024-01-10 [1] CRAN (R 4.3.3)
#>  rmarkdown      2.26       2024-03-05 [1] CRAN (R 4.3.3)
#>  rpart          4.1.21     2023-10-09 [2] CRAN (R 4.3.2)
#>  rsample      * 1.2.0      2023-08-23 [1] CRAN (R 4.3.2)
#>  rstudioapi     0.15.0     2023-07-07 [1] CRAN (R 4.3.1)
#>  scales       * 1.3.0      2023-11-28 [1] CRAN (R 4.3.3)
#>  sessioninfo    1.2.2      2021-12-06 [1] CRAN (R 4.3.3)
#>  stringi        1.8.3      2023-12-11 [1] CRAN (R 4.3.2)
#>  stringr      * 1.5.1      2023-11-14 [1] CRAN (R 4.3.3)
#>  styler         1.10.2     2023-08-29 [1] CRAN (R 4.3.3)
#>  survival       3.5-8      2024-02-14 [1] CRAN (R 4.3.3)
#>  tibble       * 3.2.1      2023-03-20 [1] CRAN (R 4.3.1)
#>  tidyclust    * 0.2.1      2024-02-29 [1] CRAN (R 4.3.3)
#>  tidymodels   * 1.1.1      2023-08-24 [1] CRAN (R 4.3.2)
#>  tidyr        * 1.3.1      2024-01-24 [1] CRAN (R 4.3.3)
#>  tidyselect     1.2.1      2024-03-11 [1] CRAN (R 4.3.3)
#>  tidyverse    * 2.0.0      2023-02-22 [1] CRAN (R 4.3.3)
#>  timechange     0.3.0      2024-01-18 [1] CRAN (R 4.3.3)
#>  timeDate       4032.109   2023-12-14 [1] CRAN (R 4.3.2)
#>  tune         * 1.2.1      2024-04-18 [1] CRAN (R 4.3.2)
#>  tzdb           0.4.0      2023-05-12 [1] CRAN (R 4.3.1)
#>  utf8           1.2.4      2023-10-22 [1] CRAN (R 4.3.3)
#>  vctrs          0.6.5      2023-12-01 [1] CRAN (R 4.3.3)
#>  withr          3.0.0      2024-01-16 [1] CRAN (R 4.3.3)
#>  workflows    * 1.1.4      2024-02-19 [1] CRAN (R 4.3.3)
#>  workflowsets * 1.0.1      2023-04-06 [1] CRAN (R 4.3.2)
#>  xfun           0.42       2024-02-08 [1] CRAN (R 4.3.3)
#>  yaml           2.3.8      2023-12-11 [1] CRAN (R 4.3.2)
#>  yardstick    * 1.3.0      2024-01-19 [1] CRAN (R 4.3.2)
#>
#>  [1] C:/Users/Kbark/AppData/Local/R/win-library/4.3
#>  [2] C:/Program Files/R/R-4.3.2/library
#>
#> ──────────────────────────────────────────────────────────────────────────────
```