tidymodels / discrim

Wrappers for discriminant analysis and naive Bayes models for use with the parsnip package
https://discrim.tidymodels.org
Other
28 stars 3 forks source link

Error fitting resamples in parallel #13

Closed lymanmark closed 4 years ago

lymanmark commented 4 years ago

Calling `fit_resamples()` in parallel fails for a `discrim_linear()` model.

library(doFuture)
#> Loading required package: globals
#> Loading required package: future
#> Loading required package: foreach
#> Loading required package: iterators
#> Loading required package: parallel
library(tune)
library(rsample)
library(discrim)
#> Loading required package: parsnip

registerDoFuture()
plan(multisession, workers = 2)

set.seed(123)

discrim_mod <- discrim_linear() %>% 
  set_engine("MASS")

folds <- vfold_cv(iris, v = 2)

fit_resamples(discrim_mod, Species ~ ., folds)
#> x id, out_id, in_id, data: internal: Error in rlang::env_get(mod_env, items): argument "default" is...
#> x id, out_id, in_id, data: internal: Error in rlang::env_get(mod_env, items): argument "default" is...
#> Warning: All models failed in [fit_resamples()]. See the `.notes` column.
#> Warning: This tuning result has notes. Example notes on model fitting include:
#> internal: Error in rlang::env_get(mod_env, items): argument "default" is missing, with no default
#> internal: Error in rlang::env_get(mod_env, items): argument "default" is missing, with no default
#> # Resampling results
#> # 2-fold cross-validation 
#> # A tibble: 2 x 4
#>   splits          id    .metrics .notes          
#>   <list>          <chr> <list>   <list>          
#> 1 <split [75/75]> Fold1 <NULL>   <tibble [1 x 1]>
#> 2 <split [75/75]> Fold2 <NULL>   <tibble [1 x 1]>
ecsalomon commented 4 years ago

I arrived at the same error through a different path (`tune_grid()` rather than `fit_resamples()`), and have encountered it for both `discrim_linear()` and `naive_Bayes()`.

library(tidyverse)
library(tidymodels)
library(discrim)
library(doParallel)

model_spec <- naive_Bayes(smoothness = tune(), Laplace = tune()) %>%
  set_engine("naivebayes") %>%
  set_mode("classification")

grid = expand.grid(
  Laplace = 0, # :5,
  smoothness = seq(0, 5, by = 1)
)

data(iris)

iris_recipe <-
  recipe(Species ~ ., data = iris)

set.seed(45)
repeated_cv <- vfold_cv(iris, v = 5, repeats = 5)

wflow <- workflow() %>%
  add_model(model_spec) %>%
  add_recipe(iris_recipe)

cl <- makePSOCKcluster(7)
registerDoParallel(cl)

tune_grid(
  wflow,
  repeated_cv,
  grid = grid,
  metrics = metric_set(accuracy, kap),
  control = control_grid(save_pred = TRUE)
)

stopCluster(cl)
topepo commented 4 years ago

Temporary workaround (see tidymodels/tune#265): if you use a cluster object, load discrim on every worker with `parallel::clusterEvalQ()` before tuning:

library(tidyverse)
library(tidymodels)
#> ── Attaching packages ────────────────────────────────────── tidymodels 0.1.1 ──
#> ✔ broom     0.7.0          ✔ recipes   0.1.13    
#> ✔ dials     0.0.8.9000     ✔ rsample   0.0.7     
#> ✔ infer     0.5.2          ✔ tune      0.1.1.9000
#> ✔ modeldata 0.0.2          ✔ workflows 0.1.3     
#> ✔ parsnip   0.1.3          ✔ yardstick 0.0.7
#> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
#> ✖ scales::discard() masks purrr::discard()
#> ✖ dplyr::filter()   masks stats::filter()
#> ✖ recipes::fixed()  masks stringr::fixed()
#> ✖ dplyr::lag()      masks stats::lag()
#> ✖ yardstick::spec() masks readr::spec()
#> ✖ recipes::step()   masks stats::step()
library(discrim)
#> 
#> Attaching package: 'discrim'
#> The following object is masked from 'package:dials':
#> 
#>     smoothness
library(doParallel)
#> 
#> Attaching package: 'foreach'
#> The following objects are masked from 'package:purrr':
#> 
#>     accumulate, when
#> Loading required package: iterators
#> Loading required package: parallel

model_spec <- naive_Bayes(smoothness = tune(), Laplace = tune()) %>%
 set_engine("naivebayes") %>%
 set_mode("classification")

grid = expand.grid(
 Laplace = 0, # :5,
 smoothness = seq(0, 5, by = 1)
)

data(iris)

iris_recipe <-
 recipe(Species ~ ., data = iris)

set.seed(45)
repeated_cv <- vfold_cv(iris, v = 5, repeats = 5)

wflow <- workflow() %>%
 add_model(model_spec) %>%
 add_recipe(iris_recipe)

cl <- makePSOCKcluster(7)
registerDoParallel(cl)
parallel::clusterEvalQ(cl, {library(discrim)})
#> [[1]]
#> [1] "discrim"   "parsnip"   "stats"     "graphics"  "grDevices" "utils"    
#> [7] "datasets"  "methods"   "base"     
#> 
#> [[2]]
#> [1] "discrim"   "parsnip"   "stats"     "graphics"  "grDevices" "utils"    
#> [7] "datasets"  "methods"   "base"     
#> 
#> [[3]]
#> [1] "discrim"   "parsnip"   "stats"     "graphics"  "grDevices" "utils"    
#> [7] "datasets"  "methods"   "base"     
#> 
#> [[4]]
#> [1] "discrim"   "parsnip"   "stats"     "graphics"  "grDevices" "utils"    
#> [7] "datasets"  "methods"   "base"     
#> 
#> [[5]]
#> [1] "discrim"   "parsnip"   "stats"     "graphics"  "grDevices" "utils"    
#> [7] "datasets"  "methods"   "base"     
#> 
#> [[6]]
#> [1] "discrim"   "parsnip"   "stats"     "graphics"  "grDevices" "utils"    
#> [7] "datasets"  "methods"   "base"     
#> 
#> [[7]]
#> [1] "discrim"   "parsnip"   "stats"     "graphics"  "grDevices" "utils"    
#> [7] "datasets"  "methods"   "base"

tune_grid(
 wflow,
 repeated_cv,
 grid = grid,
 metrics = metric_set(accuracy, kap),
 control = control_grid(save_pred = TRUE)
)
#> Warning: This tuning result has notes. Example notes on model fitting include:
#> model 1/6: Error in density.default(x, na.rm = TRUE, ...): 'bw' is not positive.
#> model 1/6: Error in density.default(x, na.rm = TRUE, ...): 'bw' is not positive.
#> model 1/6: Error in density.default(x, na.rm = TRUE, ...): 'bw' is not positive.
#> # Tuning results
#> # 5-fold cross-validation repeated 5 times 
#> # A tibble: 25 x 6
#>    splits          id      id2   .metrics        .notes         .predictions    
#>    <list>          <chr>   <chr> <list>          <list>         <list>          
#>  1 <split [120/30… Repeat1 Fold1 <tibble [10 × … <tibble [1 × … <tibble [150 × …
#>  2 <split [120/30… Repeat1 Fold2 <tibble [10 × … <tibble [1 × … <tibble [150 × …
#>  3 <split [120/30… Repeat1 Fold3 <tibble [10 × … <tibble [1 × … <tibble [150 × …
#>  4 <split [120/30… Repeat1 Fold4 <tibble [10 × … <tibble [1 × … <tibble [150 × …
#>  5 <split [120/30… Repeat1 Fold5 <tibble [10 × … <tibble [1 × … <tibble [150 × …
#>  6 <split [120/30… Repeat2 Fold1 <tibble [10 × … <tibble [1 × … <tibble [150 × …
#>  7 <split [120/30… Repeat2 Fold2 <tibble [10 × … <tibble [1 × … <tibble [150 × …
#>  8 <split [120/30… Repeat2 Fold3 <tibble [10 × … <tibble [1 × … <tibble [150 × …
#>  9 <split [120/30… Repeat2 Fold4 <tibble [10 × … <tibble [1 × … <tibble [150 × …
#> 10 <split [120/30… Repeat2 Fold5 <tibble [10 × … <tibble [1 × … <tibble [150 × …
#> # … with 15 more rows

stopCluster(cl)

Created on 2020-08-20 by the reprex package (v0.3.0)

ecsalomon commented 4 years ago

Confirming that this worked for me. Thanks for sharing!

github-actions[bot] commented 3 years ago

This issue has been automatically locked. If you believe you have found a related problem, please file a new issue (with a reprex: https://reprex.tidyverse.org) and link to this issue.