ray-project / tune-sklearn

A drop-in replacement for Scikit-Learn’s GridSearchCV / RandomizedSearchCV -- but with cutting edge hyperparameter tuning techniques.
https://docs.ray.io/en/master/tune/api_docs/sklearn.html
Apache License 2.0

[bug] TuneSearch not working for sklearn cv split based on groups with early_stopping #189

Closed: mariesosa closed this issue 3 years ago

mariesosa commented 3 years ago

Describe the bug

TuneSearchCV raises ValueError: The 'groups' parameter should not be None. when used with an sklearn CV splitter based on groups (such as GroupKFold) and the max_iters and early_stopping arguments are specified. The behavior is the same for the hyperopt and random search algorithms.

Steps/Code to Reproduce

from tune_sklearn import TuneSearchCV
from ray import tune
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_blobs
from sklearn.model_selection import GroupKFold

# Create a test dataset
X, y = make_blobs(n_samples=10, centers=2, n_features=4)
groups = ['a', 'b', 'b', 'a', 'c', 'a', 'c', 'c', 'a', 'a']
cv = GroupKFold(n_splits=3)  # n_splits cannot exceed the number of distinct groups (3 here)

# Define the algorithm to optimize
clf = LogisticRegression()
params = {'tol': tune.loguniform(1e-5, 1e-2)}

# Perform hyperparameter optimization
tune_search = TuneSearchCV(
    clf, params, cv=cv, scoring='roc_auc', search_optimization="random", random_state=0,
    max_iters=8, early_stopping=True, n_trials=10,
)
tune_search.fit(X, y, groups=groups)

Expected Results

No error is thrown.

Actual Results

Trial _Trainable_58c49_00000: Error processing event.

---------------------------------------------------------------------------
RayTaskError(ValueError)                  Traceback (most recent call last)
<ipython-input-20-bcf535e47b98> in <module>
      5 )
      6 
----> 7 tune_search.fit(X, y, groups=groups)

~/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/tune_sklearn/tune_basesearch.py in fit(self, X, y, groups, **fit_params)
    662                                     "To show process output, set verbose=2.")
    663 
--> 664             result = self._fit(X, y, groups, **fit_params)
    665 
    666             if not ray_init and ray.is_initialized():

~/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/tune_sklearn/tune_basesearch.py in _fit(self, X, y, groups, **fit_params)
    563 
    564         self._fill_config_hyperparam(config)
--> 565         analysis = self._tune_run(config, resources_per_trial)
    566 
    567         self.cv_results_ = self._format_results(self.n_splits, analysis)

~/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/tune_sklearn/tune_search.py in _tune_run(self, config, resources_per_trial)
    713                 "ignore", message="fail_fast='raise' "
    714                 "detected.")
--> 715             analysis = tune.run(trainable, **run_args)
    716         return analysis

~/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/ray/tune/tune.py in run(run_or_experiment, name, metric, mode, stop, time_budget_s, config, resources_per_trial, num_samples, local_dir, search_alg, scheduler, keep_checkpoints_num, checkpoint_score_attr, checkpoint_freq, checkpoint_at_end, verbose, progress_reporter, log_to_file, trial_name_creator, trial_dirname_creator, sync_config, export_formats, max_failures, fail_fast, restore, server_port, resume, queue_trials, reuse_actors, trial_executor, raise_on_failed_trial, callbacks, loggers, ray_auto_init, run_errored_only, global_checkpoint_period, with_server, upload_dir, sync_to_cloud, sync_to_driver, sync_on_checkpoint)
    419     tune_start = time.time()
    420     while not runner.is_finished():
--> 421         runner.step()
    422         if has_verbosity(Verbosity.V1_EXPERIMENT):
    423             _report_progress(runner, progress_reporter)

~/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/ray/tune/trial_runner.py in step(self)
    400                 if self.trial_executor.in_staging_grace_period():
    401                     timeout = 0.1
--> 402                 self._process_events(timeout=timeout)  # blocking
    403             else:
    404                 self.trial_executor.on_no_available_trials(self)

~/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/ray/tune/trial_runner.py in _process_events(self, timeout)
    558             else:
    559                 with warn_if_slow("process_trial"):
--> 560                     self._process_trial(trial)
    561 
    562             # `self._queued_trial_decisions` now contains a final decision

~/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/ray/tune/trial_runner.py in _process_trial(self, trial)
    584         """
    585         try:
--> 586             results = self.trial_executor.fetch_result(trial)
    587             with warn_if_slow(
    588                     "process_trial_results",

~/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py in fetch_result(self, trial)
    607         self._running.pop(trial_future[0])
    608         with warn_if_slow("fetch_result"):
--> 609             result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
    610 
    611         # For local mode

~/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/ray/_private/client_mode_hook.py in wrapper(*args, **kwargs)
     45         if client_mode_enabled and _client_hook_enabled:
     46             return getattr(ray, func.__name__)(*args, **kwargs)
---> 47         return func(*args, **kwargs)
     48 
     49     return wrapper

~/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/ray/worker.py in get(object_refs, timeout)
   1454                     worker.core_worker.dump_object_store_memory_usage()
   1455                 if isinstance(value, RayTaskError):
-> 1456                     raise value.as_instanceof_cause()
   1457                 else:
   1458                     raise value

RayTaskError(ValueError): ray::_Trainable.train_buffered() (pid=2894346, ip=10.41.1.103)
  File "python/ray/_raylet.pyx", line 480, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 432, in ray._raylet.execute_task.function_executor
  File "/home/user/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/ray/tune/trainable.py", line 167, in train_buffered
    result = self.train()
  File "/home/user/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/ray/tune/trainable.py", line 226, in train
    result = self.step()
  File "/home/user/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/tune_sklearn/_trainable.py", line 106, in step
    return self._train()
  File "/home/user/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/tune_sklearn/_trainable.py", line 176, in _train
    for i, (train, test) in enumerate(self.cv.split(self.X, self.y)):
  File "/home/user/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/sklearn/model_selection/_split.py", line 332, in split
    for train, test in super().split(X, y, groups):
  File "/home/user/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/sklearn/model_selection/_split.py", line 80, in split
    for test_index in self._iter_test_masks(X, y, groups):
  File "/home/user/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/sklearn/model_selection/_split.py", line 92, in _iter_test_masks
    for test_index in self._iter_test_indices(X, y, groups):
  File "/home/user/virtualenvs/tf-gpu-2.2/lib/python3.6/site-packages/sklearn/model_selection/_split.py", line 503, in _iter_test_indices
    raise ValueError("The 'groups' parameter should not be None.")
ValueError: The 'groups' parameter should not be None.
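For what it's worth, the last frames point at tune_sklearn/_trainable.py, where the early-stopping path calls self.cv.split(self.X, self.y) without forwarding the groups array, which is why GroupKFold complains. Until this is fixed, dropping early_stopping and max_iters avoids that code path (a workaround sketch, reusing the variables from the reproduction above):

# Workaround sketch: same search as above, but without early stopping,
# since the error only appears when early_stopping/max_iters are set.
tune_search = TuneSearchCV(
    clf, params, cv=cv, scoring='roc_auc', search_optimization="random",
    random_state=0, n_trials=10,
)
tune_search.fit(X, y, groups=groups)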

Versions

hyperopt==0.2.5 numpy==1.18.4 ray==1.2.0 scikit-learn==0.24.1 tune_sklearn==0.2.1

richardliaw commented 3 years ago

Seems like a scikit-learn issue? https://github.com/scikit-learn/scikit-learn/issues/12516

richardliaw commented 3 years ago

Looks like we have to fix our implementation...
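A minimal, self-contained sketch of the fix direction (illustrative only, not the actual patch): GroupKFold only produces group-aware folds when the groups array is forwarded to split(), so the early-stopping trainable needs to keep a reference to it and pass it through.

# Illustrative only. Consuming cv.split(X, y) without groups (what the
# trainable currently does) raises the ValueError shown in the traceback;
# forwarding the groups array is what makes GroupKFold work.
import numpy as np
from sklearn.model_selection import GroupKFold

X = np.random.rand(10, 4)
y = np.array([0, 1] * 5)
groups = np.array(['a', 'b', 'b', 'a', 'c', 'a', 'c', 'c', 'a', 'a'])
cv = GroupKFold(n_splits=3)

# list(cv.split(X, y))  # -> ValueError: The 'groups' parameter should not be None.
for train_idx, test_idx in cv.split(X, y, groups):
    # with groups forwarded, no group appears in both train and test
    assert set(groups[train_idx]).isdisjoint(groups[test_idx])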

richardliaw commented 3 years ago

@mariesosa OK I just opened a fix! Let me know if that works for you (#191)

mariesosa commented 3 years ago

I tested it with various group split functions and it seems to work fine. Thanks.
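For anyone who wants to verify the fix with other group-aware splitters, a sketch along these lines should do (the specific splitters are illustrative choices; default scoring keeps the sketch independent of per-fold class balance):

# Re-run the search from the reproduction above with several group-aware
# splitters (illustrative; reuses clf, params, X, y, groups defined earlier).
from sklearn.model_selection import GroupKFold, GroupShuffleSplit, LeaveOneGroupOut

splitters = [
    GroupKFold(n_splits=3),
    GroupShuffleSplit(n_splits=3, test_size=0.3, random_state=0),
    LeaveOneGroupOut(),
]
for splitter in splitters:
    search = TuneSearchCV(
        clf, params, cv=splitter, search_optimization="random",
        random_state=0, max_iters=8, early_stopping=True, n_trials=10,
    )
    search.fit(X, y, groups=groups)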