microsoft / FLAML

A fast library for AutoML and tuning. Join our Discord: https://discord.gg/Cppx2vSPVP.
https://microsoft.github.io/FLAML/
MIT License
3.75k stars 495 forks source link

CatBoost Fails with Keyword 'groups' #304

Open knoam opened 2 years ago

knoam commented 2 years ago

Here's the code:

settings = {
    "time_budget": 360,  
    "metric": 'ap',   
    "task": 'classification', 
    "log_file_name": f'{output_dir}/flaml1.log',  
    "seed": 7654321,    
    "log_training_metric": True,
    "groups": group_id,
    "estimator_list": ['catboost']  
}

automl.fit(X_train=X_train, y_train=y_train, **settings)

Here's the output:

[flaml.automl: 11-23 14:03:00] {1489} INFO - Evaluation method: holdout
[flaml.automl: 11-23 14:03:05] {1540} INFO - Minimizing error metric: 1-ap
[flaml.automl: 11-23 14:03:05] {1577} INFO - List of ML learners in AutoML Run: ['catboost']
[flaml.automl: 11-23 14:03:05] {1826} INFO - iteration 0, current learner catboost

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-26-4e43c07e607c> in <module>
----> 1 automl.fit(X_train=X_train, y_train=y_train, **settings)

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/automl.py in fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, pred_time_limit, train_time_limit, X_val, y_val, sample_weight_val, groups_val, groups, verbose, retrain_full, split_type, learner_selector, hpo_method, starting_points, seed, n_concurrent_trials, keep_search_state, early_stop, append_log, auto_augment, min_sample_size, use_ray, **fit_kwargs)
   1601             with training_log_writer(log_file_name, append_log) as save_helper:
   1602                 self._training_log = save_helper
-> 1603                 self._search()
   1604         else:
   1605             self._training_log = None

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/automl.py in _search(self)
   2117 
   2118         if not self._use_ray:
-> 2119             self._search_sequential()
   2120         else:
   2121             self._search_parallel()

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/automl.py in _search_sequential(self)
   1915                 time_budget_s=min(budget_left, self._state.train_time_limit),
   1916                 verbose=max(self.verbose - 3, 0),
-> 1917                 use_ray=False,
   1918             )
   1919             time_used = time.time() - start_run_time

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/tune/tune.py in run(training_function, config, low_cost_partial_config, cat_hp_cost, metric, mode, time_budget_s, points_to_evaluate, evaluated_rewards, prune_attr, min_resource, max_resource, reduction_factor, report_intermediate_result, search_alg, verbose, local_dir, num_samples, resources_per_trial, config_constraints, metric_constraints, max_failure, use_ray)
    402             if verbose:
    403                 logger.info(f"trial {num_trials} config: {trial_to_run.config}")
--> 404             result = training_function(trial_to_run.config)
    405             if result is not None:
    406                 if isinstance(result, dict):

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/automl.py in _compute_with_config_base(self, estimator, config_w_resource)
    241             self.learner_classes.get(estimator),
    242             self.log_training_metric,
--> 243             self.fit_kwargs,
    244         )
    245         result = {

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/ml.py in compute_estimator(X_train, y_train, X_val, y_val, weight_val, groups_val, budget, kf, config_dic, task, estimator_name, eval_method, eval_metric, best_val_loss, n_jobs, estimator_class, log_training_metric, fit_kwargs)
    435             budget=budget,
    436             log_training_metric=log_training_metric,
--> 437             fit_kwargs=fit_kwargs,
    438         )
    439     else:

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/ml.py in get_test_loss(config, estimator, X_train, y_train, X_test, y_test, weight_test, groups_test, eval_metric, obj, labels, budget, log_training_metric, fit_kwargs)
    258     #     fit_kwargs['X_val'] = X_test
    259     #     fit_kwargs['y_val'] = y_test
--> 260     estimator.fit(X_train, y_train, budget, **fit_kwargs)
    261     test_loss, metric_for_logging, pred_time, _ = _eval_estimator(
    262         config,

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs)
    939                 ),
    940                 callbacks=CatBoostEstimator._callbacks(start_time, deadline),
--> 941                 **kwargs,
    942             )
    943         else:

TypeError: fit() got an unexpected keyword argument 'groups'
sonichi commented 2 years ago

The "groups" argument is not currently supported for the catboost estimator. The reason is that CatBoostClassifier does not accept a groups keyword in its fit() function.