microsoft / FLAML

A fast library for AutoML and tuning. Join our Discord: https://discord.gg/Cppx2vSPVP.
https://microsoft.github.io/FLAML/

Error: TypeError: generate_variants() takes 1 positional argument but 2 were given on 0.10.0 #494

Open lokijota opened 2 years ago

lokijota commented 2 years ago

Hi,

I'm trying flaml on a new dataset, on a regression problem. I'm using version 0.10.0. The following error also happens if I use classification. The dataset looks like this:

time, x, y, direction, congestion, hour, day_of_week, days_since_beg_of_time
1991-04-01 00:00:00, 0, 0, 'EB', 70, 0, 0, 0
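
For reference, X and y are built along these lines (a simplified sketch; the file name is a placeholder and the real preprocessing is omitted):

import pandas as pd

# placeholder file name; the real dataset is loaded the same way
df = pd.read_csv("congestion.csv", parse_dates=["time"])

# congestion is the regression target; everything else is a feature
y = df["congestion"]
X = df.drop(columns=["time", "congestion"])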

The code is as simple as I can make it (I removed all the extra params). The error also happens if I limit it to a specific estimator:

from flaml import AutoML

automl = AutoML()

automl.fit(X,y, task="regression")

And the stack trace is:

[flaml.automl: 03-21 22:39:26] {2068} INFO - task = regression
[flaml.automl: 03-21 22:39:26] {2070} INFO - Data split method: uniform
[flaml.automl: 03-21 22:39:26] {2074} INFO - Evaluation method: holdout
[flaml.automl: 03-21 22:39:26] {2155} INFO - Minimizing error metric: 1-r2
[flaml.automl: 03-21 22:39:26] {2248} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 03-21 22:39:26] {2501} INFO - iteration 0, current learner lgbm
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/tune/space.py in generate_variants_compatible(unresolved_spec, constant_grid_search, random_state)
     20     try:
---> 21         return generate_variants(unresolved_spec, constant_grid_search, random_state)
     22     except TypeError:

TypeError: generate_variants() takes 1 positional argument but 3 were given

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-22-dd222f0e9e44> in <module>
      3 automl = AutoML()
      4 
----> 5 automl.fit(X,y, task="regression") #, metric="mse", use_ray=False)
      6 
      7 

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, pred_time_limit, train_time_limit, X_val, y_val, sample_weight_val, groups_val, groups, verbose, retrain_full, split_type, learner_selector, hpo_method, starting_points, seed, n_concurrent_trials, keep_search_state, early_stop, append_log, auto_augment, min_sample_size, use_ray, **fit_kwargs)
   2273         else:
   2274             self._training_log = None
-> 2275             self._search()
   2276         if self._best_estimator:
   2277             logger.info("fit succeeded")

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _search(self)
   2769             )
   2770         elif not self._use_ray:
-> 2771             self._search_sequential()
   2772         else:
   2773             self._search_parallel()

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _search_sequential(self)
   2592                 time_budget_s=min(budget_left, self._state.train_time_limit),
   2593                 verbose=max(self.verbose - 3, 0),
-> 2594                 use_ray=False,
   2595             )
   2596             time_used = time.time() - start_run_time

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/tune/tune.py in run(evaluation_function, config, low_cost_partial_config, cat_hp_cost, metric, mode, time_budget_s, points_to_evaluate, evaluated_rewards, resource_attr, min_resource, max_resource, reduction_factor, scheduler, search_alg, verbose, local_dir, num_samples, resources_per_trial, config_constraints, metric_constraints, max_failure, use_ray, use_incumbent_result_in_evaluation)
    445         and fail < ub
    446     ):
--> 447         trial_to_run = _runner.step()
    448         if trial_to_run:
    449             num_trials += 1

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/tune/trial_runner.py in step(self)
    117         """
    118         trial_id = Trial.generate_id()
--> 119         config = self._search_alg.suggest(trial_id)
    120         if config is not None:
    121             trial = SimpleTrial(config, trial_id)

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/ray/tune/suggest/suggestion.py in suggest(self, trial_id)
    337             return
    338 
--> 339         suggestion = self.searcher.suggest(trial_id)
    340         if suggestion not in (None, Searcher.FINISHED):
    341             self.live_trials.add(trial_id)

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/searcher/blendsearch.py in suggest(self, trial_id)
   1000             # Need to restart
   1001             self._init_used = False
-> 1002         return super().suggest(trial_id)
   1003 
   1004     def _select_thread(self) -> Tuple:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/searcher/blendsearch.py in suggest(self, trial_id)
    707                 init_config = self._ls.init_config
    708             config, space = self._ls.complete_config(
--> 709                 init_config, self._ls_bound_min, self._ls_bound_max
    710             )
    711             if reward is None:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/searcher/flow2.py in complete_config(self, partial_config, lower, upper)
    241         # if not the first time to complete init_config, use random gaussian
    242         config, space = complete_config(
--> 243             partial_config, self.space, self, disturb, lower, upper
    244         )
    245         if partial_config == self.init_config:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/tune/space.py in complete_config(partial_config, space, flow2, disturb, lower, upper)
    518             config[key] = value
    519     for _, generated in generate_variants_compatible(
--> 520         {"config": config}, random_state=flow2.rs_random
    521     ):
    522         config = generated["config"]

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/tune/space.py in generate_variants_compatible(unresolved_spec, constant_grid_search, random_state)
     21         return generate_variants(unresolved_spec, constant_grid_search, random_state)
     22     except TypeError:
---> 23         return generate_variants(unresolved_spec, constant_grid_search)
     24 
     25 

TypeError: generate_variants() takes 1 positional argument but 2 were given

Any ideas? I've used FLAML successfully in the past (older versions I can't go back to), but now I can't run a simple fit.

lokijota commented 2 years ago

Here's the version of ray, which seems to be the cause of the error:

import ray
ray.__version__

'1.0.1.post1'
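
From the two tracebacks, flaml's generate_variants_compatible wrapper first calls ray's generate_variants with three positional arguments and then falls back to two, but the generate_variants in ray 1.0.1 accepts only one, so both attempts raise TypeError. A signature-aware fallback might look roughly like this (a sketch, not flaml's actual fix; it assumes the ray 1.x import path below):

import inspect

from ray.tune.suggest.variant_generator import generate_variants

def generate_variants_compatible(unresolved_spec, constant_grid_search=False, random_state=None):
    # newer ray accepts (spec, constant_grid_search, random_state); the
    # ray 1.0.1 installed here accepts only (spec), hence the TypeErrors above
    n_params = len(inspect.signature(generate_variants).parameters)
    if n_params >= 3:
        return generate_variants(unresolved_spec, constant_grid_search, random_state)
    if n_params == 2:
        return generate_variants(unresolved_spec, constant_grid_search)
    return generate_variants(unresolved_spec)
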
lokijota commented 2 years ago

I updated ray to '1.11.0' and now the error is different; this time I think it's related to xgboost dependencies.

In any case, I don't see why flaml has a dependency on Ray if I don't use distributed training, and the comment in https://github.com/microsoft/FLAML/blob/569908fbe64f08b19fbef7487513b81d62e6eaba/flaml/tune/space.py saying that ray > 1.0.0 is needed is probably wrong (a newer version seems to be required).

Any suggestions to fix the error below are also welcome.

[flaml.automl: 03-21 22:46:24] {2068} INFO - task = regression
[flaml.automl: 03-21 22:46:24] {2070} INFO - Data split method: uniform
[flaml.automl: 03-21 22:46:24] {2074} INFO - Evaluation method: holdout
[flaml.automl: 03-21 22:46:25] {2155} INFO - Minimizing error metric: 1-r2
[flaml.automl: 03-21 22:46:25] {2248} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 03-21 22:46:25] {2501} INFO - iteration 0, current learner lgbm
[flaml.automl: 03-21 22:46:25] {2617} INFO - Estimated sufficient time budget=62359s. Estimated necessary time budget=533s.
[flaml.automl: 03-21 22:46:25] {2669} INFO -  at 9.0s,  estimator lgbm's best error=0.8954, best estimator lgbm's best error=0.8954
[flaml.automl: 03-21 22:46:25] {2501} INFO - iteration 1, current learner lgbm
[flaml.automl: 03-21 22:46:25] {2669} INFO -  at 9.1s,  estimator lgbm's best error=0.8954, best estimator lgbm's best error=0.8954
[flaml.automl: 03-21 22:46:25] {2501} INFO - iteration 2, current learner lgbm
[flaml.automl: 03-21 22:46:25] {2669} INFO -  at 9.2s,  estimator lgbm's best error=0.7933, best estimator lgbm's best error=0.7933
[flaml.automl: 03-21 22:46:25] {2501} INFO - iteration 3, current learner xgboost
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-8-dd222f0e9e44> in <module>
      3 automl = AutoML()
      4 
----> 5 automl.fit(X,y, task="regression") #, metric="mse", use_ray=False)
      6 
      7 

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, pred_time_limit, train_time_limit, X_val, y_val, sample_weight_val, groups_val, groups, verbose, retrain_full, split_type, learner_selector, hpo_method, starting_points, seed, n_concurrent_trials, keep_search_state, early_stop, append_log, auto_augment, min_sample_size, use_ray, **fit_kwargs)
   2273         else:
   2274             self._training_log = None
-> 2275             self._search()
   2276         if self._best_estimator:
   2277             logger.info("fit succeeded")

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _search(self)
   2769             )
   2770         elif not self._use_ray:
-> 2771             self._search_sequential()
   2772         else:
   2773             self._search_parallel()

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _search_sequential(self)
   2592                 time_budget_s=min(budget_left, self._state.train_time_limit),
   2593                 verbose=max(self.verbose - 3, 0),
-> 2594                 use_ray=False,
   2595             )
   2596             time_used = time.time() - start_run_time

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/tune/tune.py in run(evaluation_function, config, low_cost_partial_config, cat_hp_cost, metric, mode, time_budget_s, points_to_evaluate, evaluated_rewards, resource_attr, min_resource, max_resource, reduction_factor, scheduler, search_alg, verbose, local_dir, num_samples, resources_per_trial, config_constraints, metric_constraints, max_failure, use_ray, use_incumbent_result_in_evaluation)
    450             if verbose:
    451                 logger.info(f"trial {num_trials} config: {trial_to_run.config}")
--> 452             result = evaluation_function(trial_to_run.config)
    453             if result is not None:
    454                 if isinstance(result, dict):

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _compute_with_config_base(config_w_resource, state, estimator)
    278             state.learner_classes.get(estimator),
    279             state.log_training_metric,
--> 280             state.fit_kwargs,
    281         )
    282         if state.retrain_final and not state.model_history:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/ml.py in compute_estimator(X_train, y_train, X_val, y_val, weight_val, groups_val, budget, kf, config_dic, task, estimator_name, eval_method, eval_metric, best_val_loss, n_jobs, estimator_class, log_training_metric, fit_kwargs)
    576             budget=budget,
    577             log_training_metric=log_training_metric,
--> 578             fit_kwargs=fit_kwargs,
    579         )
    580     else:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/ml.py in get_val_loss(config, estimator, X_train, y_train, X_val, y_val, weight_val, groups_val, eval_metric, obj, labels, budget, log_training_metric, fit_kwargs)
    398     #     fit_kwargs['X_val'] = X_val
    399     #     fit_kwargs['y_val'] = y_val
--> 400     estimator.fit(X_train, y_train, budget, **fit_kwargs)
    401     val_loss, metric_for_logging, pred_time, _ = _eval_estimator(
    402         config,

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs)
   1199             self.params["tree_method"] = "gpu_hist"
   1200             kwargs.pop("gpu_per_trial")
-> 1201         return super().fit(X_train, y_train, budget, **kwargs)
   1202 
   1203     def _callbacks(self, start_time, deadline) -> List[Callable]:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs)
    970                 y_train,
    971                 callbacks=callbacks,
--> 972                 **kwargs,
    973             )
    974             best_iteration = (

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in _fit(self, X_train, y_train, **kwargs)
    150         if logger.level == logging.DEBUG:
    151             logger.debug(f"flaml.model - {model} fit started")
--> 152         model.fit(X_train, y_train, **kwargs)
    153         if logger.level == logging.DEBUG:
    154             logger.debug(f"flaml.model - {model} fit finished")

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in inner_f(*args, **kwargs)
    420         for k, arg in zip(sig.parameters, args):
    421             kwargs[k] = arg
--> 422         return f(**kwargs)
    423 
    424     return inner_f

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/sklearn.py in fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, feature_weights, callbacks)
    579             X, y, group=None, sample_weight=sample_weight, base_margin=base_margin,
    580             feature_weights=feature_weights, eval_set=eval_set,
--> 581             sample_weight_eval_set=sample_weight_eval_set, eval_group=None)
    582         params = self.get_xgb_params()
    583 

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/sklearn.py in _wrap_evaluation_matrices(self, X, y, group, sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, eval_group, label_transform)
    265         train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
    266                                 base_margin=base_margin,
--> 267                                 missing=self.missing, nthread=self.n_jobs)
    268         train_dmatrix.set_info(feature_weights=feature_weights, group=group)
    269 

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in __init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, enable_categorical)
    507         self.handle = handle
    508 
--> 509         self.set_info(label=label, weight=weight, base_margin=base_margin)
    510 
    511         self.feature_names = feature_names

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in inner_f(*args, **kwargs)
    420         for k, arg in zip(sig.parameters, args):
    421             kwargs[k] = arg
--> 422         return f(**kwargs)
    423 
    424     return inner_f

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in set_info(self, label, weight, base_margin, group, label_lower_bound, label_upper_bound, feature_names, feature_types, feature_weights)
    528         '''Set meta info for DMatrix.'''
    529         if label is not None:
--> 530             self.set_label(label)
    531         if weight is not None:
    532             self.set_weight(weight)

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in set_label(self, label)
    657         """
    658         from .data import dispatch_meta_backend
--> 659         dispatch_meta_backend(self, label, 'label', 'float')
    660 
    661     def set_weight(self, weight):

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/data.py in dispatch_meta_backend(matrix, data, name, dtype)
    674         data = data.values.astype('float')
    675         assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
--> 676         _meta_from_numpy(data, name, dtype, handle)
    677         return
    678     if _is_dlpack(data):

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/data.py in _meta_from_numpy(data, field, dtype, handle)
    598     ptr = interface['data'][0]
    599     ptr = ctypes.c_void_p(ptr)
--> 600     _check_call(_LIB.XGDMatrixSetDenseInfo(
    601         handle,
    602         c_str(field),

/anaconda/envs/azureml_py36/lib/python3.6/ctypes/__init__.py in __getattr__(self, name)
    359         if name.startswith('__') and name.endswith('__'):
    360             raise AttributeError(name)
--> 361         func = self.__getitem__(name)
    362         setattr(self, name, func)
    363         return func

/anaconda/envs/azureml_py36/lib/python3.6/ctypes/__init__.py in __getitem__(self, name_or_ordinal)
    364 
    365     def __getitem__(self, name_or_ordinal):
--> 366         func = self._FuncPtr((name_or_ordinal, self))
    367         if not isinstance(name_or_ordinal, int):
    368             func.__name__ = name_or_ordinal

AttributeError: /anaconda/envs/azureml_py36/lib/libxgboost.so: undefined symbol: XGDMatrixSetDenseInfo
lokijota commented 2 years ago

The error above happens both with xgboost 1.5.2 (latest, just installed) and with 1.3.3, which is supposed to be the supported version. If I remove xgboost and xgb_limitdepth from the list of estimators, it seems to work.
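
Concretely, the workaround is to pass an explicit estimator_list to fit (the parameter is visible in the fit signature in the tracebacks above), for example:

from flaml import AutoML

automl = AutoML()

# same call as before, but leaving out the two xgboost-based learners
automl.fit(X, y, task="regression",
           estimator_list=["lgbm", "rf", "catboost", "extra_tree"])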

sonichi commented 2 years ago

The error above happens both with xgboost 1.5.2 (latest, just installed) and with 1.3.3, which is supposed to be the supported version. If I remove xgboost and xgb_limitdepth from the list of estimators, it seems to work.

I can't reproduce that error with "python 3.6 - AzureML" and xgboost 1.3.3. Could you try the following code?

from flaml import AutoML
from sklearn.datasets import fetch_california_housing

# Initialize an AutoML instance
automl = AutoML()
# Specify automl goal and constraint
automl_settings = {
    "time_budget": 2,  # in seconds
    "metric": 'r2',
    "task": 'regression',
    "log_file_name": "california.log",
}
X_train, y_train = fetch_california_housing(return_X_y=True)
# Train with labeled input data
automl.fit(X_train=X_train, y_train=y_train,
           **automl_settings)

My output is:

[flaml.automl: 03-22 17:17:47] {1957} INFO - task = regression
[flaml.automl: 03-22 17:17:47] {1959} INFO - Data split method: uniform
[flaml.automl: 03-22 17:17:47] {1963} INFO - Evaluation method: holdout
[flaml.automl: 03-22 17:17:47] {2055} INFO - Minimizing error metric: 1-r2
[flaml.automl: 03-22 17:17:47] {2107} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 0, current learner lgbm
[flaml.automl: 03-22 17:17:47] {2461} INFO - Estimated sufficient time budget=448s. Estimated necessary time budget=4s.
[flaml.automl: 03-22 17:17:47] {2541} INFO -  at 0.3s,  estimator lgbm's best error=0.7393, best estimator lgbm's best error=0.7393
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 1, current learner lgbm
[flaml.automl: 03-22 17:17:47] {2541} INFO -  at 0.3s,  estimator lgbm's best error=0.7393, best estimator lgbm's best error=0.7393
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 2, current learner lgbm
[flaml.automl: 03-22 17:17:47] {2541} INFO -  at 0.4s,  estimator lgbm's best error=0.5446, best estimator lgbm's best error=0.5446
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 3, current learner xgboost
[flaml.automl: 03-22 17:17:47] {2541} INFO -  at 0.7s,  estimator xgboost's best error=1.5011,  best estimator lgbm's best error=0.5446
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 4, current learner lgbm
[flaml.automl: 03-22 17:17:47] {2541} INFO -  at 0.7s,  estimator lgbm's best error=0.2807, best estimator lgbm's best error=0.2807
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 5, current learner lgbm
[flaml.automl: 03-22 17:17:47] {2541} INFO -  at 0.8s,  estimator lgbm's best error=0.2807, best estimator lgbm's best error=0.2807
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 6, current learner lgbm
[flaml.automl: 03-22 17:17:47] {2541} INFO -  at 0.8s,  estimator lgbm's best error=0.2712, best estimator lgbm's best error=0.2712
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 7, current learner lgbm
[flaml.automl: 03-22 17:17:48] {2541} INFO -  at 0.9s,  estimator lgbm's best error=0.2712, best estimator lgbm's best error=0.2712
[flaml.automl: 03-22 17:17:48] {2347} INFO - iteration 8, current learner lgbm
[flaml.automl: 03-22 17:17:48] {2541} INFO -  at 0.9s,  estimator lgbm's best error=0.2712, best estimator lgbm's best error=0.2712
[flaml.automl: 03-22 17:17:48] {2347} INFO - iteration 9, current learner lgbm
[flaml.automl: 03-22 17:17:48] {2541} INFO -  at 1.0s,  estimator lgbm's best error=0.2197, best estimator lgbm's best error=0.2197
[flaml.automl: 03-22 17:17:48] {2347} INFO - iteration 10, current learner extra_tree
[flaml.automl: 03-22 17:17:48] {2541} INFO -  at 1.3s,  estimator extra_tree's best error=0.5927,   best estimator lgbm's best error=0.2197
[flaml.automl: 03-22 17:17:48] {2347} INFO - iteration 11, current learner rf
[flaml.automl: 03-22 17:17:48] {2541} INFO -  at 1.6s,  estimator rf's best error=0.5290,   best estimator lgbm's best error=0.2197
[flaml.automl: 03-22 17:17:48] {2347} INFO - iteration 12, current learner lgbm
[flaml.automl: 03-22 17:17:48] {2541} INFO -  at 1.7s,  estimator lgbm's best error=0.2197, best estimator lgbm's best error=0.2197
[flaml.automl: 03-22 17:17:48] {2347} INFO - iteration 13, current learner rf
[flaml.automl: 03-22 17:17:49] {2541} INFO -  at 2.0s,  estimator rf's best error=0.3804,   best estimator lgbm's best error=0.2197
[flaml.automl: 03-22 17:17:49] {2753} INFO - retrain lgbm for 0.1s
[flaml.automl: 03-22 17:17:49] {2758} INFO - retrained model: LGBMRegressor(colsample_bytree=0.7610534336273627,
              learning_rate=0.41929025492645006, max_bin=255,
              min_child_samples=4, n_estimators=45, num_leaves=4,
              reg_alpha=0.0009765625, reg_lambda=0.009280655005879927,
              verbose=-1)
[flaml.automl: 03-22 17:17:49] {2136} INFO - fit succeeded
[flaml.automl: 03-22 17:17:49] {2138} INFO - Time taken to find the best model: 0.9899353981018066
lokijota commented 2 years ago

I have an error:

import xgboost
xgboost.__version__
'1.3.3'

error:

[flaml.automl: 03-23 09:59:07] {2068} INFO - task = regression
[flaml.automl: 03-23 09:59:07] {2070} INFO - Data split method: uniform
[flaml.automl: 03-23 09:59:07] {2074} INFO - Evaluation method: holdout
[flaml.automl: 03-23 09:59:07] {2155} INFO - Minimizing error metric: 1-r2
[flaml.automl: 03-23 09:59:07] {2248} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 03-23 09:59:07] {2501} INFO - iteration 0, current learner lgbm
[flaml.automl: 03-23 09:59:08] {2617} INFO - Estimated sufficient time budget=363s. Estimated necessary time budget=3s.
[flaml.automl: 03-23 09:59:08] {2669} INFO -  at 0.1s,  estimator lgbm's best error=0.7393, best estimator lgbm's best error=0.7393
[flaml.automl: 03-23 09:59:08] {2501} INFO - iteration 1, current learner lgbm
[flaml.automl: 03-23 09:59:08] {2669} INFO -  at 0.1s,  estimator lgbm's best error=0.7393, best estimator lgbm's best error=0.7393
[flaml.automl: 03-23 09:59:08] {2501} INFO - iteration 2, current learner lgbm
[flaml.automl: 03-23 09:59:08] {2669} INFO -  at 0.2s,  estimator lgbm's best error=0.5446, best estimator lgbm's best error=0.5446
[flaml.automl: 03-23 09:59:08] {2501} INFO - iteration 3, current learner xgboost
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-331-4ef4c4134da1> in <module>
     14 # Train with labeled input data
     15 automl.fit(X_train=X_train, y_train=y_train,
---> 16            **automl_settings)

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, pred_time_limit, train_time_limit, X_val, y_val, sample_weight_val, groups_val, groups, verbose, retrain_full, split_type, learner_selector, hpo_method, starting_points, seed, n_concurrent_trials, keep_search_state, early_stop, append_log, auto_augment, min_sample_size, use_ray, **fit_kwargs)
   2270             with training_log_writer(log_file_name, append_log) as save_helper:
   2271                 self._training_log = save_helper
-> 2272                 self._search()
   2273         else:
   2274             self._training_log = None

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _search(self)
   2769             )
   2770         elif not self._use_ray:
-> 2771             self._search_sequential()
   2772         else:
   2773             self._search_parallel()

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _search_sequential(self)
   2592                 time_budget_s=min(budget_left, self._state.train_time_limit),
   2593                 verbose=max(self.verbose - 3, 0),
-> 2594                 use_ray=False,
   2595             )
   2596             time_used = time.time() - start_run_time

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/tune/tune.py in run(evaluation_function, config, low_cost_partial_config, cat_hp_cost, metric, mode, time_budget_s, points_to_evaluate, evaluated_rewards, resource_attr, min_resource, max_resource, reduction_factor, scheduler, search_alg, verbose, local_dir, num_samples, resources_per_trial, config_constraints, metric_constraints, max_failure, use_ray, use_incumbent_result_in_evaluation)
    450             if verbose:
    451                 logger.info(f"trial {num_trials} config: {trial_to_run.config}")
--> 452             result = evaluation_function(trial_to_run.config)
    453             if result is not None:
    454                 if isinstance(result, dict):

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _compute_with_config_base(config_w_resource, state, estimator)
    278             state.learner_classes.get(estimator),
    279             state.log_training_metric,
--> 280             state.fit_kwargs,
    281         )
    282         if state.retrain_final and not state.model_history:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/ml.py in compute_estimator(X_train, y_train, X_val, y_val, weight_val, groups_val, budget, kf, config_dic, task, estimator_name, eval_method, eval_metric, best_val_loss, n_jobs, estimator_class, log_training_metric, fit_kwargs)
    576             budget=budget,
    577             log_training_metric=log_training_metric,
--> 578             fit_kwargs=fit_kwargs,
    579         )
    580     else:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/ml.py in get_val_loss(config, estimator, X_train, y_train, X_val, y_val, weight_val, groups_val, eval_metric, obj, labels, budget, log_training_metric, fit_kwargs)
    398     #     fit_kwargs['X_val'] = X_val
    399     #     fit_kwargs['y_val'] = y_val
--> 400     estimator.fit(X_train, y_train, budget, **fit_kwargs)
    401     val_loss, metric_for_logging, pred_time, _ = _eval_estimator(
    402         config,

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs)
   1199             self.params["tree_method"] = "gpu_hist"
   1200             kwargs.pop("gpu_per_trial")
-> 1201         return super().fit(X_train, y_train, budget, **kwargs)
   1202 
   1203     def _callbacks(self, start_time, deadline) -> List[Callable]:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs)
    970                 y_train,
    971                 callbacks=callbacks,
--> 972                 **kwargs,
    973             )
    974             best_iteration = (

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in _fit(self, X_train, y_train, **kwargs)
    150         if logger.level == logging.DEBUG:
    151             logger.debug(f"flaml.model - {model} fit started")
--> 152         model.fit(X_train, y_train, **kwargs)
    153         if logger.level == logging.DEBUG:
    154             logger.debug(f"flaml.model - {model} fit finished")

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in inner_f(*args, **kwargs)
    420                 feature_types,
    421                 self._enable_categorical,
--> 422             )
    423             # Stage the data, meta info are copied inside C++ MetaInfo.
    424             self._temporary_data = (new, cat_codes)

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/sklearn.py in fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, feature_weights, callbacks)
    579                 meta['classes_'] = self.classes_.tolist()
    580                 continue
--> 581             try:
    582                 json.dumps({k: v})
    583                 meta[k] = v

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/sklearn.py in _wrap_evaluation_matrices(self, X, y, group, sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, eval_group, label_transform)
    265 def _wrap_evaluation_matrices(
    266     missing: float,
--> 267     X: Any,
    268     y: Any,
    269     group: Optional[Any],

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in __init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, enable_categorical)
    507 
    508     return inner_f
--> 509 
    510 
    511 class DMatrix:  # pylint: disable=too-many-instance-attributes

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in inner_f(*args, **kwargs)
    420                 feature_types,
    421                 self._enable_categorical,
--> 422             )
    423             # Stage the data, meta info are copied inside C++ MetaInfo.
    424             self._temporary_data = (new, cat_codes)

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in set_info(self, label, weight, base_margin, group, label_lower_bound, label_upper_bound, feature_names, feature_types, feature_weights)
    528         silent=False,
    529         feature_names: Optional[List[str]] = None,
--> 530         feature_types: Optional[List[str]] = None,
    531         nthread: Optional[int] = None,
    532         group=None,

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in set_label(self, label)
    657             None,
    658             it.proxy.handle,
--> 659             reset_callback,
    660             next_callback,
    661             args,

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/data.py in dispatch_meta_backend(matrix, data, name, dtype)
    664     return _from_cupy_array(data, missing, nthread, feature_names,
    665                             feature_types)
--> 666 
    667 
    668 def _is_uri(data):

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/data.py in _meta_from_numpy(data, field, dtype, handle)
    598 
    599 def _transform_cupy_array(data):
--> 600     import cupy  # pylint: disable=import-error
    601     if not hasattr(data, '__cuda_array_interface__') and hasattr(
    602             data, '__array__'):

/anaconda/envs/azureml_py36/lib/python3.6/ctypes/__init__.py in __getattr__(self, name)
    359         if name.startswith('__') and name.endswith('__'):
    360             raise AttributeError(name)
--> 361         func = self.__getitem__(name)
    362         setattr(self, name, func)
    363         return func

/anaconda/envs/azureml_py36/lib/python3.6/ctypes/__init__.py in __getitem__(self, name_or_ordinal)
    364 
    365     def __getitem__(self, name_or_ordinal):
--> 366         func = self._FuncPtr((name_or_ordinal, self))
    367         if not isinstance(name_or_ordinal, int):
    368             func.__name__ = name_or_ordinal

AttributeError: /anaconda/envs/azureml_py36/lib/libxgboost.so: undefined symbol: XGDMatrixSetDenseInfo

Note that there seem to be two issues here:

  • one, the version of ray that's supposed to be needed is incorrect: a newer version than 1.0.0 is required, contrary to what the code file says
  • two, I have some sort of library issue (maybe dependencies?) running flaml with xgboost, and have to leave the algorithm out of the estimator list.

sonichi commented 2 years ago

I made a PR to address the first issue. For the second issue, could you try installing flaml in a clean environment? If the problem persists, we need to look deeper into the environment you are using.
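
A quick sanity check along these lines can also show which interpreter and library files the kernel actually uses (note that xgboost.core._LIB is a private attribute and may change between versions, so treat that last line as best-effort):

import sys

import flaml
import ray
import xgboost
import xgboost.core as xgb_core

# which Python the notebook kernel runs
print(sys.executable)

# versions and on-disk locations of the three packages involved
for mod in (flaml, ray, xgboost):
    print(mod.__name__, mod.__version__, mod.__file__)

# path of the shared library xgboost actually loaded; the traceback above
# reports /anaconda/envs/azureml_py36/lib/libxgboost.so, i.e. a copy outside
# site-packages, which a clean environment would avoid
print(xgb_core._LIB._name)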