Open lokijota opened 2 years ago
Here's the version of ray, which seems to be the cause of the error:
import ray
ray.__version__
'1.0.1.post1'
I updated ray to '1.11.0' and now the error is different; I think this time it is related to the xgboost dependency.
In any case, I don't see why flaml has a dependency on Ray if I don't use distributed training, and the comment in https://github.com/microsoft/FLAML/blob/569908fbe64f08b19fbef7487513b81d62e6eaba/flaml/tune/space.py saying "ray > 1.0.0" is probably wrong.
Any suggestions to fix the error below are also welcome.
[flaml.automl: 03-21 22:46:24] {2068} INFO - task = regression
[flaml.automl: 03-21 22:46:24] {2070} INFO - Data split method: uniform
[flaml.automl: 03-21 22:46:24] {2074} INFO - Evaluation method: holdout
[flaml.automl: 03-21 22:46:25] {2155} INFO - Minimizing error metric: 1-r2
[flaml.automl: 03-21 22:46:25] {2248} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 03-21 22:46:25] {2501} INFO - iteration 0, current learner lgbm
[flaml.automl: 03-21 22:46:25] {2617} INFO - Estimated sufficient time budget=62359s. Estimated necessary time budget=533s.
[flaml.automl: 03-21 22:46:25] {2669} INFO - at 9.0s, estimator lgbm's best error=0.8954, best estimator lgbm's best error=0.8954
[flaml.automl: 03-21 22:46:25] {2501} INFO - iteration 1, current learner lgbm
[flaml.automl: 03-21 22:46:25] {2669} INFO - at 9.1s, estimator lgbm's best error=0.8954, best estimator lgbm's best error=0.8954
[flaml.automl: 03-21 22:46:25] {2501} INFO - iteration 2, current learner lgbm
[flaml.automl: 03-21 22:46:25] {2669} INFO - at 9.2s, estimator lgbm's best error=0.7933, best estimator lgbm's best error=0.7933
[flaml.automl: 03-21 22:46:25] {2501} INFO - iteration 3, current learner xgboost
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-8-dd222f0e9e44> in <module>
3 automl = AutoML()
4
----> 5 automl.fit(X,y, task="regression") #, metric="mse", use_ray=False)
6
7
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, pred_time_limit, train_time_limit, X_val, y_val, sample_weight_val, groups_val, groups, verbose, retrain_full, split_type, learner_selector, hpo_method, starting_points, seed, n_concurrent_trials, keep_search_state, early_stop, append_log, auto_augment, min_sample_size, use_ray, **fit_kwargs)
2273 else:
2274 self._training_log = None
-> 2275 self._search()
2276 if self._best_estimator:
2277 logger.info("fit succeeded")
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _search(self)
2769 )
2770 elif not self._use_ray:
-> 2771 self._search_sequential()
2772 else:
2773 self._search_parallel()
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _search_sequential(self)
2592 time_budget_s=min(budget_left, self._state.train_time_limit),
2593 verbose=max(self.verbose - 3, 0),
-> 2594 use_ray=False,
2595 )
2596 time_used = time.time() - start_run_time
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/tune/tune.py in run(evaluation_function, config, low_cost_partial_config, cat_hp_cost, metric, mode, time_budget_s, points_to_evaluate, evaluated_rewards, resource_attr, min_resource, max_resource, reduction_factor, scheduler, search_alg, verbose, local_dir, num_samples, resources_per_trial, config_constraints, metric_constraints, max_failure, use_ray, use_incumbent_result_in_evaluation)
450 if verbose:
451 logger.info(f"trial {num_trials} config: {trial_to_run.config}")
--> 452 result = evaluation_function(trial_to_run.config)
453 if result is not None:
454 if isinstance(result, dict):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _compute_with_config_base(config_w_resource, state, estimator)
278 state.learner_classes.get(estimator),
279 state.log_training_metric,
--> 280 state.fit_kwargs,
281 )
282 if state.retrain_final and not state.model_history:
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/ml.py in compute_estimator(X_train, y_train, X_val, y_val, weight_val, groups_val, budget, kf, config_dic, task, estimator_name, eval_method, eval_metric, best_val_loss, n_jobs, estimator_class, log_training_metric, fit_kwargs)
576 budget=budget,
577 log_training_metric=log_training_metric,
--> 578 fit_kwargs=fit_kwargs,
579 )
580 else:
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/ml.py in get_val_loss(config, estimator, X_train, y_train, X_val, y_val, weight_val, groups_val, eval_metric, obj, labels, budget, log_training_metric, fit_kwargs)
398 # fit_kwargs['X_val'] = X_val
399 # fit_kwargs['y_val'] = y_val
--> 400 estimator.fit(X_train, y_train, budget, **fit_kwargs)
401 val_loss, metric_for_logging, pred_time, _ = _eval_estimator(
402 config,
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs)
1199 self.params["tree_method"] = "gpu_hist"
1200 kwargs.pop("gpu_per_trial")
-> 1201 return super().fit(X_train, y_train, budget, **kwargs)
1202
1203 def _callbacks(self, start_time, deadline) -> List[Callable]:
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs)
970 y_train,
971 callbacks=callbacks,
--> 972 **kwargs,
973 )
974 best_iteration = (
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in _fit(self, X_train, y_train, **kwargs)
150 if logger.level == logging.DEBUG:
151 logger.debug(f"flaml.model - {model} fit started")
--> 152 model.fit(X_train, y_train, **kwargs)
153 if logger.level == logging.DEBUG:
154 logger.debug(f"flaml.model - {model} fit finished")
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in inner_f(*args, **kwargs)
420 for k, arg in zip(sig.parameters, args):
421 kwargs[k] = arg
--> 422 return f(**kwargs)
423
424 return inner_f
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/sklearn.py in fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, feature_weights, callbacks)
579 X, y, group=None, sample_weight=sample_weight, base_margin=base_margin,
580 feature_weights=feature_weights, eval_set=eval_set,
--> 581 sample_weight_eval_set=sample_weight_eval_set, eval_group=None)
582 params = self.get_xgb_params()
583
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/sklearn.py in _wrap_evaluation_matrices(self, X, y, group, sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, eval_group, label_transform)
265 train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
266 base_margin=base_margin,
--> 267 missing=self.missing, nthread=self.n_jobs)
268 train_dmatrix.set_info(feature_weights=feature_weights, group=group)
269
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in __init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, enable_categorical)
507 self.handle = handle
508
--> 509 self.set_info(label=label, weight=weight, base_margin=base_margin)
510
511 self.feature_names = feature_names
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in inner_f(*args, **kwargs)
420 for k, arg in zip(sig.parameters, args):
421 kwargs[k] = arg
--> 422 return f(**kwargs)
423
424 return inner_f
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in set_info(self, label, weight, base_margin, group, label_lower_bound, label_upper_bound, feature_names, feature_types, feature_weights)
528 '''Set meta info for DMatrix.'''
529 if label is not None:
--> 530 self.set_label(label)
531 if weight is not None:
532 self.set_weight(weight)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in set_label(self, label)
657 """
658 from .data import dispatch_meta_backend
--> 659 dispatch_meta_backend(self, label, 'label', 'float')
660
661 def set_weight(self, weight):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/data.py in dispatch_meta_backend(matrix, data, name, dtype)
674 data = data.values.astype('float')
675 assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
--> 676 _meta_from_numpy(data, name, dtype, handle)
677 return
678 if _is_dlpack(data):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/data.py in _meta_from_numpy(data, field, dtype, handle)
598 ptr = interface['data'][0]
599 ptr = ctypes.c_void_p(ptr)
--> 600 _check_call(_LIB.XGDMatrixSetDenseInfo(
601 handle,
602 c_str(field),
/anaconda/envs/azureml_py36/lib/python3.6/ctypes/__init__.py in __getattr__(self, name)
359 if name.startswith('__') and name.endswith('__'):
360 raise AttributeError(name)
--> 361 func = self.__getitem__(name)
362 setattr(self, name, func)
363 return func
/anaconda/envs/azureml_py36/lib/python3.6/ctypes/__init__.py in __getitem__(self, name_or_ordinal)
364
365 def __getitem__(self, name_or_ordinal):
--> 366 func = self._FuncPtr((name_or_ordinal, self))
367 if not isinstance(name_or_ordinal, int):
368 func.__name__ = name_or_ordinal
AttributeError: /anaconda/envs/azureml_py36/lib/libxgboost.so: undefined symbol: XGDMatrixSetDenseInfo
The error above happens both with xgboost 1.5.2 (latest, just installed) or 1.3.3, which is supposed to be the supported version. If I remove xgboost and xgb_limitdepth from the list of estimators, it seems to work.
The error above happens both with xgboost 1.5.2 (latest, just installed) or 1.3.3, which is supposed to be the supported version. If I remove xgboost and xgb_limitdepth from the list of estimators, it seems to work.
I can't reproduce that error with "python 3.6 - AzureML" and xgboost 1.3.3. Could you try the following code?
from flaml import AutoML
from sklearn.datasets import fetch_california_housing
# Initialize an AutoML instance
automl = AutoML()
# Specify automl goal and constraint
automl_settings = {
"time_budget": 2, # in seconds
"metric": 'r2',
"task": 'regression',
"log_file_name": "california.log",
}
X_train, y_train = fetch_california_housing(return_X_y=True)
# Train with labeled input data
automl.fit(X_train=X_train, y_train=y_train,
**automl_settings)
My output is:
[flaml.automl: 03-22 17:17:47] {1957} INFO - task = regression
[flaml.automl: 03-22 17:17:47] {1959} INFO - Data split method: uniform
[flaml.automl: 03-22 17:17:47] {1963} INFO - Evaluation method: holdout
[flaml.automl: 03-22 17:17:47] {2055} INFO - Minimizing error metric: 1-r2
[flaml.automl: 03-22 17:17:47] {2107} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 0, current learner lgbm
[flaml.automl: 03-22 17:17:47] {2461} INFO - Estimated sufficient time budget=448s. Estimated necessary time budget=4s.
[flaml.automl: 03-22 17:17:47] {2541} INFO - at 0.3s, estimator lgbm's best error=0.7393, best estimator lgbm's best error=0.7393
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 1, current learner lgbm
[flaml.automl: 03-22 17:17:47] {2541} INFO - at 0.3s, estimator lgbm's best error=0.7393, best estimator lgbm's best error=0.7393
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 2, current learner lgbm
[flaml.automl: 03-22 17:17:47] {2541} INFO - at 0.4s, estimator lgbm's best error=0.5446, best estimator lgbm's best error=0.5446
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 3, current learner xgboost
[flaml.automl: 03-22 17:17:47] {2541} INFO - at 0.7s, estimator xgboost's best error=1.5011, best estimator lgbm's best error=0.5446
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 4, current learner lgbm
[flaml.automl: 03-22 17:17:47] {2541} INFO - at 0.7s, estimator lgbm's best error=0.2807, best estimator lgbm's best error=0.2807
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 5, current learner lgbm
[flaml.automl: 03-22 17:17:47] {2541} INFO - at 0.8s, estimator lgbm's best error=0.2807, best estimator lgbm's best error=0.2807
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 6, current learner lgbm
[flaml.automl: 03-22 17:17:47] {2541} INFO - at 0.8s, estimator lgbm's best error=0.2712, best estimator lgbm's best error=0.2712
[flaml.automl: 03-22 17:17:47] {2347} INFO - iteration 7, current learner lgbm
[flaml.automl: 03-22 17:17:48] {2541} INFO - at 0.9s, estimator lgbm's best error=0.2712, best estimator lgbm's best error=0.2712
[flaml.automl: 03-22 17:17:48] {2347} INFO - iteration 8, current learner lgbm
[flaml.automl: 03-22 17:17:48] {2541} INFO - at 0.9s, estimator lgbm's best error=0.2712, best estimator lgbm's best error=0.2712
[flaml.automl: 03-22 17:17:48] {2347} INFO - iteration 9, current learner lgbm
[flaml.automl: 03-22 17:17:48] {2541} INFO - at 1.0s, estimator lgbm's best error=0.2197, best estimator lgbm's best error=0.2197
[flaml.automl: 03-22 17:17:48] {2347} INFO - iteration 10, current learner extra_tree
[flaml.automl: 03-22 17:17:48] {2541} INFO - at 1.3s, estimator extra_tree's best error=0.5927, best estimator lgbm's best error=0.2197
[flaml.automl: 03-22 17:17:48] {2347} INFO - iteration 11, current learner rf
[flaml.automl: 03-22 17:17:48] {2541} INFO - at 1.6s, estimator rf's best error=0.5290, best estimator lgbm's best error=0.2197
[flaml.automl: 03-22 17:17:48] {2347} INFO - iteration 12, current learner lgbm
[flaml.automl: 03-22 17:17:48] {2541} INFO - at 1.7s, estimator lgbm's best error=0.2197, best estimator lgbm's best error=0.2197
[flaml.automl: 03-22 17:17:48] {2347} INFO - iteration 13, current learner rf
[flaml.automl: 03-22 17:17:49] {2541} INFO - at 2.0s, estimator rf's best error=0.3804, best estimator lgbm's best error=0.2197
[flaml.automl: 03-22 17:17:49] {2753} INFO - retrain lgbm for 0.1s
[flaml.automl: 03-22 17:17:49] {2758} INFO - retrained model: LGBMRegressor(colsample_bytree=0.7610534336273627,
learning_rate=0.41929025492645006, max_bin=255,
min_child_samples=4, n_estimators=45, num_leaves=4,
reg_alpha=0.0009765625, reg_lambda=0.009280655005879927,
verbose=-1)
[flaml.automl: 03-22 17:17:49] {2136} INFO - fit succeeded
[flaml.automl: 03-22 17:17:49] {2138} INFO - Time taken to find the best model: 0.9899353981018066
I have an error:
import xgboost
xgboost.__version__
'1.3.3'
error:
[flaml.automl: 03-23 09:59:07] {2068} INFO - task = regression
[flaml.automl: 03-23 09:59:07] {2070} INFO - Data split method: uniform
[flaml.automl: 03-23 09:59:07] {2074} INFO - Evaluation method: holdout
[flaml.automl: 03-23 09:59:07] {2155} INFO - Minimizing error metric: 1-r2
[flaml.automl: 03-23 09:59:07] {2248} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 03-23 09:59:07] {2501} INFO - iteration 0, current learner lgbm
[flaml.automl: 03-23 09:59:08] {2617} INFO - Estimated sufficient time budget=363s. Estimated necessary time budget=3s.
[flaml.automl: 03-23 09:59:08] {2669} INFO - at 0.1s, estimator lgbm's best error=0.7393, best estimator lgbm's best error=0.7393
[flaml.automl: 03-23 09:59:08] {2501} INFO - iteration 1, current learner lgbm
[flaml.automl: 03-23 09:59:08] {2669} INFO - at 0.1s, estimator lgbm's best error=0.7393, best estimator lgbm's best error=0.7393
[flaml.automl: 03-23 09:59:08] {2501} INFO - iteration 2, current learner lgbm
[flaml.automl: 03-23 09:59:08] {2669} INFO - at 0.2s, estimator lgbm's best error=0.5446, best estimator lgbm's best error=0.5446
[flaml.automl: 03-23 09:59:08] {2501} INFO - iteration 3, current learner xgboost
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-331-4ef4c4134da1> in <module>
14 # Train with labeled input data
15 automl.fit(X_train=X_train, y_train=y_train,
---> 16 **automl_settings)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, pred_time_limit, train_time_limit, X_val, y_val, sample_weight_val, groups_val, groups, verbose, retrain_full, split_type, learner_selector, hpo_method, starting_points, seed, n_concurrent_trials, keep_search_state, early_stop, append_log, auto_augment, min_sample_size, use_ray, **fit_kwargs)
2270 with training_log_writer(log_file_name, append_log) as save_helper:
2271 self._training_log = save_helper
-> 2272 self._search()
2273 else:
2274 self._training_log = None
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _search(self)
2769 )
2770 elif not self._use_ray:
-> 2771 self._search_sequential()
2772 else:
2773 self._search_parallel()
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _search_sequential(self)
2592 time_budget_s=min(budget_left, self._state.train_time_limit),
2593 verbose=max(self.verbose - 3, 0),
-> 2594 use_ray=False,
2595 )
2596 time_used = time.time() - start_run_time
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/tune/tune.py in run(evaluation_function, config, low_cost_partial_config, cat_hp_cost, metric, mode, time_budget_s, points_to_evaluate, evaluated_rewards, resource_attr, min_resource, max_resource, reduction_factor, scheduler, search_alg, verbose, local_dir, num_samples, resources_per_trial, config_constraints, metric_constraints, max_failure, use_ray, use_incumbent_result_in_evaluation)
450 if verbose:
451 logger.info(f"trial {num_trials} config: {trial_to_run.config}")
--> 452 result = evaluation_function(trial_to_run.config)
453 if result is not None:
454 if isinstance(result, dict):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _compute_with_config_base(config_w_resource, state, estimator)
278 state.learner_classes.get(estimator),
279 state.log_training_metric,
--> 280 state.fit_kwargs,
281 )
282 if state.retrain_final and not state.model_history:
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/ml.py in compute_estimator(X_train, y_train, X_val, y_val, weight_val, groups_val, budget, kf, config_dic, task, estimator_name, eval_method, eval_metric, best_val_loss, n_jobs, estimator_class, log_training_metric, fit_kwargs)
576 budget=budget,
577 log_training_metric=log_training_metric,
--> 578 fit_kwargs=fit_kwargs,
579 )
580 else:
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/ml.py in get_val_loss(config, estimator, X_train, y_train, X_val, y_val, weight_val, groups_val, eval_metric, obj, labels, budget, log_training_metric, fit_kwargs)
398 # fit_kwargs['X_val'] = X_val
399 # fit_kwargs['y_val'] = y_val
--> 400 estimator.fit(X_train, y_train, budget, **fit_kwargs)
401 val_loss, metric_for_logging, pred_time, _ = _eval_estimator(
402 config,
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs)
1199 self.params["tree_method"] = "gpu_hist"
1200 kwargs.pop("gpu_per_trial")
-> 1201 return super().fit(X_train, y_train, budget, **kwargs)
1202
1203 def _callbacks(self, start_time, deadline) -> List[Callable]:
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs)
970 y_train,
971 callbacks=callbacks,
--> 972 **kwargs,
973 )
974 best_iteration = (
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in _fit(self, X_train, y_train, **kwargs)
150 if logger.level == logging.DEBUG:
151 logger.debug(f"flaml.model - {model} fit started")
--> 152 model.fit(X_train, y_train, **kwargs)
153 if logger.level == logging.DEBUG:
154 logger.debug(f"flaml.model - {model} fit finished")
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in inner_f(*args, **kwargs)
420 feature_types,
421 self._enable_categorical,
--> 422 )
423 # Stage the data, meta info are copied inside C++ MetaInfo.
424 self._temporary_data = (new, cat_codes)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/sklearn.py in fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, feature_weights, callbacks)
579 meta['classes_'] = self.classes_.tolist()
580 continue
--> 581 try:
582 json.dumps({k: v})
583 meta[k] = v
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/sklearn.py in _wrap_evaluation_matrices(self, X, y, group, sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, eval_group, label_transform)
265 def _wrap_evaluation_matrices(
266 missing: float,
--> 267 X: Any,
268 y: Any,
269 group: Optional[Any],
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in __init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, enable_categorical)
507
508 return inner_f
--> 509
510
511 class DMatrix: # pylint: disable=too-many-instance-attributes
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in inner_f(*args, **kwargs)
420 feature_types,
421 self._enable_categorical,
--> 422 )
423 # Stage the data, meta info are copied inside C++ MetaInfo.
424 self._temporary_data = (new, cat_codes)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in set_info(self, label, weight, base_margin, group, label_lower_bound, label_upper_bound, feature_names, feature_types, feature_weights)
528 silent=False,
529 feature_names: Optional[List[str]] = None,
--> 530 feature_types: Optional[List[str]] = None,
531 nthread: Optional[int] = None,
532 group=None,
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in set_label(self, label)
657 None,
658 it.proxy.handle,
--> 659 reset_callback,
660 next_callback,
661 args,
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/data.py in dispatch_meta_backend(matrix, data, name, dtype)
664 return _from_cupy_array(data, missing, nthread, feature_names,
665 feature_types)
--> 666
667
668 def _is_uri(data):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/data.py in _meta_from_numpy(data, field, dtype, handle)
598
599 def _transform_cupy_array(data):
--> 600 import cupy # pylint: disable=import-error
601 if not hasattr(data, '__cuda_array_interface__') and hasattr(
602 data, '__array__'):
/anaconda/envs/azureml_py36/lib/python3.6/ctypes/__init__.py in __getattr__(self, name)
359 if name.startswith('__') and name.endswith('__'):
360 raise AttributeError(name)
--> 361 func = self.__getitem__(name)
362 setattr(self, name, func)
363 return func
/anaconda/envs/azureml_py36/lib/python3.6/ctypes/__init__.py in __getitem__(self, name_or_ordinal)
364
365 def __getitem__(self, name_or_ordinal):
--> 366 func = self._FuncPtr((name_or_ordinal, self))
367 if not isinstance(name_or_ordinal, int):
368 func.__name__ = name_or_ordinal
AttributeError: /anaconda/envs/azureml_py36/lib/libxgboost.so: undefined symbol: XGDMatrixSetDenseInfo
Note that there seem to be two issues here:
I have an error:
import xgboost xgboost.__version__ '1.3.3'
error:
[flaml.automl: 03-23 09:59:07] {2068} INFO - task = regression [flaml.automl: 03-23 09:59:07] {2070} INFO - Data split method: uniform [flaml.automl: 03-23 09:59:07] {2074} INFO - Evaluation method: holdout [flaml.automl: 03-23 09:59:07] {2155} INFO - Minimizing error metric: 1-r2 [flaml.automl: 03-23 09:59:07] {2248} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth'] [flaml.automl: 03-23 09:59:07] {2501} INFO - iteration 0, current learner lgbm [flaml.automl: 03-23 09:59:08] {2617} INFO - Estimated sufficient time budget=363s. Estimated necessary time budget=3s. [flaml.automl: 03-23 09:59:08] {2669} INFO - at 0.1s, estimator lgbm's best error=0.7393, best estimator lgbm's best error=0.7393 [flaml.automl: 03-23 09:59:08] {2501} INFO - iteration 1, current learner lgbm [flaml.automl: 03-23 09:59:08] {2669} INFO - at 0.1s, estimator lgbm's best error=0.7393, best estimator lgbm's best error=0.7393 [flaml.automl: 03-23 09:59:08] {2501} INFO - iteration 2, current learner lgbm [flaml.automl: 03-23 09:59:08] {2669} INFO - at 0.2s, estimator lgbm's best error=0.5446, best estimator lgbm's best error=0.5446 [flaml.automl: 03-23 09:59:08] {2501} INFO - iteration 3, current learner xgboost --------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-331-4ef4c4134da1> in <module> 14 # Train with labeled input data 15 automl.fit(X_train=X_train, y_train=y_train, ---> 16 **automl_settings) /anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, pred_time_limit, train_time_limit, X_val, y_val, sample_weight_val, groups_val, groups, verbose, retrain_full, split_type, learner_selector, 
hpo_method, starting_points, seed, n_concurrent_trials, keep_search_state, early_stop, append_log, auto_augment, min_sample_size, use_ray, **fit_kwargs) 2270 with training_log_writer(log_file_name, append_log) as save_helper: 2271 self._training_log = save_helper -> 2272 self._search() 2273 else: 2274 self._training_log = None /anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _search(self) 2769 ) 2770 elif not self._use_ray: -> 2771 self._search_sequential() 2772 else: 2773 self._search_parallel() /anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _search_sequential(self) 2592 time_budget_s=min(budget_left, self._state.train_time_limit), 2593 verbose=max(self.verbose - 3, 0), -> 2594 use_ray=False, 2595 ) 2596 time_used = time.time() - start_run_time /anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/tune/tune.py in run(evaluation_function, config, low_cost_partial_config, cat_hp_cost, metric, mode, time_budget_s, points_to_evaluate, evaluated_rewards, resource_attr, min_resource, max_resource, reduction_factor, scheduler, search_alg, verbose, local_dir, num_samples, resources_per_trial, config_constraints, metric_constraints, max_failure, use_ray, use_incumbent_result_in_evaluation) 450 if verbose: 451 logger.info(f"trial {num_trials} config: {trial_to_run.config}") --> 452 result = evaluation_function(trial_to_run.config) 453 if result is not None: 454 if isinstance(result, dict): /anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/automl.py in _compute_with_config_base(config_w_resource, state, estimator) 278 state.learner_classes.get(estimator), 279 state.log_training_metric, --> 280 state.fit_kwargs, 281 ) 282 if state.retrain_final and not state.model_history: /anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/ml.py in compute_estimator(X_train, y_train, X_val, y_val, weight_val, groups_val, budget, kf, config_dic, task, estimator_name, eval_method, eval_metric, best_val_loss, 
n_jobs, estimator_class, log_training_metric, fit_kwargs) 576 budget=budget, 577 log_training_metric=log_training_metric, --> 578 fit_kwargs=fit_kwargs, 579 ) 580 else: /anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/ml.py in get_val_loss(config, estimator, X_train, y_train, X_val, y_val, weight_val, groups_val, eval_metric, obj, labels, budget, log_training_metric, fit_kwargs) 398 # fit_kwargs['X_val'] = X_val 399 # fit_kwargs['y_val'] = y_val --> 400 estimator.fit(X_train, y_train, budget, **fit_kwargs) 401 val_loss, metric_for_logging, pred_time, _ = _eval_estimator( 402 config, /anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs) 1199 self.params["tree_method"] = "gpu_hist" 1200 kwargs.pop("gpu_per_trial") -> 1201 return super().fit(X_train, y_train, budget, **kwargs) 1202 1203 def _callbacks(self, start_time, deadline) -> List[Callable]: /anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs) 970 y_train, 971 callbacks=callbacks, --> 972 **kwargs, 973 ) 974 best_iteration = ( /anaconda/envs/azureml_py36/lib/python3.6/site-packages/flaml/model.py in _fit(self, X_train, y_train, **kwargs) 150 if logger.level == logging.DEBUG: 151 logger.debug(f"flaml.model - {model} fit started") --> 152 model.fit(X_train, y_train, **kwargs) 153 if logger.level == logging.DEBUG: 154 logger.debug(f"flaml.model - {model} fit finished") /anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in inner_f(*args, **kwargs) 420 feature_types, 421 self._enable_categorical, --> 422 ) 423 # Stage the data, meta info are copied inside C++ MetaInfo. 
424 self._temporary_data = (new, cat_codes) /anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/sklearn.py in fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, feature_weights, callbacks) 579 meta['classes_'] = self.classes_.tolist() 580 continue --> 581 try: 582 json.dumps({k: v}) 583 meta[k] = v /anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/sklearn.py in _wrap_evaluation_matrices(self, X, y, group, sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, eval_group, label_transform) 265 def _wrap_evaluation_matrices( 266 missing: float, --> 267 X: Any, 268 y: Any, 269 group: Optional[Any], /anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in __init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, enable_categorical) 507 508 return inner_f --> 509 510 511 class DMatrix: # pylint: disable=too-many-instance-attributes /anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in inner_f(*args, **kwargs) 420 feature_types, 421 self._enable_categorical, --> 422 ) 423 # Stage the data, meta info are copied inside C++ MetaInfo. 
424 self._temporary_data = (new, cat_codes) /anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in set_info(self, label, weight, base_margin, group, label_lower_bound, label_upper_bound, feature_names, feature_types, feature_weights) 528 silent=False, 529 feature_names: Optional[List[str]] = None, --> 530 feature_types: Optional[List[str]] = None, 531 nthread: Optional[int] = None, 532 group=None, /anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/core.py in set_label(self, label) 657 None, 658 it.proxy.handle, --> 659 reset_callback, 660 next_callback, 661 args, /anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/data.py in dispatch_meta_backend(matrix, data, name, dtype) 664 return _from_cupy_array(data, missing, nthread, feature_names, 665 feature_types) --> 666 667 668 def _is_uri(data): /anaconda/envs/azureml_py36/lib/python3.6/site-packages/xgboost/data.py in _meta_from_numpy(data, field, dtype, handle) 598 599 def _transform_cupy_array(data): --> 600 import cupy # pylint: disable=import-error 601 if not hasattr(data, '__cuda_array_interface__') and hasattr( 602 data, '__array__'): /anaconda/envs/azureml_py36/lib/python3.6/ctypes/__init__.py in __getattr__(self, name) 359 if name.startswith('__') and name.endswith('__'): 360 raise AttributeError(name) --> 361 func = self.__getitem__(name) 362 setattr(self, name, func) 363 return func /anaconda/envs/azureml_py36/lib/python3.6/ctypes/__init__.py in __getitem__(self, name_or_ordinal) 364 365 def __getitem__(self, name_or_ordinal): --> 366 func = self._FuncPtr((name_or_ordinal, self)) 367 if not isinstance(name_or_ordinal, int): 368 func.__name__ = name_or_ordinal AttributeError: /anaconda/envs/azureml_py36/lib/libxgboost.so: undefined symbol: XGDMatrixSetDenseInfo
Note that there seem to be two issues here:
- one, the required version of ray is stated incorrectly: a newer version is needed than the 1.0.0 that the code file mentions
- two, I have some sort of library issue (maybe dependencies?) when running flaml with xgboost, and have to leave that algorithm out of the estimator list.
I made a PR to address the first issue. For the second issue, could you try installing flaml in a clean environment? If the problem persists, we need to look deeper into the environment you are using.
Hi,
I'm trying flaml on a new dataset for a regression problem. I'm using version 0.10.0. The following error also happens if I use classification. The dataset looks like this:
The code is as simple as I can have it (I removed all the extra params). The error also happens if I limit to a specific estimator:
And the stack trace is:
Any ideas? I've used FLAML successfully in the past (older versions I can't go back to), but now I can't run a simple fit.