microsoft / FLAML


`AttributeError: best_iteration is only defined when early stopping is used.` with xgboost 2.0 #1217

Closed. harupy closed this issue 9 months ago.

harupy commented 9 months ago

Code

def _create_model_automl(
    X,
    y,
    task: str,
    extended_task: str,
    step_config: Dict[str, Any],
    recipe_root: str,
    evaluation_metrics: Dict[str, RecipeMetric],
    primary_metric: str,
) -> Tuple["BaseEstimator", Dict[str, Any]]:
    try:
        from flaml import AutoML
    except ImportError:
        raise MlflowException("Please install FLAML to use AutoML!")

    try:
        if primary_metric in _MLFLOW_TO_FLAML_METRICS and primary_metric in evaluation_metrics:
            metric = _MLFLOW_TO_FLAML_METRICS[primary_metric]
            if primary_metric == "roc_auc" and extended_task == "classification/multiclass":
                metric = "roc_auc_ovr"
        elif primary_metric in _SKLEARN_METRICS and primary_metric in evaluation_metrics:
            metric = _create_sklearn_metric_flaml(
                primary_metric,
                -1 if evaluation_metrics[primary_metric].greater_is_better else 1,
                "macro" if extended_task in ["classification/multiclass"] else "binary",
            )
        elif primary_metric in evaluation_metrics:
            metric = _create_custom_metric_flaml(
                task,
                primary_metric,
                -1 if evaluation_metrics[primary_metric].greater_is_better else 1,
                _load_custom_metrics(recipe_root, [evaluation_metrics[primary_metric]])[0],
            )
        else:
            raise MlflowException(
                f"There is no FLAML alternative or custom metric for {primary_metric} metric."
            )

        automl_settings = step_config.get("flaml_params", {})
        automl_settings["time_budget"] = step_config.get(
            "time_budget_secs", _AUTOML_DEFAULT_TIME_BUDGET
        )
        automl_settings["metric"] = metric
        automl_settings["task"] = task
        # Disable autologging during the hyperparameter search; otherwise
        # it tries to log the same parameters multiple times.
        mlflow.autolog(disable=True)
        automl = AutoML()
        automl.fit(X, y, **automl_settings)
        mlflow.autolog(disable=False, log_models=False)
        if automl.model is None:
            raise MlflowException(
                "AutoML (FLAML) could not train a suitable algorithm. "
                "Maybe you should increase `time_budget_secs`parameter "
                "to give AutoML process more time."
            )
        return automl.model.estimator, automl.best_config
    except Exception as e:
        _logger.warning(e, exc_info=e, stack_info=True)
        raise MlflowException(
            f"Error has occurred during training of AutoML model using FLAML: {e!r}"
        )

Traceback

Traceback (most recent call last):
  File "/home/runner/work/mlflow/mlflow/mlflow/recipes/steps/automl/flaml.py", line 170, in _create_model_automl
    automl.fit(X, y, **automl_settings)
  File "/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/flaml/automl/automl.py", line 1928, in fit
    self._search()
  File "/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/flaml/automl/automl.py", line 2482, in _search
    self._search_sequential()
  File "/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/flaml/automl/automl.py", line 2318, in _search_sequential
    analysis = tune.run(
  File "/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/flaml/tune/tune.py", line 808, in run
    result = evaluation_function(trial_to_run.config)
  File "/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/flaml/automl/state.py", line 302, in _compute_with_config_base
    ) = compute_estimator(
  File "/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/flaml/automl/ml.py", line 369, in compute_estimator
    val_loss, metric_for_logging, train_time, pred_time = task.evaluate_model_CV(
  File "/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/flaml/automl/task/generic_task.py", line 737, in evaluate_model_CV
    val_loss_i, metric_i, train_time_i, pred_time_i = get_val_loss(
  File "/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/flaml/automl/ml.py", line 494, in get_val_loss
    estimator.fit(X_train, y_train, budget=budget, free_mem_ratio=free_mem_ratio, **fit_kwargs)
  File "/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/flaml/automl/model.py", line 1652, in fit
    return super().fit(X_train, y_train, budget, free_mem_ratio, **kwargs)
  File "/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/flaml/automl/model.py", line 1415, in fit
    self._model.get_booster().best_iteration
  File "/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/xgboost/core.py", line 2602, in best_iteration
    raise AttributeError(
AttributeError: `best_iteration` is only defined when early stopping is used.

FLAML version

2.0.2

harupy commented 9 months ago

Creating a minimal reproduction now.
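
A reproduction along these lines should hit the same code path (restricting the search to XGBoost via `estimator_list` is an assumption made here to force the failing estimator; both arguments are standard `AutoML.fit` parameters):

from sklearn.datasets import load_iris
from flaml import AutoML

X, y = load_iris(return_X_y=True)
automl = AutoML()
# With flaml 2.0.2 and xgboost 2.0.0 this fails with
# AttributeError: `best_iteration` is only defined when early stopping is used.
automl.fit(X, y, task="classification", estimator_list=["xgboost"], time_budget=10)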

harupy commented 9 months ago

Found https://github.com/dmlc/xgboost/pull/9403
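
That PR appears to be the change behind the error: as of XGBoost 2.0, accessing `Booster.best_iteration` raises an `AttributeError` unless early stopping was actually used, which is exactly the failure in the traceback above. A minimal sketch of the 2.0 behavior:

import numpy as np
import xgboost as xgb

# Train a tiny model with no early stopping configured (assumes xgboost >= 2.0).
X = np.random.rand(32, 4)
y = np.random.randint(0, 2, size=32)
model = xgb.XGBClassifier(n_estimators=4).fit(X, y)

try:
    model.get_booster().best_iteration
except AttributeError as err:
    print(err)  # `best_iteration` is only defined when early stopping is used.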

harupy commented 9 months ago

https://pypi.org/project/xgboost/2.0.0/ was released today.

domoritz commented 9 months ago

Looks like FLAML is not yet compatible with XGBoost 2. It works with 1.x.
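
Until a FLAML release with the fix is available, staying on the 1.x line (e.g. pinning `xgboost<2.0`) is a practical workaround. A small guard sketch, with the version bound as the only assumption:

from packaging.version import Version
import xgboost

# FLAML releases without the fix rely on the xgboost 1.x behavior of
# `best_iteration`; fail fast if an incompatible version is installed.
if Version(xgboost.__version__) >= Version("2.0.0"):
    raise RuntimeError("Pin `xgboost<2.0` until a FLAML release supports xgboost 2.")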

AlekseyGur commented 9 months ago

With FLAML==2.1.0 and xgboost==2.0.0, the error still occurs:

AttributeError: `best_iteration` is only defined when early stopping is used.

harupy commented 9 months ago

@AlekseyGur FLAML 2.1.0 doesn't include https://github.com/microsoft/FLAML/pull/1219. 2.2.0 should.
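
For context, a compatibility fix has to tolerate both behaviors, since xgboost 2.0 raises on the attribute access whenever early stopping was not used. A sketch of such a guard (an illustration, not necessarily what #1219 actually does):

def _safe_best_iteration(booster):
    """Return `booster.best_iteration`, or None when early stopping was not
    used (xgboost >= 2.0 raises AttributeError in that case)."""
    try:
        return booster.best_iteration
    except AttributeError:
        return None

# e.g. _safe_best_iteration(model.get_booster())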

sonichi commented 9 months ago

Just released v2.1.1 to support xgb 2. Thanks for the reminder!