pyg-team / pytorch-frame

Tabular Deep Learning Library for PyTorch
https://pytorch-frame.readthedocs.io
MIT License
505 stars 53 forks source link

Fix error in XGBoost #443

Closed · puririshi98 closed this 4 weeks ago

puririshi98 commented 1 month ago
_______ test_gbdt_with_save_load[task_type_and_metric4-stypes1-XGBoost] ________
gbdt_cls = <class 'torch_frame.gbdt.tuned_xgboost.XGBoost'>
stypes = [<stype.categorical: 'categorical'>]
task_type_and_metric = (<TaskType.MULTICLASS_CLASSIFICATION: 'multiclass_classification'>, <Metric.ACCURACY: 'accuracy'>)
    @pytest.mark.parametrize('gbdt_cls', [
        CatBoost,
        XGBoost,
        LightGBM,
    ])
    @pytest.mark.parametrize('stypes', [
        [stype.numerical],
        [stype.categorical],
        [stype.text_embedded],
        [stype.numerical, stype.numerical, stype.text_embedded],
    ])
    @pytest.mark.parametrize('task_type_and_metric', [
        (TaskType.REGRESSION, Metric.RMSE),
        (TaskType.REGRESSION, Metric.MAE),
        (TaskType.BINARY_CLASSIFICATION, Metric.ACCURACY),
        (TaskType.BINARY_CLASSIFICATION, Metric.ROCAUC),
        (TaskType.MULTICLASS_CLASSIFICATION, Metric.ACCURACY),
    ])
    def test_gbdt_with_save_load(gbdt_cls, stypes, task_type_and_metric):
        task_type, metric = task_type_and_metric
        dataset: Dataset = FakeDataset(
            num_rows=30,
            with_nan=True,
            stypes=stypes,
            create_split=True,
            task_type=task_type,
            col_to_text_embedder_cfg=TextEmbedderConfig(
                text_embedder=HashTextEmbedder(8)),
        )
        dataset.materialize()
        gbdt = gbdt_cls(
            task_type=task_type,
            num_classes=dataset.num_classes
            if task_type == TaskType.MULTICLASS_CLASSIFICATION else None,
            metric=metric,
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            path = osp.join(temp_dir, 'model.json')
            with pytest.raises(RuntimeError, match="is not yet fitted"):
                gbdt.save(path)

            if isinstance(gbdt_cls, XGBoost):
                gbdt.tune(tf_train=dataset.tensor_frame,
                          tf_val=dataset.tensor_frame, num_trials=2,
                          num_boost_round=1000, early_stopping_rounds=2)
                assert gbdt.model.best_iteration is not None
            else:
>               gbdt.tune(
                    tf_train=dataset.tensor_frame,
                    tf_val=dataset.tensor_frame,
                    num_trials=2,
                    num_boost_round=2,
                )
gbdt/test_gbdt.py:63: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/usr/local/lib/python3.10/dist-packages/torch_frame/gbdt/gbdt.py:88: in tune
    self._tune(tf_train, tf_val, num_trials=num_trials, *args, **kwargs)
/usr/local/lib/python3.10/dist-packages/torch_frame/gbdt/tuned_xgboost.py:227: in _tune
    study.optimize(
/usr/local/lib/python3.10/dist-packages/optuna/study/study.py:451: in optimize
    _optimize(
/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py:62: in _optimize
    _optimize_sequential(
/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py:159: in _optimize_sequential
    frozen_trial = _run_trial(study, func, catch)
/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py:247: in _run_trial
    raise func_err
/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py:196: in _run_trial
    value_or_values = func(trial)
/usr/local/lib/python3.10/dist-packages/torch_frame/gbdt/tuned_xgboost.py:228: in <lambda>
    lambda trial: self.objective(
/usr/local/lib/python3.10/dist-packages/torch_frame/gbdt/tuned_xgboost.py:178: in objective
    boost = xgboost.train(self.params, dtrain,
/usr/local/lib/python3.10/dist-packages/xgboost/core.py:726: in inner_f
    return func(**kwargs)
/usr/local/lib/python3.10/dist-packages/xgboost/training.py:181: in train
    bst.update(dtrain, iteration=i, fobj=obj)
/usr/local/lib/python3.10/dist-packages/xgboost/core.py:2100: in update
    _check_call(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
ret = -1
    def _check_call(ret: int) -> None:
        """Check the return value of C API call

        This function will raise exception when error occurs.
        Wrap every API call with this function

        Parameters
        ----------
        ret :
            return value from API calls
        """
        if ret != 0:
>           raise XGBoostError(py_str(_LIB.XGBGetLastError()))
E           xgboost.core.XGBoostError: [17:59:51] /home/coder/xgboost/src/gbm/gblinear.cc:147: Check failed: !p_fmat->Info().HasCategorical(): `gblinear` doesn't support categorical features.
E           Stack trace:
E             [bt] (0) /usr/lib/libxgboost.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x78) [0xfff162a309f8]
E             [bt] (1) /usr/lib/libxgboost.so(xgboost::gbm::GBLinear::DoBoost(xgboost::DMatrix*, xgboost::linalg::Tensor<xgboost::detail::GradientPairInternal<float>, 2>*, xgboost::PredictionCacheEntry*, xgboost::ObjFunction const*)+0x608) [0xfff162d0a7f8]
E             [bt] (2) /usr/lib/libxgboost.so(xgboost::LearnerImpl::UpdateOneIter(int, std::shared_ptr<xgboost::DMatrix>)+0x354) [0xfff162d63e54]
E             [bt] (3) /usr/lib/libxgboost.so(XGBoosterUpdateOneIter+0x7c) [0xfff1629876dc]
E             [bt] (4) /usr/lib/aarch64-linux-gnu/libffi.so.8(+0x6e10) [0xfffbc7296e10]
E             [bt] (5) /usr/lib/aarch64-linux-gnu/libffi.so.8(+0x3a94) [0xfffbc7293a94]
E             [bt] (6) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-aarch64-linux-gnu.so(+0x121c8) [0xfffbc72c21c8]
E             [bt] (7) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-aarch64-linux-gnu.so(+0x109ec) [0xfffbc72c09ec]
E             [bt] (8) /usr/bin/python(_PyObject_MakeTpCall+0x28c) [0xaaad7c0fa030]
/usr/local/lib/python3.10/dist-packages/xgboost/core.py:284: XGBoostError