mljar / mljar-supervised

Python package for AutoML on Tabular Data with Feature Engineering, Hyper-Parameters Tuning, Explanations and Automatic Documentation
https://mljar.com
MIT License
3.05k stars · 407 forks · source link

user warning in test: tests/tests_preprocessing/test_scale.py::ScaleTest::test_fit_log_and_normal #759

Status: Closed — opened by a-szulc, closed 2 months ago

a-szulc commented 2 months ago
============================= test session starts ==============================
platform linux -- Python 3.12.3, pytest-8.3.2, pluggy-1.5.0 -- /home/adas/mljar/mljar-supervised/venv/bin/python3
cachedir: .pytest_cache
rootdir: /home/adas/mljar/mljar-supervised
configfile: pytest.ini
plugins: cov-5.0.0
collecting ... collected 1 item

tests/tests_preprocessing/test_scale.py::ScaleTest::test_fit_log_and_normal FAILED

=================================== FAILURES ===================================
______________________ ScaleTest.test_fit_log_and_normal _______________________

self = <tests.tests_preprocessing.test_scale.ScaleTest testMethod=test_fit_log_and_normal>

    def test_fit_log_and_normal(self):
        # training data
        d = {
            "col1": [12, 13, 3, 4, 5, 6, 7, 8000, 9000, 10000.0],
            "col2": [21, 22, 23, 24, 25, 26, 27, 28, 29, 30.0],
            "col3": [12, 2, 3, 4, 5, 6, 7, 8000, 9000, 10000.0],
        }
        df = pd.DataFrame(data=d)

        scale = Scale(["col1", "col3"], scale_method=Scale.SCALE_LOG_AND_NORMAL)
        scale.fit(df)
        df = scale.transform(df)
        val = float(df["col1"][0])

        assert_almost_equal(np.mean(df["col1"]), 0)
        self.assertTrue(
            df["col1"][0] + 0.01 < df["col1"][1]
        )  # in case of wrong scaling the small values will be squeezed

        df = scale.inverse_transform(df)

        scale2 = Scale()
        scale_params = scale.to_json()

        scale2.from_json(scale_params)
>       df = scale2.transform(df)

tests/tests_preprocessing/test_scale.py:38: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
supervised/preprocessing/scale.py:40: in transform
    X.loc[:, self.columns] = self.scale.transform(X[self.columns])
venv/lib/python3.12/site-packages/sklearn/utils/_set_output.py:313: in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
venv/lib/python3.12/site-packages/sklearn/preprocessing/_data.py:1045: in transform
    X = self._validate_data(
venv/lib/python3.12/site-packages/sklearn/base.py:608: in _validate_data
    self._check_feature_names(X, reset=reset)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = StandardScaler()
X =        col1      col3
0  2.302585  2.397895
1  2.397895  0.000000
2  0.000000  0.693147
3  0.693147  1.098612
4  1.098... 1.386294
5  1.386294  1.609438
6  1.609438  1.791759
7  8.986947  8.987072
8  9.104758  9.104869
9  9.210140  9.210240

    def _check_feature_names(self, X, *, reset):
        """Set or check the `feature_names_in_` attribute.

        .. versionadded:: 1.0

        Parameters
        ----------
        X : {ndarray, dataframe} of shape (n_samples, n_features)
            The input samples.

        reset : bool
            Whether to reset the `feature_names_in_` attribute.
            If False, the input will be checked for consistency with
            feature names of data provided when reset was last True.
            .. note::
               It is recommended to call `reset=True` in `fit` and in the first
               call to `partial_fit`. All other methods that validate `X`
               should set `reset=False`.
        """

        if reset:
            feature_names_in = _get_feature_names(X)
            if feature_names_in is not None:
                self.feature_names_in_ = feature_names_in
            elif hasattr(self, "feature_names_in_"):
                # Delete the attribute when the estimator is fitted on a new dataset
                # that has no feature names.
                delattr(self, "feature_names_in_")
            return

        fitted_feature_names = getattr(self, "feature_names_in_", None)
        X_feature_names = _get_feature_names(X)

        if fitted_feature_names is None and X_feature_names is None:
            # no feature names seen in fit and in X
            return

        if X_feature_names is not None and fitted_feature_names is None:
>           warnings.warn(
                f"X has feature names, but {self.__class__.__name__} was fitted without"
                " feature names"
            )
E           UserWarning: X has feature names, but StandardScaler was fitted without feature names

venv/lib/python3.12/site-packages/sklearn/base.py:486: UserWarning
=========================== short test summary info ============================
FAILED tests/tests_preprocessing/test_scale.py::ScaleTest::test_fit_log_and_normal
============================== 1 failed in 1.95s ===============================
a-szulc commented 2 months ago

Fixed in pull request #767.