eli5-org / eli5

A library for debugging/inspecting machine learning classifiers and explaining their predictions
MIT License
262 stars 42 forks source link

Modification to work with scikit-learn >=1.3 and Eli5. #42

Open raultomasmora opened 2 months ago

raultomasmora commented 2 months ago

Modification to work with scikit-learn >=1.3 and Eli5. Resolves the issue of "ImportError: cannot import name 'if_delegate_has_method' from 'sklearn.utils.metaestimators'". Note: sklearn.utils.metaestimators.if_delegate_has_method is deprecated since version 1.3; use available_if instead.

Make the following changes to the code in this file: "..\eli5\sklearn\permutation_importance.py" (C:\Anaconda3\envs\python_3-12-4\Lib\site-packages\eli5\sklearn\permutation_importance.py)

Solution: Replace "from sklearn.utils.metaestimators import if_delegate_has_method" with "from sklearn.utils.metaestimators import available_if"

and replace all occurrences of "@if_delegate_has_method(delegate='wrapped_estimator_')" with "@available_if('wrapped_estimator_')"

lukaspistelak commented 2 weeks ago

I have the same problem. How can I fix it? Or will it be fixed?

raultomasmora commented 2 weeks ago

> i have same problem , how can i fix it ? or it will be fixed ?

Search your computer for the following file: "...\eli5\sklearn\permutation_importance.py"

On Windows 10 with Anaconda, I have it at this path: "C:\Anaconda3\envs\name_of_your_environment\Lib\site-packages\eli5\sklearn\permutation_importance.py"

Open it, make the following text changes, and save it.

Replace "from sklearn.utils.metaestimators import if_delegate_has_method" with "from sklearn.utils.metaestimators import available_if"

and replace all occurrences of "@if_delegate_has_method(delegate='wrapped_estimator_')" with "@available_if('wrapped_estimator_')"

Attached modified file permutation_importance.py.zip

lukaspistelak commented 2 weeks ago

Thanks, but when will it be released on pip?

lopuhin commented 2 weeks ago

Hi, we'll try to do a release on pip with the fixes. Sorry for the delay.

lukaspistelak commented 2 weeks ago

I modified the files, but this code throws another error:

import warnings


from eli5 import show_weights
from eli5.sklearn import PermutationImportance
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFECV
from sklearn.model_selection import KFold
from sklearn.svm import SVR

warnings.filterwarnings("ignore", category=FutureWarning)

X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)

splitter = KFold(n_splits=3) # 3 folds as in the example

estimator = SVR(kernel="linear")

selector = RFECV(
    PermutationImportance(estimator,  scoring='r2', n_iter=10, random_state=42, cv=splitter),
    cv=splitter,
    scoring='r2',
    step=1
)
selector = selector.fit(X, y)
selector.ranking_

show_weights(selector.estimator_)

AttributeError                            Traceback (most recent call last)
Cell In[52], line 26
     18 estimator = SVR(kernel="linear")
     20 selector = RFECV(
     21     PermutationImportance(estimator,  scoring='r2', n_iter=10, random_state=42, cv=splitter),
     22     cv=splitter,
     23     scoring='r2',
     24     step=1
     25 )
---> 26 selector = selector.fit(X, y)
     27 selector.ranking_
     29 show_weights(selector.estimator_)

File ~/.local/lib/python3.10/site-packages/sklearn/base.py:1473, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
   1466     estimator._validate_params()
   1468 with config_context(
   1469     skip_parameter_validation=(
   1470         prefer_skip_nested_validation or global_skip_validation
   1471     )
   1472 ):
-> 1473     return fit_method(estimator, *args, **kwargs)

File ~/.local/lib/python3.10/site-packages/sklearn/feature_selection/_rfe.py:777, in RFECV.fit(self, X, y, groups)
    774     parallel = Parallel(n_jobs=self.n_jobs)
    775     func = delayed(_rfe_single_fit)
--> 777 scores_features = parallel(
    778     func(rfe, self.estimator, X, y, train, test, scorer)
    779     for train, test in cv.split(X, y, groups)
    780 )
    781 scores, step_n_features = zip(*scores_features)
    783 step_n_features_rev = np.array(step_n_features[0])[::-1]

File ~/.local/lib/python3.10/site-packages/sklearn/feature_selection/_rfe.py:778, in <genexpr>(.0)
    774     parallel = Parallel(n_jobs=self.n_jobs)
    775     func = delayed(_rfe_single_fit)
    777 scores_features = parallel(
--> 778     func(rfe, self.estimator, X, y, train, test, scorer)
    779     for train, test in cv.split(X, y, groups)
    780 )
    781 scores, step_n_features = zip(*scores_features)
    783 step_n_features_rev = np.array(step_n_features[0])[::-1]

File ~/.local/lib/python3.10/site-packages/sklearn/feature_selection/_rfe.py:37, in _rfe_single_fit(rfe, estimator, X, y, train, test, scorer)
     34 X_train, y_train = _safe_split(estimator, X, y, train)
     35 X_test, y_test = _safe_split(estimator, X, y, test, train)
---> 37 rfe._fit(
     38     X_train,
     39     y_train,
     40     lambda estimator, features: _score(
     41         # TODO(SLEP6): pass score_params here
     42         estimator,
     43         X_test[:, features],
     44         y_test,
     45         scorer,
     46         score_params=None,
     47     ),
     48 )
     50 return rfe.step_scores_, rfe.step_n_features_

File ~/.local/lib/python3.10/site-packages/sklearn/feature_selection/_rfe.py:344, in RFE._fit(self, X, y, step_score, **fit_params)
    342 if step_score:
    343     self.step_n_features_.append(len(features))
--> 344     self.step_scores_.append(step_score(estimator, features))
    345 support_[features[ranks][:threshold]] = False
    346 ranking_[np.logical_not(support_)] += 1

File ~/.local/lib/python3.10/site-packages/sklearn/feature_selection/_rfe.py:40, in _rfe_single_fit.<locals>.<lambda>(estimator, features)
     34 X_train, y_train = _safe_split(estimator, X, y, train)
     35 X_test, y_test = _safe_split(estimator, X, y, test, train)
     37 rfe._fit(
     38     X_train,
     39     y_train,
---> 40     lambda estimator, features: _score(
     41         # TODO(SLEP6): pass score_params here
     42         estimator,
     43         X_test[:, features],
     44         y_test,
     45         scorer,
     46         score_params=None,
     47     ),
     48 )
     50 return rfe.step_scores_, rfe.step_n_features_

File ~/.local/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:971, in _score(estimator, X_test, y_test, scorer, score_params, error_score)
    969         scores = scorer(estimator, X_test, **score_params)
    970     else:
--> 971         scores = scorer(estimator, X_test, y_test, **score_params)
    972 except Exception:
    973     if isinstance(scorer, _MultimetricScorer):
    974         # If `_MultimetricScorer` raises exception, the `error_score`
    975         # parameter is equal to "raise".

File ~/.local/lib/python3.10/site-packages/sklearn/metrics/_scorer.py:279, in _BaseScorer.__call__(self, estimator, X, y_true, sample_weight, **kwargs)
    276 if sample_weight is not None:
    277     _kwargs["sample_weight"] = sample_weight
--> 279 return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)

File ~/.local/lib/python3.10/site-packages/sklearn/metrics/_scorer.py:370, in _Scorer._score(self, method_caller, estimator, X, y_true, **kwargs)
    360 self._warn_overlap(
    361     message=(
    362         "There is an overlap between set kwargs of this scorer instance and"
   (...)
    366     kwargs=kwargs,
    367 )
    369 pos_label = None if is_regressor(estimator) else self._get_pos_label()
--> 370 response_method = _check_response_method(estimator, self._response_method)
    371 y_pred = method_caller(
    372     estimator, response_method.__name__, X, pos_label=pos_label
    373 )
    375 scoring_kwargs = {**self._kwargs, **kwargs}

File ~/.local/lib/python3.10/site-packages/sklearn/utils/validation.py:2145, in _check_response_method(estimator, response_method)
   2143 prediction_method = reduce(lambda x, y: x or y, prediction_method)
   2144 if prediction_method is None:
-> 2145     raise AttributeError(
   2146         f"{estimator.__class__.__name__} has none of the following attributes: "
   2147         f"{', '.join(list_methods)}."
   2148     )
   2150 return prediction_method

AttributeError: PermutationImportance has none of the following attributes: predict.
raultomasmora commented 2 weeks ago

If I am not wrong, I believe that “PermutationImportance” from the “eli5” library is not intended for use with sklearn's RFECV.

The error you get is generated by the sklearn library.

Try this code without RFECV:

estimator = SVR(kernel="linear")
importance = PermutationImportance(estimator,  scoring='r2', n_iter=10, random_state=42, cv=splitter).fit(X, y)

importance_to_df = pd.DataFrame(data=list(zip(X.columns.values, importance.feature_importances_, importance.feature_importances_std_)), 
                              columns=['feature', 'weight', 'std']).sort_values(by=['weight'], ascending=[False])
display(importance_to_df)

importance_eli5 = eli5.show_weights(estimator=importance, feature_names=X.columns.values, top=None)
display(importance_eli5)

You can also use permutation_importance from sklearn.

https://scikit-learn.org/stable/modules/generated/sklearn.inspection.permutation_importance.html