TeamHG-Memex / eli5

A library for debugging/inspecting machine learning classifiers and explaining their predictions
http://eli5.readthedocs.io
MIT License
2.75k stars 331 forks source link

'(slice(None, None, None), 0)' is an invalid key #403

Open ibobak opened 3 years ago

ibobak commented 3 years ago

I am getting this strange exception.

def the_predict_func(X, y):
    """Return the mean relative absolute error of ``model`` on ``X`` vs ``y``.

    Calls ``model.predict(X)`` on the enclosing scope's ``model`` and takes
    ``.mean()`` of ``abs(prediction - y) / y``.
    """
    predicted = model.predict(X)
    relative_errors = abs(predicted - y) / y
    return relative_errors.mean()

the_predict_func(X_test, y_test)

0.9897550548123452

from eli5.permutation_importance import get_score_importances

# This call raises the TypeError shown in the traceback below: iter_shuffled
# evaluates X_res[:, columns], and pandas' DataFrame.__getitem__ rejects the
# (slice, int) tuple as a column key.
# NOTE(review): presumably passing NumPy arrays (e.g. X_test.values and
# y_test.values) instead of the DataFrame/Series avoids this -- confirm.
base_score, score_decreases = get_score_importances(the_predict_func, X_test, y_test)
# Average the per-iteration score decreases along axis 0.
feature_importances = np.mean(score_decreases, axis=0)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-87-9e3de7ed2dd6> in <module>
      1 from eli5.permutation_importance import get_score_importances
      2 
----> 3 base_score, score_decreases = get_score_importances(the_predict_func, X_test, y_test)
      4 feature_importances = np.mean(score_decreases, axis=0)

/opt/anaconda3/envs/lto/lib/python3.8/site-packages/eli5/permutation_importance.py in get_score_importances(score_func, X, y, n_iter, columns_to_shuffle, random_state)
     87     scores_decreases = []
     88     for i in range(n_iter):
---> 89         scores_shuffled = _get_scores_shufled(
     90             score_func, X, y, columns_to_shuffle=columns_to_shuffle,
     91             random_state=rng

/opt/anaconda3/envs/lto/lib/python3.8/site-packages/eli5/permutation_importance.py in _get_scores_shufled(score_func, X, y, columns_to_shuffle, random_state)
     98                         random_state=None):
     99     Xs = iter_shuffled(X, columns_to_shuffle, random_state=random_state)
--> 100     return np.array([score_func(X_shuffled, y) for X_shuffled in Xs])

/opt/anaconda3/envs/lto/lib/python3.8/site-packages/eli5/permutation_importance.py in <listcomp>(.0)
     98                         random_state=None):
     99     Xs = iter_shuffled(X, columns_to_shuffle, random_state=random_state)
--> 100     return np.array([score_func(X_shuffled, y) for X_shuffled in Xs])

/opt/anaconda3/envs/lto/lib/python3.8/site-packages/eli5/permutation_importance.py in iter_shuffled(X, columns_to_shuffle, pre_shuffle, random_state)
     48             X_res[:, columns] = X_shuffled[:, columns]
     49         else:
---> 50             rng.shuffle(X_res[:, columns])
     51         yield X_res
     52         X_res[:, columns] = X[:, columns]

/opt/anaconda3/envs/lto/lib/python3.8/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2900             if self.columns.nlevels > 1:
   2901                 return self._getitem_multilevel(key)
-> 2902             indexer = self.columns.get_loc(key)
   2903             if is_integer(indexer):
   2904                 indexer = [indexer]

/opt/anaconda3/envs/lto/lib/python3.8/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2887             casted_key = self._maybe_cast_indexer(key)
   2888             try:
-> 2889                 return self._engine.get_loc(casted_key)
   2890             except KeyError as err:
   2891                 raise KeyError(key) from err

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

TypeError: '(slice(None, None, None), 0)' is an invalid key

X_test is a pandas DataFrame with the following index:

MultiIndex([(2372, '400007358', 14306, '2019-12-22'),
            (2372, '400007358', 22777, '2019-12-09'),
            ....
            (2421, '400007359',  4333, '2019-12-19')],
           names=['item_id', 'cust_item_id', 'locn_id', 'bday'], length=547056)

y_test is a series with exactly the same index as above.

Am I doing something wrong, or is this a bug? Thanks in advance.