Moffran / calibrated_explanations

Repository for the explanation method Calibrated Explanations (CE)
BSD 3-Clause "New" or "Revised" License

Example is not working #26

Closed. empowerVictor closed this issue 8 months ago.

empowerVictor commented 8 months ago

To Reproduce
Steps to reproduce the behavior: run the example from the README.

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

dataset = fetch_openml(name="wine", version=7, as_frame=True)

X = dataset.data.values.astype(float)
y = dataset.target.values

feature_names = dataset.feature_names

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=2, stratify=y)

X_prop_train, X_cal, y_prop_train, y_cal = train_test_split(X_train, y_train,
                                                            test_size=0.25)
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_jobs=-1)

rf.fit(X_prop_train, y_prop_train)

from calibrated_explanations import CalibratedExplainer, __version__
print(__version__)

explainer = CalibratedExplainer(rf, X_cal, y_cal, feature_names=feature_names)

factual_explanations = explainer.explain_factual(X_test)
ValueError                                Traceback (most recent call last)
Cell In[52], line 24
     21 from calibrated_explanations import CalibratedExplainer, __version__
     22 print(__version__)
---> 24 explainer = CalibratedExplainer(rf, X_cal, y_cal, feature_names=feature_names)
     26 factual_explanations = explainer.explain_factual(X_test)

File site-packages\calibrated_explanations\core.py:128, in CalibratedExplainer.__init__(self, model, cal_X, cal_y, mode, feature_names, categorical_features, categorical_labels, class_labels, difficulty_estimator, sample_percentiles, random_state, verbose)
    126 self.set_difficulty_estimator(difficulty_estimator, initialize=False)
    127 self.__set_mode(str.lower(mode), initialize=False)
--> 128 self.__initialize_interval_model()
    130 self.categorical_labels = categorical_labels
    131 self.class_labels = class_labels

File site-packages\calibrated_explanations\core.py:659, in CalibratedExplainer.__initialize_interval_model(self)
    657 def __initialize_interval_model(self) -> None:
    658     if self.mode == 'classification':
--> 659         self.interval_model = VennAbers(self.model.predict_proba(self.cal_X), self.cal_y, self.model)
    660     elif 'regression' in self.mode:
    661         self.interval_model = IntervalRegressor(self)

File site-packages\calibrated_explanations\VennAbers.py:18, in VennAbers.__init__(self, cal_probs, cal_y, model)
     16 self.va = va.VennAbers()
     17 cprobs, predict = self.get_p_value(self.cprobs)
---> 18 self.va.fit(cprobs, np.multiply(predict == self.ctargets, 1) if self.is_multiclass() else self.ctargets, precision=4)

File site-packages\venn_abers\venn_abers.py:274, in VennAbers.fit(self, p_cal, y_cal, precision)
    258 def fit(self, p_cal, y_cal, precision=None):
    259     """Fits the VennAbers calibrator to the calibration dataset
    260
   (...)
    272         Yields significantly faster computation time for larger calibration datasets
    273     """
--> 274     self.p0, self.p1, self.c = calc_p0p1(p_cal, y_cal, precision)

File site-packages\venn_abers\venn_abers.py:80, in calc_p0p1(p_cal, y_cal, precision)
     77 P[0, :] = -1
     79 P[2:, 0] = np.cumsum(w)
---> 80 P[2:-1, 1] = np.cumsum(k_label_sort)[(ia - 1)[1:]]
     81 P[-1, 1] = np.cumsum(k_label_sort)[-1]
     83 p1 = np.zeros((len(c) + 1, 2))
ValueError: could not convert string to float: 'FalseFalseFalseFalseFalseFalse'
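The error originates from the target column. A quick check (a hypothetical sketch, not part of the original report) shows that fetch_openml returns the wine (version 7) targets as string labels rather than numeric classes:

from sklearn.datasets import fetch_openml

# Hypothetical check: inspect the target column of the wine dataset (version 7).
dataset = fetch_openml(name="wine", version=7, as_frame=True)
print(dataset.target.dtype)     # categorical/object dtype, not numeric
print(dataset.target.unique())  # string labels such as 'True' and 'False'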


tuvelofstrom commented 8 months ago

Thanks for spotting this! Since CE does not (yet) support string representations of the targets in classification, the following change has been made to README.md: y = (dataset.target.values == 'True').astype(int), which is how it is implemented in the quickstart notebook.

Adding support for string targets is a good idea! I will add a feature request for that.
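For reference, a minimal sketch of the README example with that one change applied (only the target conversion differs from the code above):

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from calibrated_explanations import CalibratedExplainer

dataset = fetch_openml(name="wine", version=7, as_frame=True)

X = dataset.data.values.astype(float)
# The fix: map the string labels to 0/1 integers before calibration.
y = (dataset.target.values == 'True').astype(int)

feature_names = dataset.feature_names

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=2, stratify=y)
X_prop_train, X_cal, y_prop_train, y_cal = train_test_split(X_train, y_train,
                                                            test_size=0.25)

rf = RandomForestClassifier(n_jobs=-1)
rf.fit(X_prop_train, y_prop_train)

explainer = CalibratedExplainer(rf, X_cal, y_cal, feature_names=feature_names)
factual_explanations = explainer.explain_factual(X_test)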