Duplicates in self.info

import ethik
import pandas as pd
from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
from sklearn import neighbors
from sklearn import pipeline
from sklearn import preprocessing

# Minimal reproduction for the "Duplicates in self.info" report: the same
# explainer is asked to explain the same feature twice (once as part of the
# full test set, once on its own) and the resulting `info` table is inspected.
iris = datasets.load_iris()

# Features as a DataFrame with the dataset's feature names as columns;
# integer targets are mapped to their class names.
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.Series(iris.target).map(lambda x: iris.target_names[x])

# Fixed random_state so the split is the same on every run.
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, shuffle=True, random_state=42)

# Standard scaling followed by a k-nearest-neighbours classifier.
model = pipeline.make_pipeline(
    preprocessing.StandardScaler(),
    neighbors.KNeighborsClassifier()
)
model.fit(X_train, y_train)

# Class probabilities for the test set, one column per class label.
y_pred = model.predict_proba(X_test)
y_pred = pd.DataFrame(y_pred, columns=model.classes_)

# NOTE(review): `memoize=True` presumably makes the explainer keep results
# between calls so they can be reused -- confirm against the ethik docs.
explainer = ethik.ClassificationExplainer(n_samples=30, memoize=True)
# Only the first class column is explained below.
label = y_pred.columns[0]

# First call: every feature of the test set.
explainer.explain_influence(
    X_test=X_test,
    y_pred=y_pred[label],
)

# Second call: a single feature that was already covered by the first call,
# with the same predictions.
explainer.explain_influence(
    X_test=X_test["petal width (cm)"],
    y_pred=y_pred[label],
)

# Should contain one row only: the label, feature and tau are fully specified,
# so more than one matching row means the second call added a duplicate.
explainer.info.query(f"label == '{label}' and feature == 'petal width (cm)' and tau == -0.85")

(I removed the hashing step for the group so that the difference between the two rows is clearly visible.)

For the same feature and the same tau, Query.target_from_tau() gives two slightly different targets: 0.33053947368421055 and 0.33053947368421066. They differ only in the last two digits, but because they are not exactly equal, the groups are different.

XAI-ANITI / ethik

Duplicates in self.info #126