import ethik
import pandas as pd
from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
from sklearn import neighbors
from sklearn import pipeline
from sklearn import preprocessing
# Reproduction script: call explain_influence twice for the same label and
# feature, then inspect the rows recorded for one (label, feature, tau) triple.

# Iris features as a DataFrame; targets mapped from class index to class name.
iris = datasets.load_iris()
features = pd.DataFrame(iris.data, columns=iris.feature_names)
species = pd.Series(iris.target).map(lambda idx: iris.target_names[idx])

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    features,
    species,
    shuffle=True,
    random_state=42,
)

# Scale the features, then classify with k-nearest neighbours.
model = pipeline.make_pipeline(
    preprocessing.StandardScaler(),
    neighbors.KNeighborsClassifier(),
)
model.fit(X_train, y_train)

# One column of predicted probabilities per class, named after the class.
y_pred = pd.DataFrame(model.predict_proba(X_test), columns=model.classes_)

explainer = ethik.ClassificationExplainer(n_samples=30, memoize=True)
label = y_pred.columns[0]

# First pass: all features of the test set.
explainer.explain_influence(X_test=X_test, y_pred=y_pred[label])

# Second pass: the single "petal width (cm)" column, same label.
petal_width = X_test["petal width (cm)"]
explainer.explain_influence(X_test=petal_width, y_pred=y_pred[label])

# Should contain one row only
row_filter = f"label == '{label}' and feature == 'petal width (cm)' and tau == -0.85"
explainer.info.query(row_filter)
(I removed the hashing step for the group so that the difference is clearly visible.)
For the same feature and the same tau, `Query.target_from_tau()` returns two different targets, `0.33053947368421055` and `0.33053947368421066`. They differ only by floating-point rounding (about 1e-16), but because they are not exactly equal the groups are treated as different.
(I removed the hashing step for the group so that we can clearly see the difference)
For the same feature and the same tau, `Query.target_from_tau()` gives two different targets: `0.33053947368421055` and `0.33053947368421066`, so the groups are different.