Error with the "sampling" calculation method

PierrickPochelu commented 1 year ago

# Minimal reproduction script: fit a RandomSurvivalForest on synthetic data,
# then explain it with ModelSurvSHAP using calculation_method="sampling".
import time

import numpy as np
import pandas as pd
from sksurv.ensemble import RandomSurvivalForest
from survshap import SurvivalModelExplainer, ModelSurvSHAP

nb_features = 7
nb_events = 200

# Synthetic data: random feature matrix and one random time per row.
np_X = np.random.rand(nb_events, nb_features)
np_time = np.random.rand(nb_events, 1)
# Boolean flag per row: True where the first feature is smaller than the time.
# NOTE(review): this flag is stored in the 'event' field below even though the
# variable is named "is_living" -- confirm the intended meaning.
np_is_living = np_X[:, 0] < np_time[:, 0]

# Structured target array with a boolean 'event' field and a float 'time'
# field. '<f8' (float64) is used instead of '<f16': extended-precision floats
# are not available in every NumPy build, so '<f16' can fail on some platforms
# before the code under test is even reached.
y = np.empty(nb_events, dtype=[('event', '?'), ('time', '<f8')])
y['event'] = np_is_living.reshape(-1)
y['time'] = np_time.reshape(-1)
# Feature columns are named f1 .. f<nb_features>.
X = pd.DataFrame(np_X, columns=['f' + str(i) for i in range(1, nb_features + 1)])

# Fit the model and report its score on the training data plus the fit time.
rsf = RandomSurvivalForest(random_state=42)
st = time.time()
rsf.fit(X, y)
print(f"score:{rsf.score(X,y)} fit time:{time.time()-st}")
print(f"predict: {rsf.predict(X)}")

# Explain the fitted model with the "sampling" calculation method and time it.
# NOTE(review): presumably the fit() call below is the one that raised the
# reported TypeError -- the traceback excerpt does not show the call site.
exp_rsf = SurvivalModelExplainer(rsf, X, y)
ms_rsf = ModelSurvSHAP(random_state=42, calculation_method="sampling")
st = time.time()
ms_rsf.fit(exp_rsf)
print(f"Interpretation time:{time.time()-st}")

produces:

    raise TypeError(f"Could not convert {x} to numeric") from err
TypeError: Could not convert f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1 to numeric

Process finished with exit code 1

krzyzinskim commented 1 year ago

Thank you for reporting the error. It was caused by pandas 2.0.0 changing the default value of a parameter (`numeric_only`) of the DataFrameGroupBy.mean() function. It's already fixed.

PierrickPochelu commented 1 year ago

# Minimal reproduction script: fit a RandomSurvivalForest on synthetic data,
# then explain it with ModelSurvSHAP using calculation_method="sampling".
import time

import numpy as np
import pandas as pd
from sksurv.ensemble import RandomSurvivalForest
from survshap import SurvivalModelExplainer, ModelSurvSHAP

nb_features = 7
nb_events = 200

# Synthetic data: random feature matrix and one random time per row.
np_X = np.random.rand(nb_events, nb_features)
np_time = np.random.rand(nb_events, 1)
# Boolean flag per row: True where the first feature is smaller than the time.
np_is_living = np_X[:, 0] < np_time[:, 0]

# Structured target array with a boolean 'event' field and a float 'time'
# field. '<f8' (float64) is used instead of '<f16' because extended-precision
# floats are not available in every NumPy build.
y = np.empty(nb_events, dtype=[('event', '?'), ('time', '<f8')])
y['event'] = np_is_living.reshape(-1)
y['time'] = np_time.reshape(-1)
# Feature columns are named f1 .. f<nb_features>.
X = pd.DataFrame(np_X, columns=['f' + str(i) for i in range(1, nb_features + 1)])

# Fit the model and report its score on the training data plus the fit time.
rsf = RandomSurvivalForest(random_state=42)
st = time.time()
rsf.fit(X, y)
print(f"score:{rsf.score(X,y)} fit time:{time.time()-st}")
print(f"predict: {rsf.predict(X)}")

# Explain the fitted model with the "sampling" calculation method and time it.
exp_rsf = SurvivalModelExplainer(rsf, X, y)
ms_rsf = ModelSurvSHAP(random_state=42, calculation_method="sampling")
st = time.time()
ms_rsf.fit(exp_rsf)
print(f"Interpretation time:{time.time()-st}")

PierrickPochelu commented 1 year ago

Good job! Everything is working fine now :)

MI2DataLab / survshap

Error with the "sampling" calculation method #20