Home repository for the Regularized Greedy Forest (RGF) library. It includes original implementation from the paper and multithreaded one written in C++, along with various language-specific wrappers.
378
stars
58
forks
source link
Feature importance (permutation or Shapley values) #358
Other than the model importances given by RGF, I would like to verify them with permutation feature importance (scikit-learn) and/or with Shapley values (shap package). But I cannot do either; please help.
Operating System:
Microsoft Windows 10, x64-based PC
AMD Ryzen 7 5800HS Radeon Graphics, 3201 Mhz, 8 Cores, 16 Logical Processors
RGF/FastRGF/rgf_python version: 3.12.0
Python version (for rgf_python errors): 3.10.10
Error Message
runfile('C:/Users/bvphi/Documents/Python Scripts/new/rgf debug.py', wdir='C:/Users/bvphi/Documents/Python Scripts/new')
C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\rgf\utils.py:224: UserWarning: Cannot find FastRGF executable files. FastRGF estimators will be unavailable for usage.
warnings.warn("Cannot find FastRGF executable files. "
_RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\joblib\externals\loky\process_executor.py", line 428, in _process_worker
r = call_item()
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\joblib\externals\loky\process_executor.py", line 275, in __call__
return self.fn(*self.args, **self.kwargs)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\joblib\_parallel_backends.py", line 620, in __call__
return self.func(*args, **kwargs)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\joblib\parallel.py", line 288, in __call__
return [func(*args, **kwargs)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\joblib\parallel.py", line 288, in <listcomp>
return [func(*args, **kwargs)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\utils\parallel.py", line 123, in __call__
return self.function(*args, **kwargs)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\inspection\_permutation_importance.py", line 63, in _calculate_permutation_scores
scores.append(_weights_scorer(scorer, estimator, X_permuted, y, sample_weight))
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\inspection\_permutation_importance.py", line 18, in _weights_scorer
return scorer(estimator, X, y)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\metrics\_scorer.py", line 234, in __call__
return self._score(
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\metrics\_scorer.py", line 276, in _score
y_pred = method_caller(estimator, "predict", X)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\metrics\_scorer.py", line 73, in _cached_call
return getattr(estimator, method)(*args, **kwargs)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\rgf\utils.py", line 698, in predict
return self._estimators[0].predict(X)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\rgf\utils.py", line 352, in predict
self._execute_command(cmd)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\rgf\utils.py", line 303, in _execute_command
raise Exception(output)
Exception: "predict":
model_fn=C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.model-10
test_x_fn=C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.test.data.x
prediction_fn=C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.predictions.txt
Log:ON
Wed Jun 28 17:36:40 2023: Reading test data ...
!File I/O error!: (Detected in AzFile::seekReadBytes)
C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.test.data.x fread
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File ~\miniforge3\envs\spyMLenv\lib\site-packages\spyder_kernels\py3compat.py:356 in compat_exec
exec(code, globals, locals)
File c:\users\bvphi\documents\python scripts\new\rgf debug.py:21
result = permutation_importance(do, X_test, y_test, scoring="r2",
File ~\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\inspection\_permutation_importance.py:258 in permutation_importance
scores = Parallel(n_jobs=n_jobs)(
File ~\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\utils\parallel.py:63 in __call__
return super().__call__(iterable_with_config)
File ~\miniforge3\envs\spyMLenv\lib\site-packages\joblib\parallel.py:1098 in __call__
self.retrieve()
File ~\miniforge3\envs\spyMLenv\lib\site-packages\joblib\parallel.py:975 in retrieve
self._output.extend(job.get(timeout=self.timeout))
File ~\miniforge3\envs\spyMLenv\lib\site-packages\joblib\_parallel_backends.py:567 in wrap_future_result
return future.result(timeout=timeout)
File ~\miniforge3\envs\spyMLenv\lib\site-packages\concurrent\futures\_base.py:458 in result
return self.__get_result()
File ~\miniforge3\envs\spyMLenv\lib\site-packages\concurrent\futures\_base.py:403 in __get_result
raise self._exception
Exception: "predict":
model_fn=C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.model-10
test_x_fn=C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.test.data.x
prediction_fn=C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.predictions.txt
Log:ON
Wed Jun 28 17:36:40 2023: Reading test data ...
!File I/O error!: (Detected in AzFile::seekReadBytes)
C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.test.data.x fread
Traceback (most recent call last):
File ~\miniforge3\envs\spyMLenv\lib\site-packages\spyder_kernels\py3compat.py:356 in compat_exec
exec(code, globals, locals)
File c:\users\bvphi\documents\python scripts\new\rgf debug.py:36
ex = shap.TreeExplainer(rgf)
File ~\miniforge3\envs\spyMLenv\lib\site-packages\shap\explainers\_tree.py:149 in __init__
self.model = TreeEnsemble(model, self.data, self.data_missing, model_output)
File ~\miniforge3\envs\spyMLenv\lib\site-packages\shap\explainers\_tree.py:993 in __init__
raise InvalidModelError("Model type not yet supported by TreeExplainer: " + str(type(model)))
InvalidModelError: Model type not yet supported by TreeExplainer:
## Reproducible Example
# Import modules
import gc
import time
import shap
import numpy as np
import pandas as pd
from rgf.sklearn import RGFRegressor
from rgf.utils import cleanup
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.inspection import permutation_importance
# Reproducer script: fits an RGFRegressor on synthetic regression data, then
# tries to compute (1) scikit-learn permutation feature importance and
# (2) SHAP values via TreeExplainer. Both steps fail as shown in the
# tracebacks of this report; the code is kept as-is so that it still
# reproduces them.

# Single RandomState instance shared by the train/test split and by
# permutation_importance below.
rs = np.random.RandomState(2468097531)
# NOTE(review): `rs` is not passed to make_regression, so the generated
# dataset itself is presumably not reproducible between runs -- confirm.
X, y = make_regression(n_samples=15000, n_features=5)
# Model
rgf = RGFRegressor()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rs)
# PFI
# The timer starts before fit(), so the elapsed time printed below covers
# model training as well as the permutation-importance computation.
start_time = time.time()
rgf.fit(X_train, y_train)
# This is the call the first reported traceback ends in: a worker process
# raises from rgf's predict() with "!File I/O error!" while reading the
# temporary *.test.data.x file.
# NOTE(review): n_jobs=-1 requests parallel workers; the failure may be
# related to several workers using the same RGF temp-file prefix -- TODO
# confirm (e.g. by re-running with n_jobs=1).
result = permutation_importance(rgf, X_test, y_test, scoring="r2",
n_repeats=10, n_jobs=-1, random_state=rs)
elapsed_time = time.time() - start_time
print(f"Elapsed time to compute the importances: {elapsed_time:.3f} seconds")
# One label per feature; the count (5) matches n_features=5 above.
feature_names = ["1", "2", "3", "4", "5"]
Importances = pd.Series(result.importances_mean, index=feature_names)
# Bar chart of mean importances with the per-feature standard deviation
# drawn as error bars.
fig, ax = plt.subplots()
Importances.plot.bar(yerr=result.importances_std, ax=ax)
ax.set_title("Permutations of full model")
ax.set_ylabel("Feature importances")
fig.tight_layout()
plt.show()
# SHAP
shap.initjs()
# This is the call the second reported traceback ends in: TreeExplainer
# raises InvalidModelError ("Model type not yet supported by TreeExplainer")
# for the RGFRegressor instance.
ex = shap.TreeExplainer(rgf)
shap_values = ex.shap_values(X_test)
shap.summary_plot(shap_values, X_test)
# cleanup() comes from rgf.utils; presumably it removes the temporary files
# RGF wrote during fit/predict -- verify against the rgf_python docs.
cleanup()
gc.collect()
Other than the model importances given by RGF, I would like to verify them with permutation feature importance (scikit-learn) and/or with Shapley values (shap package). But I cannot do either; please help.
Environment Info
ACSVCPORT: 17532 ALLUSERSPROFILE: C:\ProgramData APPDATA: C:\Users\bvphi\AppData\Roaming ASL.LOG: Destination=file COMMONPROGRAMFILES: C:\Program Files\Common Files COMMONPROGRAMFILES(X86): C:\Program Files (x86)\Common Files COMMONPROGRAMW6432: C:\Program Files\Common Files COMPUTERNAME: LAPTOP-F77VAIJ2 COMSPEC: C:\Windows\system32\cmd.exe CONDA_DEFAULT_ENV: spyMLenv CONDA_EXE: C:\Users\bvphi\miniforge3\condabin..\Scripts\conda.exe CONDA_EXES: "C:\Users\bvphi\miniforge3\condabin..\Scripts\conda.exe"
CONDA_PREFIX: C:\Users\bvphi\miniforge3\envs\spyMLenv CONDA_PREFIX_1: C:\Users\bvphi\miniforge3 CONDA_PROMPT_MODIFIER: (spyMLenv) CONDA_PYTHON_EXE: C:\Users\bvphi\miniforge3\python.exe CONDA_SHLVL: 2 DRIVERDATA: C:\Windows\System32\Drivers\DriverData HOMEDRIVE: C: HOMEPATH: \Users\bvphi LOCALAPPDATA: C:\Users\bvphi\AppData\Local LOGONSERVER: \LAPTOP-F77VAIJ2 NUMBER_OF_PROCESSORS: 16 ONEDRIVE: C:\Users\bvphi\OneDrive ONEDRIVECONSUMER: C:\Users\bvphi\OneDrive OS: Windows_NT PATH: C:\Users\bvphi\miniforge3\envs\spyMLenv;C:\Users\bvphi\miniforge3\envs\spyMLenv\Library\mingw-w64\bin;C:\Users\bvphi\miniforge3\envs\spyMLenv\Library\usr\bin;C:\Users\bvphi\miniforge3\envs\spyMLenv\Library\bin;C:\Users\bvphi\miniforge3\envs\spyMLenv\Scripts;C:\Users\bvphi\miniforge3\envs\spyMLenv\bin;C:\Users\bvphi\miniforge3\condabin;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0;C:\Windows\System32\OpenSSH;C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common;C:\Program Files\NVIDIA Corporation\NVIDIA NvDLISR;C:\Program Files\Brandon Castellano\DVR-Scan;C:\Program Files\dotnet;C:\Users\bvphi\AppData\Local\Programs\Python\Python311\Scripts;C:\Users\bvphi\AppData\Local\Programs\Python\Python311;C:\Users\bvphi\AppData\Local\Microsoft\WindowsApps PATHEXT: .COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC PROCESSOR_ARCHITECTURE: AMD64 PROCESSOR_IDENTIFIER: AMD64 Family 25 Model 80 Stepping 0, AuthenticAMD PROCESSOR_LEVEL: 25 PROCESSOR_REVISION: 5000 PROGRAMDATA: C:\ProgramData PROGRAMFILES: C:\Program Files PROGRAMFILES(X86): C:\Program Files (x86) PROGRAMW6432: C:\Program Files PROMPT: (spyMLenv) $P$G PSMODULEPATH: C:\Program Files\WindowsPowerShell\Modules;C:\Windows\system32\WindowsPowerShell\v1.0\Modules PUBLIC: C:\Users\Public RLSSVCPORT: 22112 SYSTEMDRIVE: C: SYSTEMROOT: C:\Windows TEMP: C:\Users\bvphi\AppData\Local\Temp TMP: C:\Users\bvphi\AppData\Local\Temp USERDOMAIN: LAPTOP-F77VAIJ2 USERDOMAIN_ROAMINGPROFILE: LAPTOP-F77VAIJ2 
USERNAME: bvphi USERPROFILE: C:\Users\bvphi WINDIR: C:\Windows XML_CATALOG_FILES: file:///C:/Users/bvphi/miniforge3/envs/spyMLenv/etc/xml/catalog LANG: en SPYDER_ARGS: [] QT_SCALE_FACTOR: QT_SCREEN_SCALE_FACTORS: SPYDER_DEBUG_FILE: C:\Users\bvphi.spyder-py3\spyder-debug.log SPY_EXTERNAL_INTERPRETER: False SPY_UMR_ENABLED: True SPY_UMR_VERBOSE: True SPY_UMR_NAMELIST: SPY_RUN_LINES_O: SPY_PYLAB_O: True SPY_BACKEND_O: 0 SPY_AUTOLOAD_PYLAB_O: False SPY_FORMAT_O: 0 SPY_BBOX_INCHES_O: True SPY_RESOLUTION_O: 72 SPY_WIDTH_O: 6 SPY_HEIGHT_O: 4 SPY_USE_FILE_O: False SPY_RUN_FILE_O: SPY_AUTOCALL_O: 0 SPY_GREEDY_O: False SPY_JEDI_O: False SPY_SYMPY_O: False SPY_TESTING: False SPY_HIDE_CMD: True SPY_PYTHONPATH: JPY_INTERRUPT_EVENT: 11012 IPY_INTERRUPT_EVENT: 11012 JPY_PARENT_PID: 11016 PYDEVD_USE_FRAME_EVAL: NO TERM: xterm-color CLICOLOR: 1 FORCE_COLOR: 1 CLICOLOR_FORCE: 1 PAGER: cat GIT_PAGER: cat MPLBACKEND: module://matplotlib_inline.backend_inline KMP_INIT_AT_FORK: FALSE KMP_DUPLICATE_LIB_OK: True
Operating System: Microsoft Windows 10, x64-based PC AMD Ryzen 7 5800HS Radeon Graphics, 3201 Mhz, 8 Cores, 16 Logical Processors
RGF/FastRGF/rgf_python version: 3.12.0
Python version (for rgf_python errors): 3.10.10
Error Message
runfile('C:/Users/bvphi/Documents/Python Scripts/new/rgf debug.py', wdir='C:/Users/bvphi/Documents/Python Scripts/new') C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\rgf\utils.py:224: UserWarning: Cannot find FastRGF executable files. FastRGF estimators will be unavailable for usage. warnings.warn("Cannot find FastRGF executable files. " _RemoteTraceback: """ Traceback (most recent call last): File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\joblib\externals\loky\process_executor.py", line 428, in _process_worker r = call_item() File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\joblib\externals\loky\process_executor.py", line 275, in call return self.fn(*self.args, self.kwargs) File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\joblib_parallel_backends.py", line 620, in call return self.func(*args, *kwargs) File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\joblib\parallel.py", line 288, in call return [func(args, kwargs) File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\joblib\parallel.py", line 288, in
return [func(*args, kwargs)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\utils\parallel.py", line 123, in call
return self.function(*args, *kwargs)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\inspection_permutation_importance.py", line 63, in _calculate_permutation_scores
scores.append(_weights_scorer(scorer, estimator, X_permuted, y, sample_weight))
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\inspection_permutation_importance.py", line 18, in _weights_scorer
return scorer(estimator, X, y)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\metrics_scorer.py", line 234, in call
return self._score(
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\metrics_scorer.py", line 276, in _score
y_pred = method_caller(estimator, "predict", X)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\metrics_scorer.py", line 73, in _cached_call
return getattr(estimator, method)(args, kwargs)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\rgf\utils.py", line 698, in predict
return self._estimators[0].predict(X)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\rgf\utils.py", line 352, in predict
self._execute_command(cmd)
File "C:\Users\bvphi\miniforge3\envs\spyMLenv\lib\site-packages\rgf\utils.py", line 303, in _execute_command
raise Exception(output)
Exception: "predict":
model_fn=C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.model-10
test_x_fn=C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.test.data.x
prediction_fn=C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.predictions.txt
Log:ON
Wed Jun 28 17:36:40 2023: Reading test data ... !File I/O error!: (Detected in AzFile::seekReadBytes) C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.test.data.x fread """ The above exception was the direct cause of the following exception: Traceback (most recent call last): File ~\miniforge3\envs\spyMLenv\lib\site-packages\spyder_kernels\py3compat.py:356 in compat_exec exec(code, globals, locals) File c:\users\bvphi\documents\python scripts\new\rgf debug.py:21 result = permutation_importance(do, X_test, y_test, scoring="r2", File ~\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\inspection_permutation_importance.py:258 in permutation_importance scores = Parallel(n_jobs=n_jobs)( File ~\miniforge3\envs\spyMLenv\lib\site-packages\sklearn\utils\parallel.py:63 in call return super().call(iterable_with_config) File ~\miniforge3\envs\spyMLenv\lib\site-packages\joblib\parallel.py:1098 in call self.retrieve() File ~\miniforge3\envs\spyMLenv\lib\site-packages\joblib\parallel.py:975 in retrieve self._output.extend(job.get(timeout=self.timeout)) File ~\miniforge3\envs\spyMLenv\lib\site-packages\joblib_parallel_backends.py:567 in wrap_future_result return future.result(timeout=timeout) File ~\miniforge3\envs\spyMLenv\lib\concurrent\futures_base.py:458 in result return self.get_result() File ~\miniforge3\envs\spyMLenv\lib\concurrent\futures_base.py:403 in get_result raise self._exception Exception: "predict": model_fn=C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.model-10 test_x_fn=C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.test.data.x prediction_fn=C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.predictions.txt Log:ON
Wed Jun 28 17:36:40 2023: Reading test data ... !File I/O error!: (Detected in AzFile::seekReadBytes) C:\Users\bvphi\AppData\Local\Temp\rgf\31f6e18b-26f2-44d0-9b6f-aa37840498c31.test.data.x fread