Open tegku opened 9 months ago
To fix this, open Boruta-Shap/src/BorutaShap.py and edit the following two lines:
In Line 9:
Change
from scipy.stats import binom_test, ks_2samp
to
from scipy.stats import binomtest, ks_2samp
In Line 885:
Change
return [binom_test(x, n=n, p=p, alternative=alternative) for x in array]
to
return [binomtest(x, n=n, p=p, alternative=alternative).pvalue for x in array]
Dear Sir or Madam,
After applying your revision, I ran into another error, raised from 'binomtest(x, n=n, p=p, alternative=alternative).pvalue':
0%| | 0/100 [00:08<?, ?it/s]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[75], line 10
5 X_encoded = X#OrdinalEncoderPandas().fit_transform(X=X)
6 Feature_Selector9 = BorutaShap(
7 model=model, importance_measure="shap", classification=True
8 )
---> 10 Feature_Selector9.fit(X=X_encoded, y=y, n_trials=100, random_state=0)
12 # Returns Boxplot of features
13 Feature_Selector9.plot(X_size=12, figsize=(8, 6), y_scale="log", which_features="all")
File /environment/miniconda3/envs/p39/lib/python3.9/site-packages/BorutaShap.py:473, in BorutaShap.fit(self, X, y, sample_weight, n_trials, random_state, sample, train_or_test, normalize, verbose, stratify)
471 self.hits += hits
472 self.history_hits = np.vstack((self.history_hits, self.hits))
--> 473 self.test_features(iteration=trial+1)
475 self.store_feature_importance()
476 self.calculate_rejected_accepted_tentative(verbose=verbose)
File /environment/miniconda3/envs/p39/lib/python3.9/site-packages/BorutaShap.py:934, in BorutaShap.test_features(self, iteration)
919 def test_features(self, iteration):
921 """
922 For each feature with an undetermined importance perform a two-sided test of equality
923 with the maximum shadow value to determine if it is statistcally better
(...)
931 Two arrays of the names of the accepted and rejected columns at that instance
932 """
--> 934 acceptance_p_values = self.binomial_H0_test(self.hits,
935 n=iteration,
936 p=0.5,
937 alternative='greater')
939 regect_p_values = self.binomial_H0_test(self.hits,
940 n=iteration,
941 p=0.5,
942 alternative='less')
944 # [1] as function returns a tuple
File /environment/miniconda3/envs/p39/lib/python3.9/site-packages/BorutaShap.py:885, in BorutaShap.binomial_H0_test(array, n, p, alternative)
878 @staticmethod
879 def binomial_H0_test(array, n, p, alternative):
880 """
881 Perform a test that the probability of success is p.
882 This is an exact, two-sided test of the null hypothesis
883 that the probability of success in a Bernoulli experiment is p
884 """
--> 885 return [binomtest(x, n=n, p=p, alternative=alternative).pvalue for x in array]
File /environment/miniconda3/envs/p39/lib/python3.9/site-packages/BorutaShap.py:885, in <listcomp>(.0)
878 @staticmethod
879 def binomial_H0_test(array, n, p, alternative):
880 """
881 Perform a test that the probability of success is p.
882 This is an exact, two-sided test of the null hypothesis
883 that the probability of success in a Bernoulli experiment is p
884 """
--> 885 return [binomtest(x, n=n, p=p, alternative=alternative).pvalue for x in array]
File /environment/miniconda3/envs/p39/lib/python3.9/site-packages/scipy/stats/_binomtest.py:287, in binomtest(k, n, p, alternative)
202 def binomtest(k, n, p=0.5, alternative='two-sided'):
203 """
204 Perform a test that the probability of success is p.
205
(...)
285
286 """
--> 287 k = _validate_int(k, 'k', minimum=0)
288 n = _validate_int(n, 'n', minimum=1)
289 if k > n:
File /environment/miniconda3/envs/p39/lib/python3.9/site-packages/scipy/_lib/_util.py:352, in _validate_int(k, name, minimum)
350 k = operator.index(k)
351 except TypeError:
--> 352 raise TypeError(f'{name} must be an integer.') from None
353 if minimum is not None and k < minimum:
354 raise ValueError(f'{name} must be an integer not less '
355 f'than {minimum}') from None
TypeError: k must be an integer.
Could you give some advice?
尊敬的先生/女士:
根据您的修订版,我在 'binomtest(x, n=n, p=p, alternative=alternative).pvalue' 中还遇到了另一个问题:
0%| | 0/100 [00:08<?, ?it/s] --------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[75], line 10 5 X_encoded = X#OrdinalEncoderPandas().fit_transform(X=X) 6 Feature_Selector9 = BorutaShap( 7 model=model, importance_measure="shap", classification=True 8 ) ---> 10 Feature_Selector9.fit(X=X_encoded, y=y, n_trials=100, random_state=0) 12 # Returns Boxplot of features 13 Feature_Selector9.plot(X_size=12, figsize=(8, 6), y_scale="log", which_features="all") File /environment/miniconda3/envs/p39/lib/python3.9/site-packages/BorutaShap.py:473, in BorutaShap.fit(self, X, y, sample_weight, n_trials, random_state, sample, train_or_test, normalize, verbose, stratify) 471 self.hits += hits 472 self.history_hits = np.vstack((self.history_hits, self.hits)) --> 473 self.test_features(iteration=trial+1) 475 self.store_feature_importance() 476 self.calculate_rejected_accepted_tentative(verbose=verbose) File /environment/miniconda3/envs/p39/lib/python3.9/site-packages/BorutaShap.py:934, in BorutaShap.test_features(self, iteration) 919 def test_features(self, iteration): 921 """ 922 For each feature with an undetermined importance perform a two-sided test of equality 923 with the maximum shadow value to determine if it is statistcally better (...) 931 Two arrays of the names of the accepted and rejected columns at that instance 932 """ --> 934 acceptance_p_values = self.binomial_H0_test(self.hits, 935 n=iteration, 936 p=0.5, 937 alternative='greater') 939 regect_p_values = self.binomial_H0_test(self.hits, 940 n=iteration, 941 p=0.5, 942 alternative='less') 944 # [1] as function returns a tuple File /environment/miniconda3/envs/p39/lib/python3.9/site-packages/BorutaShap.py:885, in BorutaShap.binomial_H0_test(array, n, p, alternative) 878 @staticmethod 879 def binomial_H0_test(array, n, p, alternative): 880 """ 881 Perform a test that the probability of success is p. 
882 This is an exact, two-sided test of the null hypothesis 883 that the probability of success in a Bernoulli experiment is p 884 """ --> 885 return [binomtest(x, n=n, p=p, alternative=alternative).pvalue for x in array] File /environment/miniconda3/envs/p39/lib/python3.9/site-packages/BorutaShap.py:885, in <listcomp>(.0) 878 @staticmethod 879 def binomial_H0_test(array, n, p, alternative): 880 """ 881 Perform a test that the probability of success is p. 882 This is an exact, two-sided test of the null hypothesis 883 that the probability of success in a Bernoulli experiment is p 884 """ --> 885 return [binomtest(x, n=n, p=p, alternative=alternative).pvalue for x in array] File /environment/miniconda3/envs/p39/lib/python3.9/site-packages/scipy/stats/_binomtest.py:287, in binomtest(k, n, p, alternative) 202 def binomtest(k, n, p=0.5, alternative='two-sided'): 203 """ 204 Perform a test that the probability of success is p. 205 (...) 285 286 """ --> 287 k = _validate_int(k, 'k', minimum=0) 288 n = _validate_int(n, 'n', minimum=1) 289 if k > n: File /environment/miniconda3/envs/p39/lib/python3.9/site-packages/scipy/_lib/_util.py:352, in _validate_int(k, name, minimum) 350 k = operator.index(k) 351 except TypeError: --> 352 raise TypeError(f'{name} must be an integer.') from None 353 if minimum is not None and k < minimum: 354 raise ValueError(f'{name} must be an integer not less ' 355 f'than {minimum}') from None TypeError: k must be an integer.
您能给出一些建议吗?
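在 _util.py 中将 350~352 行注释掉就好。
(English: Just comment out lines 350–352 in _util.py.)
Note: this edits SciPy's own installed source and disables its integer validation. Since the TypeError says k must be an integer, a less invasive alternative is probably to cast the hit count in BorutaShap.py line 885 instead, e.g. binomtest(int(x), n=n, p=p, alternative=alternative).pvalue — please verify against your data.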
Error: Running
from BorutaShap import BorutaShap
raises the following error:
ImportError: cannot import name 'binom_test' from 'scipy.stats'
Steps to reproduce the behavior:
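Reason: The function scipy.stats.binom_test() was removed in SciPy 1.12.0. Its replacement is scipy.stats.binomtest(), which returns a result object rather than a bare p-value, so the p-value must be read from its .pvalue attribute.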