Open jmedlen opened 5 years ago
This is actually a mistake. You can work around it like this:
rf = RuleFit(tree_generator=gb)
The documentation should be updated.
The full example should really be like this:
from sklearn.datasets import load_boston
from rulefit import RuleFit
data = load_boston()
features = data.feature_names
X = data.data
y = data.target
rf = RuleFit()
rf.fit(X, y, feature_names=features)
If you want to have influence on the tree generator you can pass the generator as argument:
from sklearn.ensemble import GradientBoostingRegressor
gb = GradientBoostingRegressor(n_estimators=500, max_depth=10, learning_rate=0.01)
rf = RuleFit(tree_generator=gb)
rf.fit(X, y, feature_names=features)
@benman1
rulefit==0.3.1
I followed your example and got this error.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File /t/pyenv/versions/py-default/lib/python3.10/site-packages/numpy/core/getlimits.py:459, in finfo.__new__(cls, dtype)
458 try:
--> 459 dtype = numeric.dtype(dtype)
460 except TypeError:
461 # In case a float instance was given
TypeError: 'numpy.dtype[bool_]' object is not callable
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
Input In [89], in <module>
----> 1 rf.fit(data.data, data.target, feature_names=data.feature_names)
File /t/pyenv/versions/py-default/lib/python3.10/site-packages/rulefit/rulefit.py:447, in RuleFit.fit(self, X, y, feature_names)
445 alphas=None
446 self.lscv = LassoCV(n_alphas=n_alphas,alphas=alphas,cv=self.cv,random_state=self.random_state)
--> 447 self.lscv.fit(X_concat, y)
448 self.coef_=self.lscv.coef_
449 self.intercept_=self.lscv.intercept_
File /t/pyenv/versions/py-default/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:1616, in LinearModelCV.fit(self, X, y, sample_weight)
1614 n_l1_ratio = len(l1_ratios)
1615 if alphas is None:
-> 1616 alphas = [
1617 _alpha_grid(
1618 X,
1619 y,
1620 l1_ratio=l1_ratio,
1621 fit_intercept=self.fit_intercept,
1622 eps=self.eps,
1623 n_alphas=self.n_alphas,
1624 normalize=_normalize,
1625 copy_X=self.copy_X,
1626 )
1627 for l1_ratio in l1_ratios
1628 ]
1629 else:
1630 # Making sure alphas is properly ordered.
1631 alphas = np.tile(np.sort(alphas)[::-1], (n_l1_ratio, 1))
File /t/pyenv/versions/py-default/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:1617, in <listcomp>(.0)
1614 n_l1_ratio = len(l1_ratios)
1615 if alphas is None:
1616 alphas = [
-> 1617 _alpha_grid(
1618 X,
1619 y,
1620 l1_ratio=l1_ratio,
1621 fit_intercept=self.fit_intercept,
1622 eps=self.eps,
1623 n_alphas=self.n_alphas,
1624 normalize=_normalize,
1625 copy_X=self.copy_X,
1626 )
1627 for l1_ratio in l1_ratios
1628 ]
1629 else:
1630 # Making sure alphas is properly ordered.
1631 alphas = np.tile(np.sort(alphas)[::-1], (n_l1_ratio, 1))
File /t/pyenv/versions/py-default/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:183, in _alpha_grid(X, y, Xy, l1_ratio, fit_intercept, eps, n_alphas, normalize, copy_X)
179 Xy /= X_scale[:, np.newaxis]
181 alpha_max = np.sqrt(np.sum(Xy ** 2, axis=1)).max() / (n_samples * l1_ratio)
--> 183 if alpha_max <= np.finfo(float).resolution:
184 alphas = np.empty(n_alphas)
185 alphas.fill(np.finfo(float).resolution)
File /t/pyenv/versions/py-default/lib/python3.10/site-packages/numpy/core/getlimits.py:462, in finfo.__new__(cls, dtype)
459 dtype = numeric.dtype(dtype)
460 except TypeError:
461 # In case a float instance was given
--> 462 dtype = numeric.dtype(type(dtype))
464 obj = cls._finfo_cache.get(dtype, None)
465 if obj is not None:
TypeError: 'numpy.dtype[bool_]' object is not callable
Dear @elcolie — it's been two years. I haven't used this package since. However, what I can see is this:
181 alpha_max = np.sqrt(np.sum(Xy ** 2, axis=1)).max() / (n_samples * l1_ratio)
--> 183 if alpha_max <= np.finfo(float).resolution:
np.finfo() gives you basic information about a data type — float in this case. For some reason it is not working here. Maybe your numpy installation is out of date?
It's supposed to work like this:
>>> np.finfo(float)
finfo(resolution=1e-15, min=-1.7976931348623157e+308, max=1.7976931348623157e+308, dtype=float64)
@christophM, anything to add?
If you want to have influence on the tree generator, you can pass the generator as an argument:
from sklearn.ensemble import GradientBoostingRegressor
gb = GradientBoostingRegressor(n_estimators=50, max_depth=10, learning_rate=0.01)
rf = RuleFit(gb)
rf.fit(X, y, feature_names=features)
Predict:
rf.predict(X)
Inspect rules:
rules = rf.get_rules()
rules = rules[rules.coef != 0].sort_values("support", ascending=False)
print(rules)
ERROR TypeError Traceback (most recent call last)