I tested the `fit` functions of almost all classifiers and regressors.
As a result of the tests, the following four models failed to fit: SLIMClassifier, BayesianRuleSetClassifier, SlipperClassifier, and TaoTreeRegressor. (Note that some of the failures shown below — CorelsClassifier, FIGSClassifierCV, and FIGSRegressorCV — actually occur earlier, during `clone()` or `print()`, before `fit` is reached.)
_____________________________________________ test_fit_classifier[classifier1] ______________________________________________
classifier = CorelsClassifier ({'c': 0.01, 'n_iter': 10000, 'map_type': 'prefix', 'policy': 'lower_bound', 'verbosity': [], 'ablation': 0, 'max_card': 2, 'min_support': 0.01})
@pytest.mark.parametrize("classifier", classifiers)
def test_fit_classifier(classifier) -> None:
X, y = make_classification(n_samples=25, n_features=5)
> classifier_ = clone(classifier)
tests\test_fit_print.py:52:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
estimator = CorelsClassifier ({'c': 0.01, 'n_iter': 10000, 'map_type': 'prefix', 'policy': 'lower_bound', 'verbosity': [], 'ablation': 0, 'max_card': 2, 'min_support': 0.01})
def clone(estimator, *, safe=True):
"""Construct a new unfitted estimator with the same parameters.
Clone does a deep copy of the model in an estimator
without actually copying attached data. It returns a new estimator
with the same parameters that has not been fitted on any data.
Parameters
----------
estimator : {list, tuple, set} of estimator instance or a single \
estimator instance
The estimator or group of estimators to be cloned.
safe : bool, default=True
If safe is False, clone will fall back to a deep copy on objects
that are not estimators.
Returns
-------
estimator : object
The deep copy of the input, an estimator if input is an estimator.
Notes
-----
If the estimator's `random_state` parameter is an integer (or if the
estimator doesn't have a `random_state` parameter), an *exact clone* is
returned: the clone and the original estimator will give the exact same
results. Otherwise, *statistical clone* is returned: the clone might
return different results from the original estimator. More details can be
found in :ref:`randomness`.
"""
estimator_type = type(estimator)
# XXX: not handling dictionaries
if estimator_type in (list, tuple, set, frozenset):
return estimator_type([clone(e, safe=safe) for e in estimator])
elif not hasattr(estimator, "get_params") or isinstance(estimator, type):
if not safe:
return copy.deepcopy(estimator)
else:
if isinstance(estimator, type):
raise TypeError(
"Cannot clone object. "
+ "You should provide an instance of "
+ "scikit-learn estimator instead of a class."
)
else:
raise TypeError(
"Cannot clone object '%s' (type %s): "
"it does not seem to be a scikit-learn "
"estimator as it does not implement a "
"'get_params' method." % (repr(estimator), type(estimator))
)
klass = estimator.__class__
> new_object_params = estimator.get_params(deep=False)
E TypeError: CorelsClassifier.get_params() got an unexpected keyword argument 'deep'
..\..\..\Anaconda3\envs\py310\lib\site-packages\sklearn\base.py:87: TypeError
_____________________________________________ test_fit_classifier[classifier5] ______________________________________________
classifier = BayesianRuleSetClassifier()
@pytest.mark.parametrize("classifier", classifiers)
def test_fit_classifier(classifier) -> None:
X, y = make_classification(n_samples=25, n_features=5)
classifier_ = clone(classifier)
> classifier_.fit(X, y)
tests\test_fit_print.py:53:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = BayesianRuleSetClassifier(alpha_l=[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
beta_l=[-0.0...
25200.0, 21000.0, 12000.0, 4500.0, 1000.0,
100.0])
X = X0 X1 X2 X3 X4
0 1.856723 -0.676788 -2.081929 0.139416 1.345762
1 0.942105...948 2.517298
23 -0.556286 -2.165002 -0.522723 1.466807 -0.796446
24 -1.369548 1.188797 -0.544919 -0.542191 -0.878127
y = array([1., 1., 1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1.,
1., 0., 0., 0., 1., 1., 0., 0.])
feature_names = ['X0', 'X1', 'X2', 'X3', 'X4'], init = [], verbose = False
def fit(self, X, y, feature_names: list = None, init=[], verbose=False):
'''
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training data
y : array_like, shape = [n_samples]
Labels
feature_names : array_like, shape = [n_features], optional (default: [])
String labels for each feature.
If empty and X is a DataFrame, column labels are used.
If empty and X is not a DataFrame, then features are simply enumerated
'''
# check inputs
self.attr_level_num = defaultdict(int) # any missing value defaults to 0
self.attr_names = []
X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
np.random.seed(self.random_state)
# convert to pandas DataFrame
X = pd.DataFrame(X, columns=feature_names)
for i, name in enumerate(X.columns):
self.attr_level_num[name] += 1
self.attr_names.append(name)
self.attr_names_orig = deepcopy(self.attr_names)
self.attr_names = list(set(self.attr_names))
# set up patterns
self._set_pattern_space()
# parameter checking
if self.alpha_l is None or self.beta_l is None or len(self.alpha_l) != self.maxlen or len(
self.beta_l) != self.maxlen:
if verbose:
print('No or wrong input for alpha_l and beta_l - the model will use default parameters.')
self.C = [1.0 / self.maxlen] * self.maxlen
self.C.insert(0, -1)
self.alpha_l = [10] * (self.maxlen + 1)
self.beta_l = [10 * self.pattern_space[i] / self.C[i] for i in range(self.maxlen + 1)]
else:
self.alpha_l = [1] + list(self.alpha_l)
self.beta_l = [1] + list(self.beta_l)
# setup
self._generate_rules(X, y, verbose)
n_rules_current = len(self.rules_)
self.rules_len_list = [len(rule) for rule in self.rules_]
maps = defaultdict(list)
T0 = 1000 # initial temperature for simulated annealing
split = 0.7 * self.num_iterations
# run simulated annealing
for chain in range(self.num_chains):
# initialize with a random pattern set
if init != []:
rules_curr = init.copy()
else:
> assert n_rules_current > 1, f'Only {n_rules_current} potential rules found, change hyperparams to allow for more'
E AssertionError: Only 0 potential rules found, change hyperparams to allow for more
imodels\rule_set\brs.py:147: AssertionError
--------------------------------------------------- Captured stdout call ----------------------------------------------------
mat.shape (25, 13626)
p1.shape (13626,) pp.shape (13626,) cond_entropy.shape
_____________________________________________ test_fit_classifier[classifier12] _____________________________________________
classifier = <imodels.tree.figs.FIGSClassifierCV object at 0x0000024E24343580>
@pytest.mark.parametrize("classifier", classifiers)
def test_fit_classifier(classifier) -> None:
X, y = make_classification(n_samples=25, n_features=5)
> classifier_ = clone(classifier)
tests\test_fit_print.py:52:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
estimator = <imodels.tree.figs.FIGSClassifierCV object at 0x0000024E24343580>
def clone(estimator, *, safe=True):
"""Construct a new unfitted estimator with the same parameters.
Clone does a deep copy of the model in an estimator
without actually copying attached data. It returns a new estimator
with the same parameters that has not been fitted on any data.
Parameters
----------
estimator : {list, tuple, set} of estimator instance or a single \
estimator instance
The estimator or group of estimators to be cloned.
safe : bool, default=True
If safe is False, clone will fall back to a deep copy on objects
that are not estimators.
Returns
-------
estimator : object
The deep copy of the input, an estimator if input is an estimator.
Notes
-----
If the estimator's `random_state` parameter is an integer (or if the
estimator doesn't have a `random_state` parameter), an *exact clone* is
returned: the clone and the original estimator will give the exact same
results. Otherwise, *statistical clone* is returned: the clone might
return different results from the original estimator. More details can be
found in :ref:`randomness`.
"""
estimator_type = type(estimator)
# XXX: not handling dictionaries
if estimator_type in (list, tuple, set, frozenset):
return estimator_type([clone(e, safe=safe) for e in estimator])
elif not hasattr(estimator, "get_params") or isinstance(estimator, type):
if not safe:
return copy.deepcopy(estimator)
else:
if isinstance(estimator, type):
raise TypeError(
"Cannot clone object. "
+ "You should provide an instance of "
+ "scikit-learn estimator instead of a class."
)
else:
> raise TypeError(
"Cannot clone object '%s' (type %s): "
"it does not seem to be a scikit-learn "
"estimator as it does not implement a "
"'get_params' method." % (repr(estimator), type(estimator))
)
E TypeError: Cannot clone object '<imodels.tree.figs.FIGSClassifierCV object at 0x0000024E24343580>' (type <class 'imodels.tree.figs.FIGSClassifierCV'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.
..\..\..\Anaconda3\envs\py310\lib\site-packages\sklearn\base.py:79: TypeError
______________________________________________ test_fit_regressor[regressor5] _______________________________________________
regressor = <imodels.tree.figs.FIGSRegressorCV object at 0x0000024E24343A00>
@pytest.mark.parametrize("regressor", regressors)
def test_fit_regressor(regressor) -> None:
X, y = make_regression(n_samples=25, n_features=5)
> regressor_ = clone(regressor)
tests\test_fit_print.py:59:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
estimator = <imodels.tree.figs.FIGSRegressorCV object at 0x0000024E24343A00>
def clone(estimator, *, safe=True):
"""Construct a new unfitted estimator with the same parameters.
Clone does a deep copy of the model in an estimator
without actually copying attached data. It returns a new estimator
with the same parameters that has not been fitted on any data.
Parameters
----------
estimator : {list, tuple, set} of estimator instance or a single \
estimator instance
The estimator or group of estimators to be cloned.
safe : bool, default=True
If safe is False, clone will fall back to a deep copy on objects
that are not estimators.
Returns
-------
estimator : object
The deep copy of the input, an estimator if input is an estimator.
Notes
-----
If the estimator's `random_state` parameter is an integer (or if the
estimator doesn't have a `random_state` parameter), an *exact clone* is
returned: the clone and the original estimator will give the exact same
results. Otherwise, *statistical clone* is returned: the clone might
return different results from the original estimator. More details can be
found in :ref:`randomness`.
"""
estimator_type = type(estimator)
# XXX: not handling dictionaries
if estimator_type in (list, tuple, set, frozenset):
return estimator_type([clone(e, safe=safe) for e in estimator])
elif not hasattr(estimator, "get_params") or isinstance(estimator, type):
if not safe:
return copy.deepcopy(estimator)
else:
if isinstance(estimator, type):
raise TypeError(
"Cannot clone object. "
+ "You should provide an instance of "
+ "scikit-learn estimator instead of a class."
)
else:
> raise TypeError(
"Cannot clone object '%s' (type %s): "
"it does not seem to be a scikit-learn "
"estimator as it does not implement a "
"'get_params' method." % (repr(estimator), type(estimator))
)
E TypeError: Cannot clone object '<imodels.tree.figs.FIGSRegressorCV object at 0x0000024E24343A00>' (type <class 'imodels.tree.figs.FIGSRegressorCV'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.
..\..\..\Anaconda3\envs\py310\lib\site-packages\sklearn\base.py:79: TypeError
______________________________________________ test_fit_regressor[regressor8] _______________________________________________
regressor = TaoTreeRegressor()
@pytest.mark.parametrize("regressor", regressors)
def test_fit_regressor(regressor) -> None:
X, y = make_regression(n_samples=25, n_features=5)
regressor_ = clone(regressor)
> regressor_.fit(X, y)
tests\test_fit_print.py:60:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = TaoTreeRegressor()
X = array([[ 1.37335581e+00, 5.41363727e-01, -1.19638750e-01,
-1.72968975e-01, 3.49329569e-01],
[-1.55348...3.96825794e-01],
[ 3.04033564e-01, -1.14044020e+00, 1.57034218e-01,
1.33012401e+00, 1.06648724e-01]])
y = array([ 103.98582366, -83.95949067, 23.68672904, -136.32004647,
-86.99044435, 1.25016217, 65.46450731,... 188.81385195, 86.91833492,
-77.45621107, 34.3125444 , 92.98970577, -43.27341573,
17.77429275])
feature_names = ['X0', 'X1', 'X2', 'X3', 'X4'], sample_weight = None
def fit(self, X, y=None, feature_names=None, sample_weight=None):
"""
Params
------
_sample_weight: array-like of shape (n_samples,), default=None
Sample weights. If None, then samples are equally weighted.
Splits that would create child nodes with net zero or negative weight
are ignored while searching for a split in each node.
"""
X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
if isinstance(self, RegressorMixin):
> raise Warning('TAO Regression is not yet tested')
E Warning: TAO Regression is not yet tested
imodels\tree\tao.py:115: Warning
_______________________________________ test_fit_before_print_classifier[classifier1] _______________________________________
classifier = CorelsClassifier ({'c': 0.01, 'n_iter': 10000, 'map_type': 'prefix', 'policy': 'lower_bound', 'verbosity': [], 'ablation': 0, 'max_card': 2, 'min_support': 0.01})
@pytest.mark.parametrize("classifier", classifiers)
def test_fit_before_print_classifier(classifier) -> None:
> print(classifier)
tests\test_fit_print.py:65:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = CorelsClassifier ({'c': 0.01, 'n_iter': 10000, 'map_type': 'prefix', 'policy': 'lower_bound', 'verbosity': [], 'ablation': 0, 'max_card': 2, 'min_support': 0.01})
def __str__(self):
if corels_supported:
if self.str_print is not None:
return 'OptimalRuleList:\n\n' + self.str_print
else:
> return 'OptimalRuleList:\n\n' + self.rl_.__str__()
E AttributeError: 'OptimalRuleListClassifier' object has no attribute 'rl_'
imodels\rule_list\corels_wrapper.py:240: AttributeError
To summarize: I tested the `fit` functions of almost all classifiers and regressors, and the four models SLIMClassifier, BayesianRuleSetClassifier, SlipperClassifier, and TaoTreeRegressor failed to fit.
The full test results are shown above.