civisanalytics / python-glmnet

A Python port of the glmnet package for fitting generalized linear models via penalized maximum likelihood.
Other
262 stars 59 forks source link

ElasticNet and LogitNet fail "check_estimator" #38

Closed stephen-hoover closed 5 years ago

stephen-hoover commented 6 years ago

If I run

from glmnet import ElasticNet, LogitNet
from sklearn.utils.estimator_checks import check_estimator
check_estimator(ElasticNet)
check_estimator(LogitNet)

then `check_estimator` fails for each of the two estimators.

For the ElasticNet, the error is

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-40-d2891e7905ab> in <module>()
----> 1 check_estimator(ElasticNet)

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/utils/estimator_checks.py in check_estimator(Estimator)
    263     for check in _yield_all_checks(name, estimator):
    264         try:
--> 265             check(name, estimator)
    266         except SkipTest as message:
    267             # the only SkipTest thrown currently results from not

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/utils/testing.py in wrapper(*args, **kwargs)
    289             with warnings.catch_warnings():
    290                 warnings.simplefilter("ignore", self.category)
--> 291                 return fn(*args, **kwargs)
    292
    293         return wrapper

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/utils/estimator_checks.py in check_sample_weights_list(name, estimator_orig)
    429         sample_weight = [3] * 10
    430         # Test that estimators don't raise any exception
--> 431         estimator.fit(X, y, sample_weight=sample_weight)
    432
    433

~/anaconda3/envs/civis/lib/python3.6/site-packages/glmnet/linear.py in fit(self, X, y, sample_weight, relative_penalties)
    186             sample_weight = np.ones(X.shape[0])
    187
--> 188         self._fit(X, y, sample_weight, relative_penalties)
    189
    190         if self.n_splits >= 3:

~/anaconda3/envs/civis/lib/python3.6/site-packages/glmnet/linear.py in _fit(self, X, y, sample_weight, relative_penalties)
    225
    226         _y = y.astype(dtype=np.float64, order='F', copy=True)
--> 227         _sample_weight = sample_weight.astype(dtype=np.float64, order='F',
    228                                               copy=True)
    229

AttributeError: 'list' object has no attribute 'astype'

and for the LogitNet, it's

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-42-b458d16bd33c> in <module>()
----> 1 check_estimator(LogitNet)

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/utils/estimator_checks.py in check_estimator(Estimator)
    263     for check in _yield_all_checks(name, estimator):
    264         try:
--> 265             check(name, estimator)
    266         except SkipTest as message:
    267             # the only SkipTest thrown currently results from not

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/utils/testing.py in wrapper(*args, **kwargs)
    289             with warnings.catch_warnings():
    290                 warnings.simplefilter("ignore", self.category)
--> 291                 return fn(*args, **kwargs)
    292
    293         return wrapper

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/utils/estimator_checks.py in check_sample_weights_list(name, estimator_orig)
    429         sample_weight = [3] * 10
    430         # Test that estimators don't raise any exception
--> 431         estimator.fit(X, y, sample_weight=sample_weight)
    432
    433

~/anaconda3/envs/civis/lib/python3.6/site-packages/glmnet/logistic.py in fit(self, X, y, sample_weight, relative_penalties)
    196                                            self.scoring, classifier=True,
    197                                            n_jobs=self.n_jobs,
--> 198                                            verbose=self.verbose)
    199
    200             self.cv_mean_score_ = np.atleast_1d(np.mean(cv_scores, axis=0))

~/anaconda3/envs/civis/lib/python3.6/site-packages/glmnet/util.py in _score_lambda_path(est, X, y, sample_weight, relative_penalties, cv, scoring, classifier, n_jobs, verbose)
     69             delayed(_fit_and_score)(est, scorer, X, y, sample_weight, relative_penalties,
     70                                     est.lambda_path_, train_idx, test_idx)
---> 71             for (train_idx, test_idx) in cv)
     72
     73     return scores

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    777             # was dispatched. In particular this covers the edge
    778             # case of Parallel used with an exhausted iterator.
--> 779             while self.dispatch_one_batch(iterator):
    780                 self._iterating = True
    781             else:

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    623                 return False
    624             else:
--> 625                 self._dispatch(tasks)
    626                 return True
    627

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
    586         dispatch_timestamp = time.time()
    587         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588         job = self._backend.apply_async(batch, callback=cb)
    589         self._jobs.append(job)
    590

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
    109     def apply_async(self, func, callback=None):
    110         """Schedule a func to be run"""
--> 111         result = ImmediateResult(func)
    112         if callback:
    113             callback(result)

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
    330         # Don't delay the application, to avoid keeping the input
    331         # arguments in memory
--> 332         self.results = batch()
    333
    334     def get(self):

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
    129
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132
    133     def __len__(self):

~/anaconda3/envs/civis/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
    129
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132
    133     def __len__(self):

~/anaconda3/envs/civis/lib/python3.6/site-packages/glmnet/util.py in _fit_and_score(est, scorer, X, y, sample_weight, relative_penalties, score_lambda_path, train_inx, test_inx)
    112     """
    113     m = clone(est)
--> 114     m = m._fit(X[train_inx, :], y[train_inx], sample_weight[train_inx], relative_penalties)
    115
    116     lamb = np.clip(score_lambda_path, m.lambda_path_[-1], m.lambda_path_[0])

TypeError: only integer scalar arrays can be converted to a scalar index

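I would expect both estimators to pass the check_estimator checks. In both tracebacks the failing check is check_sample_weights_list, which passes sample_weight as a plain Python list rather than a NumPy array.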

stephen-hoover commented 5 years ago

Fixed by #51.