Describe the bug
I'm running some experiments that combine abess with auto-sklearn. When using MultinomialRegression for classification, memory usage increases very quickly, to the point that the result cannot be displayed on a web page. With LinearRegression, there is no similar out-of-memory problem.
Code for Reproduction
My code is given as follows:
from pprint import pprint
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
UniformIntegerHyperparameter, UniformFloatHyperparameter
import sklearn.metrics
import autosklearn.classification
import autosklearn.pipeline.components.classification
from autosklearn.pipeline.components.base \
import AutoSklearnClassificationAlgorithm
from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA, \
PREDICTIONS
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import openml
from abess import MultinomialRegression
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
import time
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
class AbessClassifier(AutoSklearnClassificationAlgorithm):
    """auto-sklearn classification component wrapping abess ``MultinomialRegression``.

    Parameters
    ----------
    exchange_num : int
        Hyperparameter sampled from the search space declared in
        ``get_hyperparameter_search_space``; forwarded to the abess estimator.
    random_state : optional
        Stored on the instance only; it is not forwarded to the estimator.
    """

    def __init__(self, exchange_num, random_state=None):
        self.exchange_num = exchange_num
        self.random_state = random_state
        # Populated by fit(); None means "not fitted yet".
        self.estimator = None

    def fit(self, X, y):
        """Fit a fresh ``MultinomialRegression`` on ``X``/``y`` and return ``self``."""
        from abess import MultinomialRegression

        # Forward the sampled hyperparameter; without this every configuration
        # in the search space would train the exact same default model.
        # int() guards against the value arriving as a non-builtin integer type.
        self.estimator = MultinomialRegression(
            exchange_num=int(self.exchange_num),
        )
        self.estimator.fit(X, y)
        return self

    def predict(self, X):
        """Return the fitted estimator's predictions for ``X``.

        Raises
        ------
        NotImplementedError
            If called before ``fit``.
        """
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return the fitted estimator's class probabilities for ``X``.

        Raises
        ------
        NotImplementedError
            If called before ``fit``.
        """
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict_proba(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return the static capability description of this component."""
        return {
            'shortname': 'abess Classifier',
            'name': 'abess logistic Classifier',
            'handles_regression': False,
            'handles_classification': True,
            'handles_multiclass': True,
            'handles_multilabel': False,
            'handles_multioutput': False,
            'is_deterministic': False,
            # Both input and output must be tuple(iterable)
            'input': [DENSE, SIGNED_DATA, UNSIGNED_DATA],
            'output': [PREDICTIONS]
        }

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return a ``ConfigurationSpace`` with ``exchange_num`` in [4, 6], default 5."""
        cs = ConfigurationSpace()
        exchange_num = UniformIntegerHyperparameter(
            name='exchange_num', lower=4, upper=6, default_value=5
        )
        cs.add_hyperparameters([exchange_num])
        return cs
# Register the custom component with auto-sklearn so it can be selected by
# its class name, then print the search space it exposes as a sanity check.
classifier_components = autosklearn.pipeline.components.classification
classifier_components.add_classifier(AbessClassifier)
cs = AbessClassifier.get_hyperparameter_search_space()
print(cs)
# Load OpenML dataset 29 as pandas objects.
# (507, 183, 44136: other ids noted by the author, presumably alternatives.)
dataset = fetch_openml(data_id=int(29), as_frame=True)
X = dataset.data
y = dataset.target
# Treat infinities as missing. np.nan is used because the capitalised
# np.NaN alias was removed in NumPy 2.0.
X.replace([np.inf, -np.inf], np.nan, inplace=True)
## Remove rows with NaN or Inf values
# An explicit per-row mask; keeps X and y aligned by dropping the same labels.
inx = X.index[X.isna().any(axis=1)]
X.drop(inx, inplace=True)
y.drop(inx, inplace=True)
## Use dummy variables to replace categorical variables.
# dtype is pinned to uint8 (the pandas < 2 default): newer pandas emits bool
# dummies, which the numeric-only filter below would silently discard.
X = pd.get_dummies(X, dtype=np.uint8)
## Keep only numeric columns
X = X.select_dtypes(np.number)
## Remove columns with NaN or Inf values
# One combined drop, so a column flagged by both checks cannot trigger a
# KeyError on a second drop.
bad_columns = X.columns[np.isnan(X).any() | np.isinf(X).any()]
X = X.drop(columns=list(bad_columns))
## Encode target labels as integers in 0 .. n_classes - 1
le = preprocessing.LabelEncoder()
y = le.fit_transform(y)
X_train, X_test, y_train, y_test = train_test_split(X, y)
print(X_train.shape)  # (rows, number of initial features) in the train split
print(X_test.shape)  # (rows, number of initial features) in the test split
# Search settings: only the custom abess classifier combined with the
# polynomial feature preprocessor, under the time and memory limits below.
automl_settings = dict(
    time_left_for_this_task=60,
    per_run_time_limit=10,
    include={
        'classifier': ['AbessClassifier'],
        'feature_preprocessor': ['polynomial']
    },
    memory_limit=6144,
    ensemble_size=1,
)
cls = autosklearn.classification.AutoSklearnClassifier(**automl_settings)
# The held-out split is passed to fit() as well as being used for scoring.
cls.fit(X_train, y_train, X_test, y_test)
predictions = cls.predict(X_test)
accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score", accuracy)
After running this code, the memory usage reaches about 159 MB, which makes the resulting .ipynb unfriendly for users to open. Again, regression does not run into this out-of-memory problem.
Describe the bug: I'm running some experiments that combine abess with auto-sklearn. When using
MultinomialRegression
for classification, memory usage increases very quickly, to the point that the result cannot be displayed on a web page, but for LinearRegression
, there is no similar out-of-memory problem. Code for Reproduction
My code is given as follows:
After running this code, the memory usage reaches about 159 MB, which makes the resulting
.ipynb
unfriendly for users to open. Again, regression does not run into this out-of-memory problem.