Closed browshanravan closed 4 years ago
Thank you for your report. Can you post the full code that causes this error?
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from pipelinehelper import PipelineHelper
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV
# Seed handed to the randomised classifiers so runs are repeatable.
SEED_VALUE = 42

# Only these columns are read from the CSV; "Survived" is the target.
desired_columns = [
    "Survived",
    "Pclass",
    "Sex",
    "Age",
    "SibSp",
    "Parch",
    "Fare",
    "Embarked",
]

# Read the two string-valued columns as pandas categoricals.
dtypes = dict.fromkeys(["Sex", "Embarked"], "category")

titanic = pd.read_csv(
    "http://bit.ly/kaggletrain",
    usecols=desired_columns,
    dtype=dtypes,
)

# Split into target vector and feature frame.
y = titanic["Survived"]
X = titanic.drop(columns="Survived")
# Numeric branch: replace missing "Age" values with the column mean.
fill_imputer_num = ["Age"]
fill_imputer_num_pipeline = Pipeline(
    steps=[
        ("num_imputer", SimpleImputer(missing_values=np.nan, strategy="mean")),
    ]
)

# Categorical branch: impute with the most frequent value, then one-hot encode.
# NOTE(review): per the scikit-learn docs `fill_value` is only consulted when
# strategy="constant", so it presumably has no effect here -- confirm intent.
fill_impute_cat = ["Sex", "Embarked"]
cat_imputer = SimpleImputer(
    missing_values=np.nan,
    strategy="most_frequent",
    fill_value="missing",
)
fill_imputer_cat_pipeline = Pipeline(
    steps=[
        ("cat_imputer", cat_imputer),
        ("onehot", OneHotEncoder()),
    ]
)

# Route each column group through its own pipeline.
# NOTE(review): only Age, Sex and Embarked are listed; with ColumnTransformer's
# default `remainder` the other feature columns are presumably dropped --
# confirm that this is intended.
preprosessor = ColumnTransformer(
    transformers=[
        ("N_Fimp", fill_imputer_num_pipeline, fill_imputer_num),
        ("C_Fimp", fill_imputer_cat_pipeline, fill_impute_cat),
    ]
)
# Candidate classifiers wrapped by PipelineHelper; the two tree ensembles
# share the same seed and use all available cores.
candidate_models = [
    (
        "ExtraTreesClassifier",
        ExtraTreesClassifier(n_jobs=-1, random_state=SEED_VALUE),
    ),
    (
        "RandomForestClassifier",
        RandomForestClassifier(n_jobs=-1, random_state=SEED_VALUE),
    ),
    ("GaussianNB", GaussianNB()),
]

# Full pipeline: preprocessing followed by the PipelineHelper "clf" step.
pipe = Pipeline(
    steps=[
        ("preprosessor", preprosessor),
        ("clf", PipelineHelper(candidate_models)),
    ]
)
# Both tree ensembles are searched over the same hyper-parameter values;
# GaussianNB gets no entries of its own.
tree_ensemble_grid = [
    ("n_estimators", [16, 32, 100, 300, 500]),
    ("criterion", ["gini", "entropy"]),
    ("warm_start", [True, False]),
]

# Expand to "<ModelName>__<param>" keys, giving each key its own list copy.
model_params = {}
for model_name in ("ExtraTreesClassifier", "RandomForestClassifier"):
    for param_name, candidates in tree_ensemble_grid:
        model_params[model_name + "__" + param_name] = list(candidates)

# PipelineHelper.generate turns the per-model dict into the value list for the
# "clf__selected_model" grid entry.
param_grid = {
    "clf__selected_model": pipe.named_steps["clf"].generate(model_params),
}

# 10-fold cross-validated grid search scored by ROC AUC, using all cores.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring="roc_auc",
    cv=10,
    n_jobs=-1,
)
grid.fit(X, y)
print(grid.best_score_)
It's me again :)
The package works great; however, when trying to use `GaussianNB()`, `ExtraTreesClassifier()` or `RandomForestClassifier()` within the `PipelineHelper()`, I get the following error. It works fine for other classifiers I have tried!