Open doxgxxn opened 1 year ago
import pickle

# save the (already fitted) pipeline object to disk
with open('abc.pkl', 'wb') as fw:
    pickle.dump(pl, fw)

# load it back later
with open('abc.pkl', 'rb') as fr:
    save_pl = pickle.load(fr)
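A quick usage sketch (assuming pl was fitted before being pickled and that an X_test split exists): the reloaded object behaves exactly like the original.

pred = save_pl.predict(X_test)   # same predictions as pl.predict(X_test)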
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)
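A short follow-up sketch (assuming a held-out X_test): the test set is only transformed with the PCA fitted on the training data, and explained_variance_ratio_ shows how much variance the two components retain.

X_test_pca = pca.transform(X_test)       # reuse the fit from X_train, never refit on test data
print(pca.explained_variance_ratio_)     # variance explained by each of the 2 components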
import pandas as pd

pd.options.display.max_columns = 300   # show up to 300 columns when printing a DataFrame
df.filter(like='test')                 # keep only the columns whose names contain 'test'
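For reference, two other filter variants (the column/index values here are hypothetical, just to illustrate the arguments):

df.filter(regex='^test_')           # select columns matching a regular expression
df.filter(like='2023', axis=0)      # filter row index labels instead of columns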
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# a final estimator is needed so GridSearchCV can score the pipeline with 'accuracy'
steps = [('sk', StandardScaler()), ('model', LogisticRegression(max_iter=1000))]
pl = Pipeline(steps, verbose=True)

# pipeline hyperparameters are addressed as "<step name>__<parameter name>"
params = {"sk__with_mean": [True, False], "model__C": [0.01, 0.1, 1, 10]}

gs = GridSearchCV(pl, params, scoring='accuracy', cv=4, n_jobs=-1)
gs.fit(X_train, y_train)

pl.steps                  # list of (name, transformer/estimator) pairs
gs.best_params_           # best parameter combination
gs.best_score_            # best mean cross-validation score
best_model = gs.best_estimator_
gs.cv_results_            # returns the full search results as a dictionary
pred_train = best_model.predict(X_train)
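Two follow-ups that fit here (a sketch, assuming X_test and y_test are available): cv_results_ is easier to read as a DataFrame, and the selected model should finally be evaluated on the held-out test set.

cv_df = pd.DataFrame(gs.cv_results_)                             # one row per parameter combination
print(cv_df[['params', 'mean_test_score', 'rank_test_score']])
test_acc = gs.score(X_test, y_test)                              # scores with best_estimator_ automatically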
params = { "process_pipeline__con_colimputerstrategy": ["mean", "median"], "model__C":[0.01, 0.1, 1, 10] }
cat_col = Pipeline([ ("imputer", SimpleImputer(strategy="most_frequent")), ("ohe", OneHotEncoder(handle_unknown='ignore')) ]) con_col = Pipeline([ ("imputer", SimpleImputer(strategy="median")), ("sc", StandardScaler()) ])
process_pipeline = ColumnTransformer([ ("cat_col", cat_col, category_columns), ("con_col", con_col, continuous_columns) ])
pl4 = Pipeline([ ("process_pipeline", process_pipeline), ("model", LogisticRegression(max_iter=2000, random_state=0)) ])
gs = GridSearchCV(pl4, params, scoring='accuracy', cv=5, n_jobs=-1)
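With names this deeply nested, get_params().keys() is a handy sanity check (a sketch, not part of the original note): it lists every parameter name the grid search will accept, and best_params_ shows what was chosen after fitting.

print([k for k in pl4.get_params().keys() if 'strategy' in k])   # confirm the nested key exists
print(gs.best_params_)                                           # chosen imputer strategy and C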
Learned: a fitted pipeline can be saved and reloaded with pickle; pipeline hyperparameters are named "step__param" (nested ones "step__substep__param") in the GridSearchCV param grid; and wrapping a ColumnTransformer and the model in one pipeline lets preprocessing and model hyperparameters be searched together.