doxgxxn / data_TIL

toy project dumpster and daily review of everything

4L Retrospective / 230707 / DAY15 #15

Open doxgxxn opened 1 year ago

doxgxxn commented 1 year ago

Learned

```python
import pickle

# Serialize the (fitted) pipeline to disk
with open('abc.pkl', 'wb') as fw:
    pickle.dump(pl, fw)

# Load it back later
with open('abc.pkl', 'rb') as fr:
    save_pl = pickle.load(fr)
```
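A quick round-trip check, as a sketch: `pl` stands for any fitted scikit-learn estimator here, and `X_train` for data it can predict on (both assumed from the surrounding notes):

```python
import numpy as np

# The reloaded object should reproduce the original's predictions exactly
assert np.array_equal(pl.predict(X_train), save_pl.predict(X_train))
```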

```python
from sklearn.decomposition import PCA

# Project the training data onto the top 2 principal components
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)
```
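Worth checking how much signal the two components keep. `explained_variance_ratio_` gives the fraction of variance per component, and the fitted PCA can be reused on new data (`X_test` is an assumption; the printed values are illustrative):

```python
# Fraction of total variance captured by each component
print(pca.explained_variance_ratio_)         # e.g. [0.72 0.23]
print(pca.explained_variance_ratio_.sum())   # e.g. 0.95 of the variance kept

# New data gets transform() only; never refit on the test set
X_test_pca = pca.transform(X_test)
```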

```python
import pandas as pd

# Show up to 300 columns before pandas truncates the display
pd.options.display.max_columns = 300

# Keep only the columns whose name contains 'test'
df.filter(like='test')
```
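A toy example of what the `like` match selects (hypothetical DataFrame):

```python
import pandas as pd

df = pd.DataFrame({'train_acc': [0.91], 'test_acc': [0.84], 'test_f1': [0.79]})
df.filter(like='test')   # keeps only 'test_acc' and 'test_f1'
```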

```python
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# The pipeline needs a final estimator for GridSearchCV to have something to score
steps = [('sk', StandardScaler()),
         ('model', LogisticRegression(random_state=0))]
pl = Pipeline(steps, verbose=True)

# '<step name>__<param>' targets a parameter inside a pipeline step
# (StandardScaler has no random_state to tune, so tune the classifier instead)
params = {"model__C": [0.01, 0.1, 1, 10]}

# n_iter belongs to RandomizedSearchCV; GridSearchCV tries every combination
gs = GridSearchCV(pl, params,
                  scoring='accuracy',
                  cv=4,
                  n_jobs=-1)

gs.fit(X_train, y_train)
```

```python
pl.steps            # list of (name, estimator) tuples

gs.best_params_     # hyper-parameters of the best candidate
gs.best_score_      # its mean cross-validation score
best_model = gs.best_estimator_

gs.cv_results_      # returns the full search results as a dict

pred_train = best_model.predict(X_train)   # predict takes X only
```
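`cv_results_` is a dict of equal-length arrays, so it drops straight into a DataFrame for side-by-side comparison (a sketch):

```python
import pandas as pd

results = pd.DataFrame(gs.cv_results_)
results[['params', 'mean_test_score', 'rank_test_score']].sort_values('rank_test_score')
```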

params = { "process_pipeline__con_colimputerstrategy": ["mean", "median"], "model__C":[0.01, 0.1, 1, 10] }

```python
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Categorical columns: fill missing values with the mode, then one-hot encode
cat_col = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("ohe", OneHotEncoder(handle_unknown='ignore')),
])

# Continuous columns: fill missing values with the median, then standardize
con_col = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("sc", StandardScaler()),
])
```

```python
from sklearn.compose import ColumnTransformer

# Route each group of columns through its own preprocessing pipeline
process_pipeline = ColumnTransformer([
    ("cat_col", cat_col, category_columns),
    ("con_col", con_col, continuous_columns),
])
```
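`category_columns` and `continuous_columns` are assumed to be predefined lists of column names; one common way to build them, assuming `X_train` is a pandas DataFrame:

```python
# Split column names by dtype (object dtype taken as categorical here)
category_columns = X_train.select_dtypes(include='object').columns.tolist()
continuous_columns = X_train.select_dtypes(include='number').columns.tolist()
```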

```python
# Preprocessing and model chained into a single estimator
pl4 = Pipeline([
    ("process_pipeline", process_pipeline),
    ("model", LogisticRegression(max_iter=2000, random_state=0)),
])
```

```python
gs = GridSearchCV(pl4, params, scoring='accuracy', cv=5, n_jobs=-1)
gs.fit(X_train, y_train)
```
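To close out, a sketch of scoring the winning pipeline on held-out data (`X_test` / `y_test` are assumptions, not part of the notes above):

```python
from sklearn.metrics import accuracy_score

best_model = gs.best_estimator_
print(gs.best_params_)                                      # winning settings
print(accuracy_score(y_test, best_model.predict(X_test)))   # generalization check
```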