Open TeorinKim opened 3 years ago
from sklearn.linear_model import LogisticRegression
import pandas as pd

# Load the Titanic training data from the local CSV file.
passengers = pd.read_csv("D:/ML/titanic/train.csv")

# Quick sanity check: dimensions first, then the leading rows.
print(passengers.shape)
print(passengers.head())
# Encode sex as a number; values other than 'female'/'male' become NaN.
passengers['Sex'] = passengers['Sex'].map({'female': 1, 'male': 0})

# Fill missing ages with the mean age. The result is assigned back to the
# column rather than calling fillna(inplace=True) on the selected column:
# that chained in-place form emits a FutureWarning on pandas 2.x and does
# not update the DataFrame once copy-on-write is enabled.
passengers['Age'] = passengers['Age'].fillna(value=passengers['Age'].mean())

# Indicator columns for first and second class, computed as vectorized
# comparisons; rows with any other Pclass value get 0 in both columns.
passengers['FirstClass'] = (passengers['Pclass'] == 1).astype(int)
passengers['SecondClass'] = (passengers['Pclass'] == 2).astype(int)

# Model inputs and the target column.
features = passengers[['Sex', 'Age', 'FirstClass', 'SecondClass']]
survival = passengers['Survived']
# Split into train and test sets (train, test 나누기)
from sklearn.model_selection import train_test_split

# Hold out part of the data for evaluation. No random_state is passed, so
# the partition is not fixed across runs.
split_parts = train_test_split(features, survival)
train_features, test_features, train_labels, test_labels = split_parts
# Standardization (표준화)
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so the test data does not influence it.
scaler = StandardScaler()
scaler.fit(train_features)
train_features = scaler.transform(train_features)
test_features = scaler.transform(test_features)
# Start logistic regression (로지스틱 회귀분석 시작)
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression classifier with its default settings.
model = LogisticRegression()
model.fit(train_features, train_labels)

# Score on the training split first, then on the held-out split.
train_score = model.score(train_features, train_labels)
test_score = model.score(test_features, test_labels)
print(train_score)
print(test_score)

# Fitted coefficients of the model.
print(model.coef_)
# Check predictions for specific input values (특정 값 넣어서 예측결과 확인)
import numpy as np

# Hand-built passengers; values follow the column order of `features`:
# Sex (1 = female, 0 = male), Age, FirstClass, SecondClass.
Jack = np.array([0.0, 20.0, 0.0, 0.0])
Rose = np.array([1.0, 17.0, 1.0, 0.0])
ME = np.array([0.0, 32.0, 1.0, 0.0])

# The scaler was fitted on a DataFrame, so label the samples with the same
# column names before scaling; passing a bare ndarray makes
# scikit-learn >= 1.0 warn that X does not have valid feature names.
sample_passengers = pd.DataFrame(
    np.array([Jack, Rose, ME]),
    columns=features.columns,
)
sample_passengers = scaler.transform(sample_passengers)
print(model.predict(sample_passengers))
# Check the probabilities (확률 확인)
# Class probabilities for the same scaled samples, one row per passenger.
survival_probabilities = model.predict_proba(sample_passengers)
print(survival_probabilities)
train, test 나누기
표준화
로지스틱 회귀분석 시작
특정 값 넣어서 예측결과 확인
확률 확인