TeorinKim / NetworkAnalysis

0 stars 0 forks source link

로지스틱 회귀분석 #8

Open TeorinKim opened 3 years ago

TeorinKim commented 3 years ago
from sklearn.linear_model import LogisticRegression

import pandas as pd

# Load the Titanic training data, then print its shape followed by the
# first rows as a quick sanity check of what was read.
passengers = pd.read_csv("D:/ML/titanic/train.csv")
print(passengers.shape, passengers.head(), sep="\n")

image

# Encode sex as a binary feature: female -> 1, male -> 0.
passengers['Sex'] = passengers['Sex'].map({'female': 1, 'male': 0})

# Replace missing ages with the mean age. The result is assigned back to the
# column rather than calling fillna(inplace=True) on the selected Series:
# the in-place form is a chained assignment, which warns on recent pandas
# and does not modify the DataFrame when Copy-on-Write is enabled.
passengers['Age'] = passengers['Age'].fillna(passengers['Age'].mean())

# Indicator columns for first and second class, computed with vectorized
# comparisons. Any other Pclass value gets 0 in both columns.
passengers['FirstClass'] = (passengers['Pclass'] == 1).astype(int)
passengers['SecondClass'] = (passengers['Pclass'] == 2).astype(int)

# Model inputs (four columns) and the target label.
features = passengers[['Sex', 'Age', 'FirstClass', 'SecondClass']]
survival = passengers['Survived']

train, test 나누기

from sklearn.model_selection import train_test_split
# Split the features and labels into training and test subsets using the
# splitter's default settings.
(
    train_features,
    test_features,
    train_labels,
    test_labels,
) = train_test_split(features, survival)

표준화

from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features only, then apply that same fitted
# scaling to both the training and the test features.
scaler = StandardScaler().fit(train_features)

train_features = scaler.transform(train_features)
test_features = scaler.transform(test_features)

로지스틱 회귀분석 시작

from sklearn.linear_model import LogisticRegression

# Train a logistic regression classifier on the scaled training data.
model = LogisticRegression().fit(train_features, train_labels)

# Print the model's score on the training split, then on the test split.
for split_features, split_labels in (
    (train_features, train_labels),
    (test_features, test_labels),
):
    print(model.score(split_features, split_labels))

# Print the fitted coefficients (one per input feature column).
print(model.coef_)

특정 값 넣어서 예측결과 확인

import numpy as np

# Hand-made passengers, with values in the same column order as `features`:
# [Sex, Age, FirstClass, SecondClass].
Jack = np.array([0.0, 20.0, 0.0, 0.0])
Rose = np.array([1.0, 17.0, 1.0, 0.0])
ME = np.array([0.0, 32.0, 1.0, 0.0])

# Build the samples as a DataFrame carrying the training column names. The
# scaler was fitted on a DataFrame, so passing a bare ndarray would trigger
# scikit-learn's "X does not have valid feature names" warning and skip its
# column-name consistency check.
sample_passengers = pd.DataFrame([Jack, Rose, ME], columns=features.columns)

# Apply the scaling learned from the training data to the samples.
sample_passengers = scaler.transform(sample_passengers)

# Print the predicted label for each sample passenger.
print(model.predict(sample_passengers))

확률 확인

# Print the predicted class probabilities for the sample passengers.
class_probabilities = model.predict_proba(sample_passengers)
print(class_probabilities)