Closed: Ariaspect closed this 2 years ago
lab01.py
```python
import pandas as pd
import torch
import torch.nn.functional as F

filename = "Breast_cancer_train.csv"
df = pd.read_csv(filename)

# Five numeric features as inputs, diagnosis (0/1) as the target.
x_train = torch.tensor(
    df.loc[:, ['mean_radius', 'mean_texture', 'mean_perimeter',
               'mean_area', 'mean_smoothness']].values.tolist(),
    dtype=torch.float32)
y_train = torch.tensor(df['diagnosis'].values.tolist(),
                       dtype=torch.float32).unsqueeze(1)


def get_prediction(x_predict):
    # Threshold the sigmoid output at 0.5 to get 0/1 class labels.
    prediction = torch.sigmoid(x_predict.matmul(W) + b)
    prediction = (prediction >= torch.tensor([0.5])).int()
    return prediction


W = torch.zeros((5, 1), requires_grad=True, dtype=torch.float32)
b = torch.zeros(1, requires_grad=True, dtype=torch.float32)

lr = 1e-5
sgd_optim = torch.optim.SGD([W, b], lr=lr)

# Full-batch gradient descent on the logistic-regression parameters.
nb_epochs = 20000
for epoch in range(nb_epochs):
    hypothesis = torch.sigmoid(x_train.matmul(W) + b)
    cost = F.binary_cross_entropy(hypothesis, y_train)

    sgd_optim.zero_grad()
    cost.backward()
    sgd_optim.step()

    if epoch % 100 == 0:
        correct = get_prediction(x_train) == y_train.int()
        acc = correct.sum().item() / len(correct)
        print(f'epoch [{epoch}]: cost: {cost}, acc: {acc:.5f}')


if __name__ == "__main__":
    x_test = torch.tensor([[17.99, 10.38, 122.8, 1001, 0.1184],
                           [13.54, 14.36, 87.46, 566.3, 0.09779]],
                          dtype=torch.float32)
    print(get_prediction(x_test))
```
```
...
epoch [19700]: cost: 0.3231341540813446, acc: 0.85928
epoch [19800]: cost: 0.323068767786026, acc: 0.85928
epoch [19900]: cost: 0.32300370931625366, acc: 0.85928
tensor([[0],
        [1]], dtype=torch.int32)
```
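A side note on the loss in lab01.py: the same update can also be written against raw logits with `F.binary_cross_entropy_with_logits`, which applies the sigmoid inside the loss and tends to be numerically more stable. A minimal sketch, assuming the `x_train`/`y_train` tensors built in lab01.py above; everything else is re-declared here:

```python
import torch
import torch.nn.functional as F

# Sketch only: the same full-batch logistic regression as lab01.py, but the loss
# is computed from raw logits. binary_cross_entropy_with_logits applies the
# sigmoid internally instead of taking already-squashed probabilities.
# x_train and y_train are assumed to be the tensors defined in lab01.py.
W = torch.zeros((5, 1), requires_grad=True, dtype=torch.float32)
b = torch.zeros(1, requires_grad=True, dtype=torch.float32)
sgd_optim = torch.optim.SGD([W, b], lr=1e-5)

for epoch in range(20000):
    logits = x_train.matmul(W) + b
    cost = F.binary_cross_entropy_with_logits(logits, y_train)

    sgd_optim.zero_grad()
    cost.backward()
    sgd_optim.step()
```

Predictions are unchanged: `get_prediction` still thresholds `torch.sigmoid(x.matmul(W) + b)` at 0.5.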
lab02.py
```python
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

filename = "titanic_train.csv"
df = pd.read_csv(filename, index_col=0)

# All columns except the id and the label are used as input features.
x_train = torch.tensor(
    (df_x := df.drop(columns=['PassengerId', 'Survived'])).values.tolist(),
    dtype=torch.float32)
y_train = torch.tensor(df['Survived'].values.tolist(),
                       dtype=torch.float32).unsqueeze(1)


def get_prediction(x_predict):
    # Threshold the model's sigmoid output at 0.5 to get 0/1 class labels.
    prediction = model(x_predict)
    prediction = (prediction >= torch.tensor([0.5])).int()
    return prediction


# df_x = df_x.drop(columns=['Emb_1', 'Emb_2', 'Emb_3'])
# x_train = torch.tensor(df_x.values.tolist(), dtype=torch.float32)


class BinaryClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(14, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        return self.sigmoid(self.linear(x))


lr = 1e-1
model = BinaryClassifier()
adam_optim = torch.optim.Adagrad(model.parameters(), lr=lr)
# sgd_optim = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

nb_epochs = 1000
for epoch in range(nb_epochs):
    hypothesis = model(x_train)
    cost = F.binary_cross_entropy(hypothesis, y_train)

    adam_optim.zero_grad()
    cost.backward()
    adam_optim.step()

    if epoch % 10 == 0:
        correct = get_prediction(x_train) == y_train.int()
        acc = correct.sum().item() / len(correct)
        print(f'epoch [{epoch}]: cost: {cost}, acc: {acc:.5f}')
```
```
...
epoch [970]: cost: 0.4183887541294098, acc: 0.82197
epoch [980]: cost: 0.4183367192745209, acc: 0.82197
epoch [990]: cost: 0.41828653216362, acc: 0.82197
```
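One small refinement to lab02.py: `get_prediction` runs the model with autograd enabled even though it is only used for accuracy checks. A sketch of the same 0.5 thresholding under `torch.no_grad()`, assuming the trained `model`, `x_train`, and `y_train` from lab02.py above (the helper name `evaluate` is mine):

```python
import torch

@torch.no_grad()  # no autograd graph is built while measuring accuracy
def evaluate(x, y):
    # Same 0.5 threshold as get_prediction in lab02.py.
    prediction = (model(x) >= 0.5).int()
    correct = prediction == y.int()
    return correct.sum().item() / len(correct)

print(f'train acc: {evaluate(x_train, y_train):.5f}')
```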
Great performance!
Results (I ran each script only once, since the initial values are fixed and the runs are deterministic):

Problem 1 test: 90.0%
Problem 2 test: 88.0%
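For reference, numbers like these can be reproduced by running the trained models on a labelled test CSV in the same format as the training files. A sketch for Problem 1, assuming a hypothetical test file name and the `get_prediction` helper from lab01.py above:

```python
import pandas as pd
import torch

# "Breast_cancer_test.csv" is a hypothetical file name; the actual grading data
# is not part of this issue. It is assumed to have the same columns as the
# training CSV, including the 'diagnosis' label.
test_df = pd.read_csv("Breast_cancer_test.csv")
x_test = torch.tensor(
    test_df[['mean_radius', 'mean_texture', 'mean_perimeter',
             'mean_area', 'mean_smoothness']].values.tolist(),
    dtype=torch.float32)
y_test = torch.tensor(test_df['diagnosis'].values.tolist(),
                      dtype=torch.float32).unsqueeze(1)

# get_prediction is the thresholding helper defined in lab01.py.
correct = get_prediction(x_test) == y_test.int()
print(f'Problem 1 test acc: {correct.sum().item() / len(correct):.3f}')
```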