eldenpark / ml-practice


numpy ann #1

Open eldenpark opened 7 years ago

eldenpark commented 7 years ago
import numpy as np

def sig(x, deriv=False):
    # with deriv=True, x is expected to be the sigmoid output a,
    # so a * (1 - a) is the derivative with respect to the pre-activation
    if deriv:
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))

x = np.array([[0,1],
              [1,0],
              [1,1]])

y = np.array([[0,1],
              [1,1],
              [1,0]])

np.random.seed(1)

w0 = 2 * np.random.random((2,2)) - 1
w1 = 2 * np.random.random((2,2)) - 1

for i in range(1000):

    # Forward
    z1 = np.dot(x, w0)
    a1 = sig(z1)
    z2 = np.dot(a1, w1)
    a2 = sig(z2)

    # Back
#     loss_l2 = 0.5 * np.power(y - a2, 2)
    deriv_loss_a2 = a2 - y # l2 error
#     print(deriv_loss_a2)
    deriv_a2_z2 = sig(a2, True)
    delta2 = deriv_loss_a2 * deriv_a2_z2

    deriv_loss_a1 = np.dot(delta2, w1.T) # l1 error
    deriv_a1_z1 = sig(a1, True)
    delta1 = deriv_loss_a1 * deriv_a1_z1

    w1 -= np.dot(a1.T, delta2)
    w0 -= np.dot(x.T, delta1)

print(a2)  # final network output after training
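For reference, the `deriv=True` branch relies on the identity sigma'(z) = sigma(z) * (1 - sigma(z)), which is why `sig(a2, True)` is passed the activation output `a2` rather than the pre-activation `z2`. A quick numeric sanity check (a minimal sketch, reusing only the `sig` defined above):

z = np.linspace(-3, 3, 7)
a = sig(z)
analytic = sig(a, deriv=True)                       # a * (1 - a), computed from the activation
numeric = (sig(z + 1e-5) - sig(z - 1e-5)) / 2e-5    # central-difference estimate of dsigma/dz
print(np.max(np.abs(analytic - numeric)))           # should be tiny (around 1e-10)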
MyeongKim commented 7 years ago
# -*- coding: utf-8 -*-
import numpy as np

def nonlin(x, deriv=False):
    if deriv:
        return x*(1-x)
    return 1/(1+np.exp(-x))

X = np.array([
    [0,0,1],
    [0,1,1],
    [1,0,1],
    [1,1,1]
])

y = np.array([
    [0],
    [1],
    [1],
    [0]
])

np.random.seed(1)

syn0 = 2*np.random.random((3,4)) - 1
syn1 = 2*np.random.random((4,1)) - 1

for j in range(60000):
    l0 = X
    l1 = nonlin(np.dot(l0, syn0))
    l2 = nonlin(np.dot(l1, syn1))

    l2_error = y - l2

    if (j % 10000) == 0:
        print("Error: " + str(np.mean(np.abs(l2_error))))

    l2_delta = l2_error*nonlin(l2, deriv=True)
    # how much did l1 contribute to the l2 error?
    l1_error = l2_delta.dot(syn1.T)

    l1_delta = l1_error*nonlin(l1, deriv=True)

    syn1 += l1.T.dot(l2_delta)
    syn0 += l0.T.dot(l1_delta)

Source: https://iamtrask.github.io/2015/07/12/basic-python-network/
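As a follow-up, the trained weights can be used for a forward pass on a single input (a minimal sketch reusing the `nonlin`, `syn0`, and `syn1` names from the snippet above; `sample` follows the same [x1, x2, bias] column layout as `X`):

sample = np.array([[1, 0, 1]])
hidden = nonlin(np.dot(sample, syn0))   # layer 1 activation
output = nonlin(np.dot(hidden, syn1))   # layer 2 activation (the prediction)
print(output)                           # should be close to 1 for this input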

sungjinoh commented 7 years ago
# coding: utf-8
import numpy as np
import matplotlib.pyplot as plt
from utils import *
from mnist import load_mnist
from layers import *
from collections import OrderedDict

class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.param = {}
        self.param['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.param['b1'] = np.zeros(hidden_size)
        self.param['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.param['b2'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.param['W1'], self.param['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.param['W2'], self.param['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)

        #return cross_entropy_error(y, t)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)

        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.param['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.param['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.param['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.param['b2'])

        return grads

    def gradient(self, x, t):
        self.loss(x, t)

        # backpropagation
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        return grads

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=True)

train_loss_list = []

#hyper parameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.gradient(x_batch, t_batch)

    for key in network.param.keys():
        network.param[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    if i % 1000 == 0:
        print('loop : ',i)
    train_loss_list.append(loss)

x = np.arange(len(train_loss_list))
plt.plot(x, train_loss_list, label='train loss')
plt.legend()
plt.show()
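Since `numerical_gradient` is defined on the class but never called, the backprop implementation can be sanity-checked against it on a tiny batch (a minimal sketch, assuming `utils` provides the module-level `numerical_gradient` used inside the class; `x_check` and `t_check` are just illustrative names, and the batch of 3 keeps the slow numerical pass manageable):

x_check = x_train[:3]
t_check = t_train[:3]

grad_backprop = network.gradient(x_check, t_check)
grad_numerical = network.numerical_gradient(x_check, t_check)

# the two estimates should agree to within numerical precision
for key in grad_backprop:
    diff = np.mean(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ' : ' + str(diff))

The layer classes used above (`Relu`, `Affine`, `SoftmaxWithLoss`) are defined in the listing below.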
# coding: utf-8
import numpy as np
from utils import *

class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout

        return dx

class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        out = sigmoid(x)
        self.out = out
        return out

    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out

        return dx

class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b

        self.x = None
        self.original_x_shape = None
        # gradients of the weight and bias parameters
        self.dW = None
        self.db = None

    def forward(self, x):
        # handle tensor inputs: flatten to (batch, features)
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x

        out = np.dot(self.x, self.W) + self.b

        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        dx = dx.reshape(*self.original_x_shape)  # restore the original input shape (tensor support)
        return dx

class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None # loss value
        self.y = None    # softmax output
        self.t = None    # ground-truth labels (one-hot encoded)

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size: # when the labels are one-hot encoded
            dx = (self.y - self.t) / batch_size
        else:
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        return dx
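For completeness, the simple form of the backward pass above is the standard softmax-plus-cross-entropy result. With logits x, softmax output y, and one-hot targets t (a standard derivation, not specific to this code):

y_i = \frac{e^{x_i}}{\sum_j e^{x_j}}, \qquad L = -\sum_i t_i \log y_i, \qquad \frac{\partial L}{\partial x_i} = y_i - t_i

Dividing by `batch_size` in `backward` averages this per-sample gradient over the mini-batch, matching the batch-averaged loss that `cross_entropy_error` is assumed to return.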