# Source: GitHub issue opened by user henryliangt (course MLP/backprop exercise code).
class HiddenLayer(object):
    """A single fully-connected layer with an optional activation.

    The layer caches its last input and output during ``forward`` so that
    ``backward`` can compute the gradients of the loss w.r.t. ``W`` and ``b``
    for that forward pass.
    """

    def __init__(self, n_in, n_out, activation_last_layer='tanh',
                 activation='tanh', W=None, b=None):
        """
        :param n_in: number of input units
        :param n_out: number of output units
        :param activation_last_layer: name of the *previous* layer's
            activation; its derivative is applied when back-propagating
            delta through this layer (None / falsy disables it)
        :param activation: name of the activation applied to this layer's
            linear output
        :param W: optional pre-initialized weight matrix of shape
            (n_in, n_out); random Xavier/Glorot init is used when None
        :param b: optional pre-initialized bias vector of shape (n_out,);
            zeros are used when None
        """
        self.input = None
        self.activation = Activation(activation).f
        # derivative of the previous layer's activation; evaluated on the
        # cached input (= that layer's activated output) in backward()
        self.activation_deriv = None
        if activation_last_layer:
            self.activation_deriv = Activation(activation_last_layer).f_deriv
        # Xavier/Glorot uniform initialization keeps activation variance
        # roughly constant across layers; honor a caller-supplied W instead
        # of ignoring it (the original discarded the W/b arguments).
        if W is None:
            limit = np.sqrt(6. / (n_in + n_out))
            self.W = np.random.uniform(low=-limit, high=limit, size=(n_in, n_out))
        else:
            self.W = W
        # bias: one entry per output unit
        self.b = np.zeros(n_out,) if b is None else b
        # gradient buffers, same shapes as the parameters they update
        self.grad_W = np.zeros(self.W.shape)
        self.grad_b = np.zeros(self.b.shape)

    def forward(self, input):
        """Compute ``activation(input @ W + b)`` and cache input/output.

        :type input: numpy.array
        :param input: a symbolic tensor of shape (n_in,)
        """
        lin_output = np.dot(input, self.W) + self.b
        self.output = (
            lin_output if self.activation is None
            else self.activation(lin_output)
        )
        self.input = input
        return self.output

    def backward(self, delta, output_layer=False):
        """Accumulate parameter gradients and return delta for the
        previous layer.

        :param delta: gradient of the loss w.r.t. this layer's linear output
        :param output_layer: kept for interface compatibility; unused
        """
        # outer product of the cached input with delta
        self.grad_W = np.atleast_2d(self.input).T.dot(np.atleast_2d(delta))
        self.grad_b = delta
        if self.activation_deriv:
            # propagate through W, then through the previous layer's
            # activation derivative (evaluated at its activated output)
            delta = delta.dot(self.W.T) * self.activation_deriv(self.input)
        return delta
class MLP:
    """Multi-layer perceptron trained by online (stochastic) gradient
    descent with a mean-squared-error loss.
    """

    # for initiallization, the code will create all layers automatically
    # based on the provided parameters.
    def __init__(self, layers, activation=[None, 'tanh', 'tanh']):
        """
        :param layers: A list containing the number of units in each layer.
        Should be at least two values
        :param activation: The activation function to be used. Can be
        "logistic" or "tanh"
        """
        # NOTE(review): the mutable default for `activation` is only read,
        # never mutated, so it is harmless here.
        ### initialize layers
        self.layers = []
        self.params = []
        self.activation = activation
        # layer i maps layers[i] -> layers[i+1]; activation[i] is the
        # previous layer's activation (its derivative is used in backprop)
        for i in range(len(layers) - 1):
            self.layers.append(
                HiddenLayer(layers[i], layers[i + 1],
                            activation[i], activation[i + 1])
            )

    def forward(self, input):
        """Propagate `input` through all layers; return the final output."""
        for layer in self.layers:
            output = layer.forward(input)
            input = output
        return output

    def criterion_MSE(self, y, y_hat):
        """Return ``(loss, delta)`` for target ``y`` and prediction ``y_hat``.

        ``loss`` is the scalar mean squared error; ``delta`` is the gradient
        of the loss w.r.t. the output layer's linear output.
        """
        activation_deriv = Activation(self.activation[-1]).f_deriv
        error = y - y_hat
        # scalar MSE (the original returned the raw squared-error array;
        # np.mean is identical for a single output and generalizes to
        # multi-output networks)
        loss = np.mean(error ** 2)
        # MSE delta of the output layer: -(y - y_hat) * f'(y_hat)
        delta = -error * activation_deriv(y_hat)
        return loss, delta

    def backward(self, delta):
        """Back-propagate ``delta`` from the output layer to the first."""
        delta = self.layers[-1].backward(delta, output_layer=True)
        for layer in reversed(self.layers[:-1]):
            delta = layer.backward(delta)

    def update(self, lr):
        """Gradient-descent step over all layers.

        Make sure you run the backward function before the update function!
        """
        for layer in self.layers:
            layer.W -= lr * layer.grad_W
            layer.b -= lr * layer.grad_b

    def fit(self, X, y, learning_rate=0.1, epochs=100):
        """
        Online learning.
        :param X: Input data or features
        :param y: Input targets
        :param learning_rate: parameters defining the speed of learning
        :param epochs: number of times the dataset is presented to the network for learning
        :return: numpy array with the mean loss of each epoch
        """
        X = np.array(X)
        y = np.array(y)
        to_return = np.zeros(epochs)
        for k in range(epochs):
            loss = np.zeros(X.shape[0])
            for it in range(X.shape[0]):
                # pick one example at random (stochastic gradient descent)
                i = np.random.randint(X.shape[0])
                # forward pass
                y_hat = self.forward(X[i])
                # backward pass (a stray no-op `y` statement was removed here)
                loss[it], delta = self.criterion_MSE(y[i], y_hat)
                self.backward(delta)
                # update
                self.update(learning_rate)
            to_return[k] = np.mean(loss)
        return to_return

    def predict(self, x):
        """Run the trained network on every row of ``x``."""
        x = np.array(x)
        output = np.zeros(x.shape[0])
        for i in np.arange(x.shape[0]):
            output[i] = self.forward(x[i, :])
        return output
# Experiment 1: train a 2-3-1 MLP with a small learning rate (0.001)
# and plot the per-epoch loss curve.
input_data = dataset[:, 0:2]
output_data = dataset[:, 2]
nn = MLP([2, 3, 1], [None, 'logistic', 'tanh'])
MSE = nn.fit(input_data, output_data, learning_rate=0.001, epochs=500)
print('loss:%f' % MSE[-1])
pl.figure(figsize=(15, 4))
pl.plot(MSE)
pl.grid()
# Experiment 2: same architecture, ten-times-smaller learning rate (0.0001);
# plot the loss curve for comparison with the other runs.
input_data = dataset[:, 0:2]
output_data = dataset[:, 2]
nn = MLP([2, 3, 1], [None, 'logistic', 'tanh'])
MSE = nn.fit(input_data, output_data, learning_rate=0.0001, epochs=500)
print('loss:%f' % MSE[-1])
pl.figure(figsize=(15, 4))
pl.plot(MSE)
pl.grid()
# Experiment 3: same architecture with a large learning rate (0.1);
# this trained network is the one used by the plots below.
input_data = dataset[:, 0:2]
output_data = dataset[:, 2]
nn = MLP([2, 3, 1], [None, 'logistic', 'tanh'])
MSE = nn.fit(input_data, output_data, learning_rate=0.1, epochs=500)
print('loss:%f' % MSE[-1])
pl.figure(figsize=(15, 4))
pl.plot(MSE)
pl.grid()
# Scatter the network's predictions against the ground-truth targets;
# a well-trained network clusters points along the diagonal.
output = nn.predict(input_data)
pl.figure(figsize=(8, 6))
pl.scatter(output_data, output, s=100)
pl.xlabel('Targets')
pl.ylabel('MLP output')
pl.grid()
# Visualize the learned decision boundary over a dense grid, coloring the
# training points by target sign (left panel) and by MLP-output sign
# (right panel). The two duplicated subplot sections are folded into a loop.
step = .02
xx, yy = np.meshgrid(np.arange(-2, 2, step), np.arange(-2, 2, step))
Z = nn.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
pl.figure(figsize=(15, 7))
panels = [(1, output_data, 'Targets'), (2, output, 'MLP output')]
for idx, values, title in panels:
    pl.subplot(1, 2, idx)
    # background shows where the network output is positive
    pl.pcolormesh(xx, yy, Z > 0, cmap='cool')
    colors = ['r' if d > 0 else 'b' for d in values]
    pl.scatter(input_data[:, 0], input_data[:, 1], c=colors, s=100)
    pl.xlim(-2, 2)
    pl.ylim(-2, 2)
    pl.grid()
    pl.title(title)
class Activation(object):
def __tanh(self, x):
def __tanh_deriv(self, a):
def __logistic(self, x):
def __logistic_deriv(self, x):
def __init__(self, a):
if activation == 'logistic':
elif activation == 'tanh':
class HiddenLayer(object):
def __init__(self,n_in, n_out, activation_last_layer='tanh',activation='tanh', W=None, b=None):
def forward(self, input)
return self.output
def backward(self, delta, output_layer=False):
return delta
class MLP:
def __init__(self, layers, activation=['None', 'tanh', 'tanh']):
def forward(self, input):
return output
def criterion_MSE(self, y, y_hat):
return loss, delta
def backward(self, delta):
def update(self, lr):
def fit(self, X, y, learning_rate=0.1, epochs=100):
return to_return
def predict(self, x):
x = np.array(x)
return output