import arraymancer, random
# This is an early minimum viable example of handwritten digits recognition.
# It uses convolutional neural networks to achieve high accuracy.
#
# Data files (MNIST) can be downloaded here http://yann.lecun.com/exdb/mnist/
# and must be decompressed in "./build/" (or change the path "build/..." below)
#
# Make the results reproducible by seeding the global RNG
randomize(42)

let
  ctx = newContext Tensor[float32] # Autograd/neural network graph context
  n = 32                           # Batch size (used by the training loop)
let
  # Training data is 60k 28x28 greyscale images with pixel values 0-255;
  # the network trains better on inputs rescaled to [0, 1] (or [-1, 1]).
  x_train = read_mnist_images("build/train-images.idx3-ubyte").astype(float32) / 255'f32
  # Change shape from [N, H, W] to [N, C, H, W] with C = 1 (unsqueeze):
  # 2D convolutions expect 4-d tensors. Wrapping in `ctx.variable` stores the
  # tensor in the context so applied operations are tracked into a NN graph.
  X_train = ctx.variable x_train.unsqueeze(1)
  # Labels are uint8; convert them to int
  y_train = read_mnist_labels("build/train-labels.idx1-ubyte").astype(int)
  # Same preprocessing for the testing data (10000 images)
  x_test = read_mnist_images("build/t10k-images.idx3-ubyte").astype(float32) / 255'f32
  X_test = ctx.variable x_test.unsqueeze(1)
  y_test = read_mnist_labels("build/t10k-labels.idx1-ubyte").astype(int)
# Configuration of the neural network: a LeNet-style convolutional net
# (two conv+maxpool stages, then two fully-connected layers -> 10 classes).
network ctx, DemoNet:
  layers:
    x:          Input([1, 28, 28])                          # 1 channel, 28x28
    cv1:        Conv2D(x.out_shape, 20, 5, 5)               # 20 feature maps, 5x5 kernel
    mp1:        MaxPool2D(cv1.out_shape, (2,2), (0,0), (2,2))
    cv2:        Conv2D(mp1.out_shape, 50, 5, 5)             # 50 feature maps, 5x5 kernel
    mp2:        MaxPool2D(cv2.out_shape, (2,2), (0,0), (2,2))
    fl:         Flatten(mp2.out_shape)
    hidden:     Linear(fl.out_shape, 500)
    classifier: Linear(500, 10)                             # one logit per digit
  forward x:
    # ReLU after each conv and after the hidden layer; classifier outputs raw logits
    x.cv1.relu.mp1.cv2.relu.mp2.fl.hidden.relu.classifier

# Instantiate the network: allocates and randomly initializes weights and biases
let model = ctx.init(DemoNet)
# After `let model = ctx.init(DemoNet)` you can create a second model in
# three ways:
#
#   let model2 = ctx.init(DemoNet)   # case 1
# or
#   let model2 = model               # case 2
# or
#   var model2: DemoNet
#   deepCopy(model2, model)          # case 3
#
# Differences:
#   case 1: the network is the same topologically speaking, but the weights
#           and biases are different (same type, different instances).
#   case 2: both names refer to the same network; updating one changes the
#           other due to reference semantics (shallow copy).
#   case 3: both networks start identical, including weights and biases, but
#           updating one will NOT update the other (deep copy).
# If we take this example, the same three options apply:
#   let model2 = ctx.init(DemoNet)   # case 1: same topology, fresh weights/biases
#   let model2 = model               # case 2: shallow copy — reference semantics,
#                                    #         updating one updates the other
#   deepCopy(model2, model)          # case 3: deep copy — identical at start,
#                                    #         independent afterwards