edmBernard / mxnet_example_shared_weight

small examples to test shared layer

Is it a shared weight siamese net? #1

Closed — QinZiwen closed this issue 5 years ago

QinZiwen commented 6 years ago

I followed your code and tried to write a conv net:

import mxnet as mx
from mxnet import nd, autograd
from mxnet import gluon
from mxnet.gluon import nn
import numpy as np

ctx = mx.gpu()

batch_size = 128
num_inputs = 784
num_outputs = 10

def transform(data, label):
    data = nd.transpose(data, (2,0,1))
    return data.astype(np.float32)/255, label.astype(np.float32)

train_data1 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform), batch_size, shuffle=False)
test_data1 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform), batch_size, shuffle=False)
train_data2 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform), batch_size, shuffle=False)
test_data2 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform), batch_size, shuffle=False)

class siamese_net(nn.HybridBlock):
    def __init__(self, **kwargs):
        super(siamese_net, self).__init__(**kwargs)
        with self.name_scope():
            # conv/dense layers are created once here, so both branches reuse the same weights
            self.conv1 = nn.Conv2D(128, kernel_size=3, strides=1, padding=1, activation='relu')
            self.conv2 = nn.Conv2D(256, kernel_size=3, strides=1, padding=1, activation='relu')
            self.conv3 = nn.Conv2D(256, kernel_size=3, strides=1, padding=1, activation='relu')

            self.dense1 = nn.Dense(1024, activation='relu')
            self.dense2 = nn.Dense(num_outputs)

            self.maxpool = nn.MaxPool2D(pool_size=3, strides=2)
            self.flatten = nn.Flatten()
    def convNet(self, img):
        # shared convolutional branch applied to each input image
        conv = self.conv1(img)
        conv = self.maxpool(conv)
        conv = self.conv2(conv)
        conv = self.maxpool(conv)
        conv = self.conv3(conv)
        conv = self.maxpool(conv)
        # Flatten is registered in __init__ so the block stays hybridizable
        return self.flatten(conv)

    def hybrid_forward(self, F, img1, img2):
        # both inputs run through the same convNet, so the conv weights are shared
        out1 = self.convNet(img1)
        out2 = self.convNet(img2)
        out = F.concat(out1, out2, dim=1)
        out = self.dense2(self.dense1(out))
        return out

def evaluate_accuracy(data_iterator1, data_iterator2, net):
    acc = mx.metric.Accuracy()
    for i, ((data1, label1), (data2, label2)) in enumerate(zip(data_iterator1, data_iterator2)):
        data1 = data1.as_in_context(ctx)
        data2 = data2.as_in_context(ctx)
        label1 = label1.as_in_context(ctx)
        output = net(data1, data2)
        acc.update([label1], [output])
    return acc.get()

net = siamese_net()
net.initialize(ctx=ctx)
net.hybridize()

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.05})

epochs = 4
moving_loss = 0.
smoothing_constant = .01
metric = mx.metric.Accuracy()

print("\n#### Shared+Module1 Training ####")
for e in range(epochs):
    metric.reset()
    # Train Branch with mod1 on dataset 1
    for i, ((data1, label1), (data2, label2)) in enumerate(zip(train_data1, train_data2)):
        data1 = data1.as_in_context(ctx)
        data2 = data2.as_in_context(ctx)
        label1 = label1.as_in_context(ctx)
        #print "label:", label1, label2
        with autograd.record():
            output = net(data1, data2)
            loss = softmax_cross_entropy(output, label1)
            loss.backward()
        trainer.step(batch_size)

        metric.update([label1], [output])

        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                    else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

        if i % 100 == 0 and i > 0:
            name, acc = metric.get()
            print('[Epoch %d Batch %d] Loss: %s Training: %s=%f'%(e, i, moving_loss, name, acc))

    _, train_accuracy = metric.get()
    _, test_accuracy = evaluate_accuracy(test_data1, test_data2, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s\n" % (e, moving_loss, train_accuracy, test_accuracy))

Is it a shared weight siamese net?

edmBernard commented 6 years ago

I think it should work. But for readability I would completely define your shared network first and then run it on each input, the same way as in my example.
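For example, here is a minimal sketch of that layout (assuming the same MNIST setup and hyperparameters as above; the SiameseNet / shared_branch names are just illustrative). The shared branch is built once as an nn.HybridSequential and called on both inputs:

import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn

num_outputs = 10

class SiameseNet(nn.HybridBlock):
    def __init__(self, **kwargs):
        super(SiameseNet, self).__init__(**kwargs)
        with self.name_scope():
            # the shared branch is defined once; its parameters are reused for both inputs
            self.shared_branch = nn.HybridSequential()
            self.shared_branch.add(
                nn.Conv2D(128, kernel_size=3, strides=1, padding=1, activation='relu'),
                nn.MaxPool2D(pool_size=3, strides=2),
                nn.Conv2D(256, kernel_size=3, strides=1, padding=1, activation='relu'),
                nn.MaxPool2D(pool_size=3, strides=2),
                nn.Conv2D(256, kernel_size=3, strides=1, padding=1, activation='relu'),
                nn.MaxPool2D(pool_size=3, strides=2),
                nn.Flatten())
            self.dense1 = nn.Dense(1024, activation='relu')
            self.dense2 = nn.Dense(num_outputs)

    def hybrid_forward(self, F, img1, img2):
        # the same branch (same weights) processes both images
        out1 = self.shared_branch(img1)
        out2 = self.shared_branch(img2)
        out = F.concat(out1, out2, dim=1)
        return self.dense2(self.dense1(out))

Because both calls go through self.shared_branch, the two branches use exactly the same parameters. Your code does the same thing in effect, since conv1/conv2/conv3 are instantiated only once and applied to both images.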