I followed your code and tried to write a conv net:
```python
import mxnet as mx
from mxnet import nd, autograd
from mxnet import gluon
from mxnet.gluon import nn
import numpy as np
import sys

ctx = mx.gpu()
batch_size = 128
num_inputs = 784
num_outputs = 10

def transform(data, label):
    data = nd.transpose(data, (2,0,1))
    return data.astype(np.float32)/255, label.astype(np.float32)

train_data1 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform), batch_size, shuffle=False)
test_data1 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform), batch_size, shuffle=False)
train_data2 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform), batch_size, shuffle=False)
test_data2 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform), batch_size, shuffle=False)

class siamese_net(nn.HybridBlock):
    def __init__(self, **kwargs):
        super(siamese_net, self).__init__(**kwargs)
        with self.name_scope():
            self.conv1 = nn.Conv2D(128, kernel_size=3, strides=1, padding=1, activation='relu')
            self.conv2 = nn.Conv2D(256, kernel_size=3, strides=1, padding=1, activation='relu')
            self.conv3 = nn.Conv2D(256, kernel_size=3, strides=1, padding=1, activation='relu')
            self.dense1 = nn.Dense(1024, activation='relu')
            self.dense2 = nn.Dense(num_outputs)
            self.maxpool = nn.MaxPool2D(pool_size=3, strides=2)

    def convNet(self, img):
        conv = self.conv1(img)
        conv = self.maxpool(conv)
        conv = self.conv2(conv)
        conv = self.maxpool(conv)
        conv = self.conv3(conv)
        conv = self.maxpool(conv)
        flatten = nn.Flatten()
        return flatten(conv)

    def hybrid_forward(self, F, img1, img2):
        out1 = self.convNet(img1)
        out2 = self.convNet(img2)
        out = F.concat(out1, out2, dim=1)
        out = self.dense2(self.dense1(out))
        return out

def evaluate_accuracy(data_iterator1, data_iterator2, net):
    acc = mx.metric.Accuracy()
    for i, ((data1, label1), (data2, label2)) in enumerate(zip(data_iterator1, data_iterator2)):
        data1 = data1.as_in_context(ctx)
        data2 = data2.as_in_context(ctx)
        label1 = label1.as_in_context(ctx)
        output = net(data1, data2)
        acc.update([label1], [output])
    return acc.get()

net = siamese_net()
net.initialize(ctx=ctx)
net.hybridize()

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.05})

epochs = 4
moving_loss = 0.
smoothing_constant = .01
metric = mx.metric.Accuracy()

print("\n#### Shared+Module1 Training ####")
for e in range(epochs):
    metric.reset()
    # Train Branch with mod1 on dataset 1
    for i, ((data1, label1), (data2, label2)) in enumerate(zip(train_data1, train_data2)):
        data1 = data1.as_in_context(ctx)
        data2 = data2.as_in_context(ctx)
        label1 = label1.as_in_context(ctx)
        #print "label:", label1, label2
        with autograd.record():
            output = net(data1, data2)
            loss = softmax_cross_entropy(output, label1)
        loss.backward()
        trainer.step(batch_size)
        metric.update([label1], [output])

        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

        if i % 100 == 0 and i > 0:
            name, acc = metric.get()
            print('[Epoch %d Batch %d] Loss: %s Training: %s=%f' % (e, i, moving_loss, name, acc))

    _, train_accuracy = metric.get()
    _, test_accuracy = evaluate_accuracy(test_data1, test_data2, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s\n" % (e, moving_loss, train_accuracy, test_accuracy))
```
Is this a shared-weight siamese net?
I think it should work. But I think it would increase readability to completely define your shared network first and then run it on each input the same way as in my example.
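For illustration, here is a minimal sketch of that refactoring (not the original example from this thread): the shared branch from the code above is wrapped in a single `HybridSequential` built in `__init__`, so the weight sharing is explicit from the definition alone, and `hybrid_forward` simply calls the same branch on both inputs.

```python
import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn

num_outputs = 10

class SiameseNet(nn.HybridBlock):
    def __init__(self, **kwargs):
        super(SiameseNet, self).__init__(**kwargs)
        with self.name_scope():
            # One shared convolutional branch, defined completely up front.
            self.branch = nn.HybridSequential()
            self.branch.add(
                nn.Conv2D(128, kernel_size=3, strides=1, padding=1, activation='relu'),
                nn.MaxPool2D(pool_size=3, strides=2),
                nn.Conv2D(256, kernel_size=3, strides=1, padding=1, activation='relu'),
                nn.MaxPool2D(pool_size=3, strides=2),
                nn.Conv2D(256, kernel_size=3, strides=1, padding=1, activation='relu'),
                nn.MaxPool2D(pool_size=3, strides=2),
                nn.Flatten(),
            )
            self.dense1 = nn.Dense(1024, activation='relu')
            self.dense2 = nn.Dense(num_outputs)

    def hybrid_forward(self, F, img1, img2):
        # The same branch (same parameters) processes both inputs.
        out1 = self.branch(img1)
        out2 = self.branch(img2)
        out = F.concat(out1, out2, dim=1)
        return self.dense2(self.dense1(out))

net = SiameseNet()
net.initialize(ctx=mx.gpu())
net.hybridize()
```

This keeps the training loop unchanged while making it obvious at a glance that both images go through the same parameters.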