sveitser / kaggle_diabetic

2nd place solution for the Kaggle Diabetic Retinopathy Detection Challenge
MIT License

Some questions on your conv. n.n. architecture and blend network #5

Closed alishakiba closed 8 years ago

alishakiba commented 8 years ago

Dear Mathis,

I have implemented the conv. net according to the documentation in your report (https://www.kaggle.com/blobs/download/forum-message-attachment-files/2797/report.pdf), albeit with some modifications that I think make it suitable for a small training set of 1000 images (the five classes are approximately equally distributed).

1- In your convolutional network, the output is a 512-element vector from the maxout layer. As far as I understand neural networks, we need a label for every input to the network. So what is the label here?

2- The blend network takes 8193 inputs. How did you arrive at this number?

My code is as follows:

from __future__ import print_function

import sys
import os
import time

import numpy as np
import theano
import theano.tensor as T

import lasagne

import pandas as pd
from PIL import Image

input_var = T.tensor4('inputs')
target_var = T.ivector('targets')

network = lasagne.layers.InputLayer(shape=(None, 3, 512, 512),  # None = variable batch size
                                        input_var=input_var)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=8, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform(),
            stride=(2, 2))
network = lasagne.layers.Conv2DLayer(
            network, num_filters=8, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(3,3), stride=2)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=16, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform(),
            stride=(2, 2))
network = lasagne.layers.Conv2DLayer(
            network, num_filters=16, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=16, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(3,3), stride=2)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=32, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=32, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=32, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(3,3), stride=2)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=64, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=64, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=64, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(3,3), stride=2)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=128, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=128, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(3,3), stride=2)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=128, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=128, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(3,3), stride=2)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=128, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=128, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(3,3), stride=2)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=128, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.Conv2DLayer(
            network, num_filters=128, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(3,3), stride=2)
network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            num_units=128,
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.FeaturePoolLayer(network, pool_size=2)  # max over pairs of units, i.e. maxout

network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            num_units=128,
            nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.FeaturePoolLayer(network, pool_size=2)

network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            num_units=5,
            nonlinearity=lasagne.nonlinearities.softmax)
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.001, momentum=0.9)
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
      target_var)
test_loss = test_loss.mean()
# As a bonus, also create an expression for the classification accuracy:
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
      dtype=theano.config.floatX)

# Compile a function performing a training step on a mini-batch (by giving
# the updates dictionary) and returning the corresponding training loss:
train_fn = theano.function([input_var, target_var], loss, updates=updates)

# Compile a second function computing the validation loss and accuracy:
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    # yield consecutive (inputs, targets) mini-batches, dropping the remainder
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

BASE_TRAIN_DIR = r'/home/ali/DiabeticRethinopathy/diabeticrethinopathy/data/train/'
train_labels = pd.read_csv(r'/home/ali/DiabeticRethinopathy/diabeticrethinopathy/data/train.csv', sep=',')
train_data = []
for img_id in train_labels['image']:
    # load each image and transpose to (channels, height, width)
    train_data.append(np.array(Image.open(os.path.join(BASE_TRAIN_DIR, img_id + '.tiff'))).T)
train_data = np.array(train_data).astype("float32") / 255.0

BASE_TEST_DIR = r'/home/ali/DiabeticRethinopathy/diabeticrethinopathy/data/test/'
test_labels = pd.read_csv(r'/home/ali/DiabeticRethinopathy/diabeticrethinopathy/data/test.csv', sep=',')
test_data = []
for img_id in test_labels['image']:
    test_data.append(np.array(Image.open(os.path.join(BASE_TEST_DIR, img_id + '.tiff'))).T)
test_data = np.array(test_data).astype("float32") / 255.0

# integer stage labels are used directly as targets below
# (categorical_crossentropy accepts an ivector of class indices),
# so no one-hot encoding is needed
num_epochs = 10
X_train = train_data[:960]
y_train = train_labels['level'].values.astype("uint8")[:960]
# NB: the same test images serve as both validation and test set here
X_val = test_data[:960]
y_val = test_labels['level'].values.astype("uint8")[:960]
X_test = test_data[:960]
y_test = test_labels['level'].values.astype("uint8")[:960]
for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(X_train, y_train, 32, shuffle=True):
        print('*', end=' ')
        inputs, targets = batch
        train_err += train_fn(inputs, targets)
        train_batches += 1
    print('$')
    # And a full pass over the validation data:
    val_err = 0
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, 32, shuffle=True):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        val_err += err
        val_acc += acc
        val_batches += 1
        print('+', end=' ')
        if val_batches > 1:
            break  # only evaluate two validation batches per epoch
    print('$')

    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    # guard against division by zero when no full batch was processed
    train_batches = max(train_batches, 1)
    val_batches = max(val_batches, 1)
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
    print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))

# After training, we compute and print the test error:
test_err = 0
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 32, shuffle=False):
    inputs, targets = batch
    err, acc = val_fn(inputs, targets)
    test_err += err
    test_acc += acc
    test_batches += 1
print("Final results:")
test_batches = max(test_batches, 1)  # guard against division by zero
print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))
sveitser commented 8 years ago
  1. The last layer is a DenseLayer with 1 unit, so the output is a single number. The target values are [0, 1, 2, 3, 4], the stage of the disease. We were doing regression with mean squared error loss and thresholding at [0.5, 1.5, 2.5, 3.5], not classification (see the first sketch below).
  2. We used the output of the RMS pool layer (averaged over the different test-time augmentations) as well as its standard deviation, for both of the patient's eyes, plus an indicator variable for the left eye. The RMS pool layer outputs 512 "images" of 2x2 pixels, i.e. 2048 features each for the mean and the standard deviation per eye. In total that gives 2048 * 2 * 2 + 1 = 8193 features for blending (see the second sketch below).
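For reference, here is a minimal sketch of what such a regression head could look like in Lasagne, assuming network is a convolutional stack like the one in your code above and that targets are passed as a float vector of stages (the exact head in the repository differs in detail):

import numpy as np
import theano.tensor as T
import lasagne

target_var = T.fvector('targets')  # disease stages as floats, 0.0 to 4.0

# regression head: a single linear output unit instead of a 5-way softmax
network = lasagne.layers.DenseLayer(
    network, num_units=1, nonlinearity=lasagne.nonlinearities.linear)

prediction = lasagne.layers.get_output(network).flatten()
# mean squared error against the stage instead of categorical cross-entropy
loss = lasagne.objectives.squared_error(prediction, target_var).mean()

# at prediction time, threshold the continuous output back to a stage
def to_stage(y_continuous):
    return np.digitize(y_continuous, [0.5, 1.5, 2.5, 3.5])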
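And a small NumPy sketch of how the 8193 blend features add up, with random arrays standing in for the real RMS pool outputs (the names and the number of augmentations here are made up for illustration):

import numpy as np

n_aug = 20  # number of test-time augmentations (illustrative)
# flattened RMS pool output: 512 maps of 2x2 pixels = 2048 features,
# one row per augmentation, for each eye
left = np.random.rand(n_aug, 512 * 2 * 2).astype('float32')
right = np.random.rand(n_aug, 512 * 2 * 2).astype('float32')

# per eye: mean and standard deviation over augmentations -> 2 * 2048 features
def summarize(feats):
    return np.concatenate([feats.mean(axis=0), feats.std(axis=0)])

# blend input for one eye: summaries of both eyes plus a left-eye indicator
x_left = np.concatenate([summarize(left), summarize(right), [1.0]])
assert x_left.shape == (2048 * 2 * 2 + 1,)  # 8193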