Separable Depthwise Layer?

Hi, heavy questioner is back.

today, I tried to apply popular separable dw conv layer introduced in theano 0.10.0(but not in lasagne) to DAN, but as I'm a newbie in theano, I got lost....

below is my custom layer code for "SeparableDepthWiseConvolutionLayer"

[ SeparableDepthWiseConvolutionLayer.py ]

import theano
from lasagne.layers import Layer
import lasagne.init
from theano.tensor.nnet.abstract_conv import separable_conv2d
import numpy
rng = numpy.random

class SeparableDepthWiseConvolutionLayer(Layer):
    def __init__(self, input, input_shape, num_dw_channels=32, num_pw_filters=32, filter_size=[3, 3], stride=[1, 1], **kwargs):
        self.input_shape=input_shape
        self.num_dw_channels = num_dw_channels
        self.num_pw_filters  = num_pw_filters
        self.filter_size=filter_size
        self.stride=stride
        # dimension : self.num_dw_channels, 1, self.filter_size[0], self.filter_size[1]
        self.depthwise_filters= numpy.asarray(rng.randn(self.num_dw_channels * self.filter_size[0] * self.filter_size[1])).astype(theano.config.floatX)
        # dimension : (self.num_pw_filters, self.num_dw_channels, 1, 1)
        self.pointwise_filters= numpy.asarray(rng.randn(self.num_pw_filters * self.num_dw_channels)).astype(theano.config.floatX)
        super(SeparableDepthWiseConvolutionLayer, self).__init__(input, **kwargs)

    def get_output_shape_for(self, input_shape):
        return (1, input_shape[1]*int(self.stride[0]), input_shape[2]//self.stride[0],input_shape[3]//self.stride[0])

    def get_output_for(self, input, **kwargs):
        x_sym = theano.tensor.tensor4('x')
        dfilter_sym = theano.tensor.tensor4('d')
        pfilter_sym = theano.tensor.tensor4('p')
        sep_op = separable_conv2d(x_sym ,
                                  dfilter_sym ,
                                  pfilter_sym ,
                                  self.num_dw_channels,
                                  input_shape=self.input_shape,
                                  depthwise_filter_shape=(self.num_dw_channels,1,self.filter_size[0],self.filter_size[1]),
                                  pointwise_filter_shape=(self.num_pw_filters, self.num_dw_channels, 1, 1)
                                  )
        fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
        output = fun(input ,self.depthwise_filters, self.pointwise_filters)
        return output

and then, I replaced all existing lasagnes' conv+batchnorm layers in createCNN() with the new layer as below;

[ DANtraining.py ]

from SepDWConvLayer import SeparableDepthWiseConvolutionLayer as SPDWConv ... ... def createCNN(self): ... ...

net['s1_conv1_1'] = batch_norm(Conv2DLayer(net['input'], 64, 3, pad='same', W=GlorotUniform('relu'))) ->> net['s1_conv1_1'] = batch_norm(SPDWConv(net['s1_conv0_1'], net['s1_conv0_1'].output_shape, 32, 64,[3, 3], stride=[1, 1] )) net['s1_conv1_2'] = batch_norm(Conv2DLayer(net['s1_conv1_1'], 64, 3, pad='same', W=GlorotUniform('relu'))) ->> net['s1_conv1_2'] = batch_norm(SPDWConv(net['s1_conv1_1'], net['s1_conv1_1'].output_shape, 64, 128,[3, 3], stride=[2, 2] )) .. .. .. so on.

so the createCNN() looks like :

def createCNN(self):
    net = {}
    net['input'] = lasagne.layers.InputLayer(shape=(1, self.nChannels, self.imageHeight, self.imageWidth), input_var=self.data)
    print("Input shape: {0}".format(net['input'].output_shape))

    #STAGE 1
    net['s1_conv0_1'] = batch_norm(Conv2DLayer(net['input'], 32, 3, stride=(1,1), pad='same', W=GlorotUniform('relu')))
    print("s1_conv0_1 shape: {0}".format(net['s1_conv0_1'].output_shape))

    net['s1_conv1_1'] = batch_norm(SPDWConv(net['s1_conv0_1'], net['s1_conv0_1'].output_shape, 32,  64,[3, 3], stride=[1, 1] ))
    print("s1_conv1_1 shape: {0}".format(net['s1_conv1_1'].output_shape))
    net['s1_conv1_2'] = batch_norm(SPDWConv(net['s1_conv1_1'], net['s1_conv1_1'].output_shape, 64, 128,[3, 3], stride=[2, 2] ))
    print("s1_conv1_2 shape: {0}".format(net['s1_conv1_2'].output_shape))

    net['s1_conv2_1'] = batch_norm(SPDWConv(net['s1_conv1_2'], net['s1_conv1_2'].output_shape,128, 128,[3, 3], stride=[1, 1] ))
    print("s1_conv2_1 shape: {0}".format(net['s1_conv2_1'].output_shape))
    net['s1_conv2_2'] = batch_norm(SPDWConv(net['s1_conv2_1'], net['s1_conv2_1'].output_shape,128, 256,[3, 3], stride=[2, 2] ))
    print("s1_conv2_2 shape: {0}".format(net['s1_conv2_2'].output_shape))

    net['s1_conv3_1'] = batch_norm(SPDWConv(net['s1_conv2_2'], net['s1_conv2_2'].output_shape,256, 256,[3, 3], stride=[1, 1] ))
    print("s1_conv3_1 shape: {0}".format(net['s1_conv3_1'].output_shape))
    net['s1_conv3_2'] = batch_norm(SPDWConv(net['s1_conv3_1'], net['s1_conv3_1'].output_shape,256, 512,[3, 3], stride=[2, 2] ))
    print("s1_conv3_2 shape: {0}".format(net['s1_conv3_2'].output_shape))

    net['s1_conv4_1'] = batch_norm(SPDWConv(net['s1_conv3_2'], net['s1_conv3_2'].output_shape,512, 512,[3, 3], stride=[1, 1] ))
    print("s1_conv4_1 shape: {0}".format(net['s1_conv4_1'].output_shape))
    net['s1_conv4_2'] = batch_norm(SPDWConv(net['s1_conv4_1'], net['s1_conv4_1'].output_shape,512, 512,[3, 3], stride=[1, 1] ))
    print("s1_conv4_2 shape: {0}".format(net['s1_conv4_2'].output_shape))
    net['s1_conv4_3'] = batch_norm(SPDWConv(net['s1_conv4_2'], net['s1_conv4_2'].output_shape,512, 512,[3, 3], stride=[1, 1] ))
    print("s1_conv4_3 shape: {0}".format(net['s1_conv4_3'].output_shape))
    net['s1_conv4_4'] = batch_norm(SPDWConv(net['s1_conv4_3'], net['s1_conv4_3'].output_shape,512, 512,[3, 3], stride=[1, 1] ))
    print("s1_conv4_4 shape: {0}".format(net['s1_conv4_4'].output_shape))
    net['s1_conv4_5'] = batch_norm(SPDWConv(net['s1_conv4_4'], net['s1_conv4_4'].output_shape,512, 512,[3, 3], stride=[1, 1] ))
    print("s1_conv4_5 shape: {0}".format(net['s1_conv4_5'].output_shape))

    net['s1_conv5_1'] = batch_norm(SPDWConv(net['s1_conv4_5'], net['s1_conv4_5'].output_shape,512, 1024,[3, 3], stride=[2, 2]))
    print("s1_conv5_1 shape: {0}".format(net['s1_conv5_1'].output_shape))
    net['s1_conv5_2'] = batch_norm(SPDWConv(net['s1_conv5_1'], net['s1_conv5_1'].output_shape,1024,1024,[3, 3], stride=[1, 1]))
    print("s1_conv5_2 shape: {0}".format(net['s1_conv5_2'].output_shape))

    net['s1_pool6'] = lasagne.layers.Pool2DLayer(net['s1_conv5_2'], 1)
    print("s1_pool6 shape: {0}".format(net['s1_pool6'].output_shape))

    net['s1_fc1_dropout'] = lasagne.layers.DropoutLayer(net['s1_pool6'], p=0.5)
    print("s1_fc1_dropout shape: {0}".format(net['s1_fc1_dropout'].output_shape))
    net['s1_fc1'] = batch_norm(lasagne.layers.DenseLayer(net['s1_fc1_dropout'], num_units=256, W=GlorotUniform('relu')))
    print("s1_fc1 shape: {0}".format(net['s1_fc1'].output_shape))
    net['s1_output'] = lasagne.layers.DenseLayer(net['s1_fc1'], num_units=136, nonlinearity=None)
    print("s1_output shape: {0}".format(net['s1_output'].output_shape))
    net['s1_landmarks'] = LandmarkInitLayer(net['s1_output'], self.initLandmarks)
    print("s1_landmarks shape: {0}".format(net['s1_landmarks'].output_shape)) 

    for i in range(1, self.nStages):
        self.addDANStage(i + 1, net)

    net['output'] = net['s' + str(self.nStages) + '_landmarks']

    return net

my questions are,

above code makes an error in get_output_for : x_sym = theano.tensor.tensor4('x') " Expected an array-like object, but found a Variable: maybe you are trying to call a function on a (possibly shared) variable instead of a numeric array? "

( I'm referencing the theano test code "test_abstract_conv.py", https://github.com/Theano/Theano/blob/8dccbe6e1000239f57006e556fe8f737bb717aba/theano/tensor/nnet/tests/test_abstract_conv.py

There is def test_interface2d(self): in line 1683, and they test it with real numpy array values for input and deptwise/pointwise filters...

self.x = np.array([[[[1, 2, 3, 4, 5], [3, 2, 1, 4, 5], [3, 3, 1, 3, 6], [5, 3, 2, 1, 1], [4, 7, 1, 2, 1]], [[3, 3, 1, 2, 6], [6, 5, 4, 3, 1], [3, 4, 5, 2, 3], [6, 4, 1, 3, 4], [2, 3, 4, 2, 5]]]]).astype(theano.config.floatX) self.depthwise_filter = np.array([[[[3, 2, 1], [5, 3, 2], [6, 4, 2]]], [[[5, 5, 2], [3, 7, 4], [3, 5, 4]]], [[[7, 4, 7], [5, 3, 3], [1, 3, 1]]], [[[4, 4, 4], [2, 4, 6], [0, 0, 7]]]]).astype(theano.config.floatX) self.pointwise_filter = np.array([[[[4]], [[1]], [[3]], [[5]]], [[[2]], [[1]], [[2]], [[8]]]]).astype(theano.config.floatX) x_sym = theano.tensor.tensor4('x') dfilter_sym = theano.tensor.tensor4('d') pfilter_sym = theano.tensor.tensor4('p') sep_op = separable_conv2d(x_sym, dfilter_sym, pfilter_sym, self.x.shape[1]) fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN') top = fun(self.x, self.depthwise_filter, self.pointwise_filter)

but in my code, I'm passing "input"(is it TensorVariable?) to the theano.function

def get_output_for(self, input, kwargs): x_sym = theano.tensor.tensor4('x') dfilter_sym = theano.tensor.tensor4('d') pfilter_sym = theano.tensor.tensor4('p') sep_op = separable_conv2d(x_sym , dfilter_sym , pfilter_sym , ...) fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN') output = fun(input** ,self.depthwise_filters, self.pointwise_filters) return output

another try also failed :

def __init__(self, input, input_shape, num_dw_channels=32, num_pw_filters=32, filter_size=[3, 3], stride=[1, 1],
             **kwargs):
    self.input=inputs

... ... def get_output_for(self, input, **kwargs): x_sym = theano.tensor.tensor4('x_sym') dfilter_sym = theano.tensor.tensor4('dfilter_sym') pfilter_sym = theano.tensor.tensor4('pfilter_sym') sep_op = separable_conv2d(x_sym , dfilter_sym, pfilter_sym,... ) fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN') output = fun(self.input ,self.depthwise_filters, self.pointwise_filters) return output

error : x_sym = theano.tensor.tensor4('x_sym') float() argument must be a string or a number

is there a way to pass the real value to theano.function to avoid this error? I thouht, I should use symbols to build a graph for compile.

do you think the theano's new separable_conv2d op(which uses Abstract2D class) can replace the existing Conv2DLayer as I did? "Abstract" seems to me, it presents just an interface, so user should implement the actual mothod. But when I followed the theano codes, it seems there is an actual implementation for depthwise + pointwise conv in abtract_conv.py (https://github.com/Theano/Theano/blob/4d46e410bc765e9e288996c7da693146df69e3b9/theano/tensor/nnet/abstract_conv.py).
what method could you suggest for initializing depthwise/pointwise weight?

thank you in advance!

MarekKowalski / DeepAlignmentNetwork

Separable Depthwise Layer? #11

[ SeparableDepthWiseConvolutionLayer.py ]

[ DANtraining.py ]