Model architectures cannot be executed

Defining the following architectures for LeNet and AlexNet (as used by CryptGPU) results in a size-mismatch error at layer 4, even though the same architectures run in other frameworks. Do you have any idea how to fix this?

{
    "name": "LeNet",
    "dataset": "MNIST",
    "batch_size": 128,
    "input_size": 784,
    "num_classes": 10,
    "model": [
        {
            "layer": "cnn",
            "input_hw": [28, 28],
            "in_channels": 1,
            "out_channels": 6,
            "filter_hw": [5, 5],
            "stride": 1,
            "padding": 2
        },
        {
            "layer": "relu",
            "input_dim": 4704
        },
        {
            "layer": "averagepool",
            "input_hw": [28, 28],
            "in_channels": 6,
            "pool_hw": [2, 2],
            "stride": 2
        },
        {
            "layer": "cnn",
            "input_hw": [14, 14],
            "in_channels": 6,
            "out_channels": 16,
            "filter_hw": [5, 5],
            "stride": 1,
            "padding": 0
        },
        {
            "layer": "relu",
            "input_dim": 2240
        },
        {
            "layer": "averagepool",
            "input_hw": [10, 10],
            "in_channels": 16,
            "pool_hw": [2, 2],
            "stride": 2
        },
        {
            "layer": "fc",
            "input_dim": 400,
            "output_dim": 120
        },
        {
            "layer": "relu",
            "input_dim": 120
        },
        {
            "layer": "fc",
            "input_dim": 120,
            "output_dim": 84
        },
        {
            "layer": "relu",
            "input_dim": 84
        },
        {
            "layer": "fc",
            "input_dim": 84,
            "output_dim": 10
        }
    ]
}

----------------------------------------------
(1) CNN Layer             28 x 28 x 1
                          5 x 5         (Filter Size)
                          1 , 2         (Stride, padding)
                          1             (Batch Size)
                          28 x 28 x 6   (Output)
----------------------------------------------
(2) ReLU Layer            1 x 4704
----------------------------------------------
(3) Averagepool Layer     28 x 28 x 6
                          2             (Pooling Size)
                          2             (Stride)
                          1             (Batch Size)
----------------------------------------------
(4) CNN Layer             14 x 14 x 6
                          5 x 5         (Filter Size)
                          1 , 0         (Stride, padding)
                          1             (Batch Size)
                          10 x 10 x 16  (Output)
----------------------------------------------
(5) ReLU Layer            1 x 2240
----------------------------------------------
(6) Averagepool Layer     10 x 10 x 16
                          2             (Pooling Size)
                          2             (Stride)
                          1             (Batch Size)
----------------------------------------------
(7) FC Layer              400 x 120
                          1              (Batch Size)
----------------------------------------------
(8) ReLU Layer            1 x 120
----------------------------------------------
(9) FC Layer              120 x 84
                          1              (Batch Size)
----------------------------------------------
(10) ReLU Layer           1 x 84
----------------------------------------------
(11) FC Layer             84 x 10
                          1              (Batch Size)

 -- Epoch 0 (3 iterations, log_lr = 3) --
iteration,0
layer 0
cnn,fw activation,min,-1.767334e+00,avg,-1.063008e-02,max,1.515381e+00
layer 1
relu,fw activation,min,0.000000e+00,avg,8.892500e-02,max,1.515381e+00
layer 2
avgpool,fw activation,min,0.000000e+00,avg,8.888982e-02,max,1.299316e+00
layer 3
cnn,fw activation,min,-1.362305e+00,avg,-5.258331e-02,max,1.286377e+00
layer 4
piranha: src/nn/../mpc/RSS.inl:545: void selectShare(const RSS<T, I>&, const RSS<T, I2>&, const RSS<U, I3>&, RSS<T, I4>&) [with T = long unsigned int; U = unsigned char; I = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >; I2 = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >; I3 = thrust::detail::normal_iterator<thrust::device_ptr<unsigned char> >; I4 = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >]: Assertion `x.size() == y.size() && x.size() == b.size() && x.size() == z.size() && "RSS selectShare input size mismatch"' failed.
piranha: src/nn/../mpc/RSS.inl:545: void selectShare(const RSS<T, I>&, const RSS<T, I2>&, const RSS<U, I3>&, RSS<T, I4>&) [with T = long unsigned int; U = unsigned char; I = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >; I2 = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >; I3 = thrust::detail::normal_iterator<thrust::device_ptr<unsigned char> >; I4 = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >]: Assertion `x.size() == y.size() && x.size() == b.size() && x.size() == z.size() && "RSS selectShare input size mismatch"' failed.
piranha: src/nn/../mpc/RSS.inl:545: void selectShare(const RSS<T, I>&, const RSS<T, I2>&, const RSS<U, I3>&, RSS<T, I4>&) [with T = long unsigned int; U = unsigned char; I = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >; I2 = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >; I3 = thrust::detail::normal_iterator<thrust::device_ptr<unsigned char> >; I4 = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >]: Assertion `x.size() == y.size() && x.size() == b.size() && x.size() == z.size() && "RSS selectShare input size mismatch"' failed.
^C./files/samples/localhost_runner.sh: line 6: 927560 Aborted

{
    "name": "AlexNet_CryptGpu",
    "dataset": "CIFAR-10",
    "batch_size": 128,
    "input_size": 3072,
    "num_classes": 10,
    "model": [
        {
            "layer": "cnn",
            "input_hw": [32, 32],
            "in_channels": 3,
            "out_channels": 96,
            "filter_hw": [11, 11],
            "stride": 4,
            "padding": 9
        },
        {
            "layer": "relu",
            "input_dim": 9600
        },
        {
            "layer": "averagepool",
            "input_hw": [10, 10],
            "in_channels": 96,
            "pool_hw": [3, 3],
            "stride": 2
        },
        {
            "layer": "cnn",
            "input_hw": [4, 4],
            "in_channels": 96,
            "out_channels": 256,
            "filter_hw": [5, 5],
            "stride": 1,
            "padding": 1
        },
        {
            "layer": "relu",
            "input_dim": 4096
        },
        {
            "layer": "averagepool",
            "input_hw": [4, 4],
            "in_channels": 256,
            "pool_hw": [2, 2],
            "stride": 1
        },
        {
            "layer": "cnn",
            "input_hw": [3, 3],
            "in_channels": 256,
            "out_channels": 384,
            "filter_hw": [3, 3],
            "stride": 1,
            "padding": 1
        },
        {
            "layer": "relu",
            "input_dim": 3456
        },
        {
            "layer": "cnn",
            "input_hw": [3, 3],
            "in_channels": 384,
            "out_channels": 384,
            "filter_hw": [3, 3],
            "stride": 1,
            "padding": 1
        },
        {
            "layer": "relu",
            "input_dim": 3456
        },
        {
            "layer": "cnn",
            "input_hw": [3, 3],
            "in_channels": 384,
            "out_channels": 256,
            "filter_hw": [3, 3],
            "stride": 1,
            "padding": 1
        },
        {
            "layer": "relu",
            "input_dim": 2304
        },
        {
            "layer": "fc",
            "input_dim": 2304,
            "output_dim": 256
        },
        {
            "layer": "relu",
            "input_dim": 256
        },
        {
            "layer": "fc",
            "input_dim": 256,
            "output_dim": 256
        },
        {
            "layer": "relu",
            "input_dim": 256
        },
        {
            "layer": "fc",
            "input_dim": 256,
            "output_dim": 10
        }
    ]
}

----------------------------------------------
(1) CNN Layer             32 x 32 x 3
                          11 x 11       (Filter Size)
                          4 , 9         (Stride, padding)
                          1             (Batch Size)
                          10 x 10 x 96  (Output)
----------------------------------------------
(2) ReLU Layer            1 x 9600
----------------------------------------------
(3) Averagepool Layer     10 x 10 x 96
                          3             (Pooling Size)
                          2             (Stride)
                          1             (Batch Size)
----------------------------------------------
(4) CNN Layer             4 x 4 x 96
                          5 x 5         (Filter Size)
                          1 , 1         (Stride, padding)
                          1             (Batch Size)
                          2 x 2 x 256   (Output)
----------------------------------------------
(5) ReLU Layer            1 x 4096
----------------------------------------------
(6) Averagepool Layer     4 x 4 x 256
                          2             (Pooling Size)
                          1             (Stride)
                          1             (Batch Size)
----------------------------------------------
(7) CNN Layer             3 x 3 x 256
                          3 x 3         (Filter Size)
                          1 , 1         (Stride, padding)
                          1             (Batch Size)
                          3 x 3 x 384   (Output)
----------------------------------------------
(8) ReLU Layer            1 x 3456
----------------------------------------------
(9) CNN Layer             3 x 3 x 384
                          3 x 3         (Filter Size)
                          1 , 1         (Stride, padding)
                          1             (Batch Size)
                          3 x 3 x 384   (Output)
----------------------------------------------
(10) ReLU Layer           1 x 3456
----------------------------------------------
(11) CNN Layer            3 x 3 x 384
                          3 x 3         (Filter Size)
                          1 , 1         (Stride, padding)
                          1             (Batch Size)
                          3 x 3 x 256   (Output)
----------------------------------------------
(12) ReLU Layer           1 x 2304
----------------------------------------------
(13) FC Layer             2304 x 256
                          1              (Batch Size)
----------------------------------------------
(14) ReLU Layer           1 x 256
----------------------------------------------
(15) FC Layer             256 x 256
                          1              (Batch Size)
----------------------------------------------
(16) ReLU Layer           1 x 256
----------------------------------------------
(17) FC Layer             256 x 10
                          1              (Batch Size)
Error opening training data file at files/CIFAR-10/train_data
Error opening training label file at files/CIFAR-10/train_labels
Error opening test data file at files/CIFAR-10/test_data
Error opening test label file at files/CIFAR-10/test_label
TRAINING, EPOCHS = 1 ITERATIONS = 3

 == Training (1 epochs) ==

 -- Epoch 0 (3 iterations, log_lr = 3) --
iteration,0
layer 0
cnn,fw activation,min,0.000000e+00,avg,0.000000e+00,max,0.000000e+00
layer 1
relu,fw activation,min,0.000000e+00,avg,0.000000e+00,max,0.000000e+00
layer 2
avgpool,fw activation,min,0.000000e+00,avg,0.000000e+00,max,0.000000e+00
layer 3
cnn,fw activation,min,0.000000e+00,avg,0.000000e+00,max,0.000000e+00
layer 4
piranha: src/nn/../mpc/RSS.inl:545: void selectShare(const RSS<T, I>&, const RSS<T, I2>&, const RSS<U, I3>&, RSS<T, I4>&) [with T = long unsigned int; U = unsigned char; I = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >; I2 = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >; I3 = thrust::detail::normal_iterator<thrust::device_ptr<unsigned char> >; I4 = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >]: Assertion `x.size() == y.size() && x.size() == b.size() && x.size() == z.size() && "RSS selectShare input size mismatch"' failed.
piranha: src/nn/../mpc/RSS.inl:545: void selectShare(const RSS<T, I>&, const RSS<T, I2>&, const RSS<U, I3>&, RSS<T, I4>&) [with T = long unsigned int; U = unsigned char; I = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >; I2 = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >; I3 = thrust::detail::normal_iterator<thrust::device_ptr<unsigned char> >; I4 = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >]: Assertion `x.size() == y.size() && x.size() == b.size() && x.size() == z.size() && "RSS selectShare input size mismatch"' failed.
piranha: src/nn/../mpc/RSS.inl:545: void selectShare(const RSS<T, I>&, const RSS<T, I2>&, const RSS<U, I3>&, RSS<T, I4>&) [with T = long unsigned int; U = unsigned char; I = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >; I2 = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >; I3 = thrust::detail::normal_iterator<thrust::device_ptr<unsigned char> >; I4 = thrust::detail::normal_iterator<thrust::device_ptr<long unsigned int> >]: Assertion `x.size() == y.size() && x.size() == b.size() && x.size() == z.size() && "RSS selectShare input size mismatch"' failed.
./files/samples/localhost_runner.sh: line 6: 946446 Aborted

ucbrise / piranha

Model architectures cannot be executed #17