chuanqi305 / MobileNet-SSD

Caffe implementation of the Google MobileNet-SSD detection network, with pretrained weights on VOC0712 (mAP = 0.727).
MIT License

forward time and model size #128

Open yja1 opened 6 years ago

yja1 commented 6 years ago

I trained a model: the base CNN part uses your MobileNet-SSD, followed by SSH M1/M2/M3 modules, and I replaced the depthwise convolutions with the DepthwiseConvolution layer from https://github.com/yonghenglh6/DepthwiseConvolution. The final model is 101 MB (the SSH VGG16 model is only 96 MB) and the forward pass takes 100 ms for a 1200x1600 input. I don't understand why it is so slow and why the model is bigger.
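A quick way to see where the 101 MB and the 100 ms actually go is to dump per-layer weight sizes with pycaffe, and to get a per-layer forward breakdown from the stock `caffe time` tool (`caffe time -model <deploy prototxt> -iterations 20`, run on a copy of the net without the Python layers). A minimal sketch, assuming pycaffe is built; the file names are placeholders:

```python
import numpy as np
import caffe

# Placeholder file names -- point these at your own prototxt and weights.
net = caffe.Net('ssh_mobilenet_deploy.prototxt', 'ssh_mobilenet.caffemodel', caffe.TEST)

# Weight size per layer in float32, largest first. The *_ohem branches
# reuse the same weights via "param { name: ... }", so they are stored once.
rows = []
for name, blobs in net.params.items():
    count = sum(int(np.prod(b.data.shape)) for b in blobs)
    rows.append((count * 4.0 / 1e6, count, name))
for mb, count, name in sorted(rows, reverse=True)[:15]:
    print('%-35s %10d params %7.2f MB' % (name, count, mb))
print('total: %.1f MB' % sum(r[0] for r in rows))
```

The same kind of per-layer view from `caffe time` shows which layers dominate the forward time.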

My prototxt:

```
# transform_param {
#   scale: 0.017
#   mirror: false
#   crop_size: 224
#   mean_value: [127.5, 127.5, 127.5]
# }

name: "SSH" layer { name: 'input-data' type: 'Python' top: 'data' top: 'im_info' top: 'gt_boxes' python_param { module: 'roi_data_layer.layer' layer: 'RoIDataLayer' param_str: "'num_classes': 2" } }

layer { name: "conv0" type: "Convolution" bottom: "data" top: "conv0" param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 32 bias_term: false pad: 1 kernel_size: 3 stride: 2 weight_filler { type: "msra" } } } layer { name: "conv0/bn" type: "BatchNorm" bottom: "conv0" top: "conv0" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv0/scale" type: "Scale" bottom: "conv0" top: "conv0" param { lr_mult: 0 decay_mult: 0.0 } param { lr_mult: 0 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv0/relu" type: "ReLU" bottom: "conv0" top: "conv0" } layer { name: "conv1/dw" type: "DepthwiseConvolution" bottom: "conv0" top: "conv1/dw" param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 32 bias_term: false pad: 1 kernel_size: 3 group: 32

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv1/dw/bn" type: "BatchNorm" bottom: "conv1/dw" top: "conv1/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv1/dw/scale" type: "Scale" bottom: "conv1/dw" top: "conv1/dw" param { lr_mult: 0 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv1/dw/relu" type: "ReLU" bottom: "conv1/dw" top: "conv1/dw" } layer { name: "conv1" type: "Convolution" bottom: "conv1/dw" top: "conv1" param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 64 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv1/bn" type: "BatchNorm" bottom: "conv1" top: "conv1" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv1/scale" type: "Scale" bottom: "conv1" top: "conv1" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv1/relu" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "conv2/dw" type: "DepthwiseConvolution" bottom: "conv1" top: "conv2/dw" param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 64 bias_term: false pad: 1 kernel_size: 3 stride: 2 group: 64

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv2/dw/bn" type: "BatchNorm" bottom: "conv2/dw" top: "conv2/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv2/dw/scale" type: "Scale" bottom: "conv2/dw" top: "conv2/dw" param { lr_mult: 0 decay_mult: 0.0 } param { lr_mult: 0 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv2/dw/relu" type: "ReLU" bottom: "conv2/dw" top: "conv2/dw" } layer { name: "conv2" type: "Convolution" bottom: "conv2/dw" top: "conv2" param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 128 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv2/bn" type: "BatchNorm" bottom: "conv2" top: "conv2" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv2/scale" type: "Scale" bottom: "conv2" top: "conv2" param { lr_mult: 0 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv2/relu" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "conv3/dw" type: "DepthwiseConvolution" bottom: "conv2" top: "conv3/dw" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 128 bias_term: false pad: 1 kernel_size: 3 group: 128

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv3/dw/bn" type: "BatchNorm" bottom: "conv3/dw" top: "conv3/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv3/dw/scale" type: "Scale" bottom: "conv3/dw" top: "conv3/dw" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv3/dw/relu" type: "ReLU" bottom: "conv3/dw" top: "conv3/dw" } layer { name: "conv3" type: "Convolution" bottom: "conv3/dw" top: "conv3" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 128 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv3/bn" type: "BatchNorm" bottom: "conv3" top: "conv3" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv3/scale" type: "Scale" bottom: "conv3" top: "conv3" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv3/relu" type: "ReLU" bottom: "conv3" top: "conv3" } layer { name: "conv4/dw" type: "DepthwiseConvolution" bottom: "conv3" top: "conv4/dw" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 128 bias_term: false pad: 1 kernel_size: 3 stride: 2 group: 128

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv4/dw/bn" type: "BatchNorm" bottom: "conv4/dw" top: "conv4/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv4/dw/scale" type: "Scale" bottom: "conv4/dw" top: "conv4/dw" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv4/dw/relu" type: "ReLU" bottom: "conv4/dw" top: "conv4/dw" } layer { name: "conv4" type: "Convolution" bottom: "conv4/dw" top: "conv4" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 256 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv4/bn" type: "BatchNorm" bottom: "conv4" top: "conv4" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv4/scale" type: "Scale" bottom: "conv4" top: "conv4" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv4/relu" type: "ReLU" bottom: "conv4" top: "conv4" } layer { name: "conv5/dw" type: "DepthwiseConvolution" bottom: "conv4" top: "conv5/dw" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 256 bias_term: false pad: 1 kernel_size: 3 group: 256

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv5/dw/bn" type: "BatchNorm" bottom: "conv5/dw" top: "conv5/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv5/dw/scale" type: "Scale" bottom: "conv5/dw" top: "conv5/dw" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv5/dw/relu" type: "ReLU" bottom: "conv5/dw" top: "conv5/dw" } layer { name: "conv5" type: "Convolution" bottom: "conv5/dw" top: "conv5" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 256 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv5/bn" type: "BatchNorm" bottom: "conv5" top: "conv5" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv5/scale" type: "Scale" bottom: "conv5" top: "conv5" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv5/relu" type: "ReLU" bottom: "conv5" top: "conv5" } layer { name: "conv6/dw" type: "DepthwiseConvolution" bottom: "conv5" top: "conv6/dw" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 256 bias_term: false pad: 1 kernel_size: 3 stride: 2 group: 256

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv6/dw/bn" type: "BatchNorm" bottom: "conv6/dw" top: "conv6/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv6/dw/scale" type: "Scale" bottom: "conv6/dw" top: "conv6/dw" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv6/dw/relu" type: "ReLU" bottom: "conv6/dw" top: "conv6/dw" } layer { name: "conv6" type: "Convolution" bottom: "conv6/dw" top: "conv6" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 512 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv6/bn" type: "BatchNorm" bottom: "conv6" top: "conv6" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv6/scale" type: "Scale" bottom: "conv6" top: "conv6" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv6/relu" type: "ReLU" bottom: "conv6" top: "conv6" } layer { name: "conv7/dw" type: "DepthwiseConvolution" bottom: "conv6" top: "conv7/dw" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv7/dw/bn" type: "BatchNorm" bottom: "conv7/dw" top: "conv7/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv7/dw/scale" type: "Scale" bottom: "conv7/dw" top: "conv7/dw" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv7/dw/relu" type: "ReLU" bottom: "conv7/dw" top: "conv7/dw" } layer { name: "conv7" type: "Convolution" bottom: "conv7/dw" top: "conv7" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 512 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv7/bn" type: "BatchNorm" bottom: "conv7" top: "conv7" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv7/scale" type: "Scale" bottom: "conv7" top: "conv7" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv7/relu" type: "ReLU" bottom: "conv7" top: "conv7" } layer { name: "conv8/dw" type: "DepthwiseConvolution" bottom: "conv7" top: "conv8/dw" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv8/dw/bn" type: "BatchNorm" bottom: "conv8/dw" top: "conv8/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv8/dw/scale" type: "Scale" bottom: "conv8/dw" top: "conv8/dw" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv8/dw/relu" type: "ReLU" bottom: "conv8/dw" top: "conv8/dw" } layer { name: "conv8" type: "Convolution" bottom: "conv8/dw" top: "conv8" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 512 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv8/bn" type: "BatchNorm" bottom: "conv8" top: "conv8" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv8/scale" type: "Scale" bottom: "conv8" top: "conv8" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv8/relu" type: "ReLU" bottom: "conv8" top: "conv8" } layer { name: "conv9/dw" type: "DepthwiseConvolution" bottom: "conv8" top: "conv9/dw" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv9/dw/bn" type: "BatchNorm" bottom: "conv9/dw" top: "conv9/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv9/dw/scale" type: "Scale" bottom: "conv9/dw" top: "conv9/dw" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv9/dw/relu" type: "ReLU" bottom: "conv9/dw" top: "conv9/dw" } layer { name: "conv9" type: "Convolution" bottom: "conv9/dw" top: "conv9" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 512 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv9/bn" type: "BatchNorm" bottom: "conv9" top: "conv9" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv9/scale" type: "Scale" bottom: "conv9" top: "conv9" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv9/relu" type: "ReLU" bottom: "conv9" top: "conv9" } layer { name: "conv10/dw" type: "DepthwiseConvolution" bottom: "conv9" top: "conv10/dw" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv10/dw/bn" type: "BatchNorm" bottom: "conv10/dw" top: "conv10/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv10/dw/scale" type: "Scale" bottom: "conv10/dw" top: "conv10/dw" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv10/dw/relu" type: "ReLU" bottom: "conv10/dw" top: "conv10/dw" } layer { name: "conv10" type: "Convolution" bottom: "conv10/dw" top: "conv10" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 512 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv10/bn" type: "BatchNorm" bottom: "conv10" top: "conv10" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv10/scale" type: "Scale" bottom: "conv10" top: "conv10" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv10/relu" type: "ReLU" bottom: "conv10" top: "conv10" } layer { name: "conv11/dw" type: "DepthwiseConvolution" bottom: "conv10" top: "conv11/dw" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 group: 512

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv11/dw/bn" type: "BatchNorm" bottom: "conv11/dw" top: "conv11/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv11/dw/scale" type: "Scale" bottom: "conv11/dw" top: "conv11/dw" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv11/dw/relu" type: "ReLU" bottom: "conv11/dw" top: "conv11/dw" } layer { name: "conv11" type: "Convolution" bottom: "conv11/dw" top: "conv11" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 512 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv11/bn" type: "BatchNorm" bottom: "conv11" top: "conv11" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv11/scale" type: "Scale" bottom: "conv11" top: "conv11" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv11/relu" type: "ReLU" bottom: "conv11" top: "conv11" } layer { name: "conv12/dw" type: "DepthwiseConvolution" bottom: "conv11" top: "conv12/dw" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 512 bias_term: false pad: 1 kernel_size: 3 stride: 2 group: 512

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv12/dw/bn" type: "BatchNorm" bottom: "conv12/dw" top: "conv12/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv12/dw/scale" type: "Scale" bottom: "conv12/dw" top: "conv12/dw" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv12/dw/relu" type: "ReLU" bottom: "conv12/dw" top: "conv12/dw" } layer { name: "conv12" type: "Convolution" bottom: "conv12/dw" top: "conv12" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 1024 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv12/bn" type: "BatchNorm" bottom: "conv12" top: "conv12" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv12/scale" type: "Scale" bottom: "conv12" top: "conv12" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv12/relu" type: "ReLU" bottom: "conv12" top: "conv12" } layer { name: "conv13/dw" type: "DepthwiseConvolution" bottom: "conv12" top: "conv13/dw" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 1024 bias_term: false pad: 1 kernel_size: 3 group: 1024

engine: CAFFE

weight_filler {
  type: "msra"
}

} } layer { name: "conv13/dw/bn" type: "BatchNorm" bottom: "conv13/dw" top: "conv13/dw" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv13/dw/scale" type: "Scale" bottom: "conv13/dw" top: "conv13/dw" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv13/dw/relu" type: "ReLU" bottom: "conv13/dw" top: "conv13/dw" } layer { name: "conv13" type: "Convolution" bottom: "conv13/dw" top: "conv13" param { lr_mult: 0.1 decay_mult: 0.1 } convolution_param { num_output: 1024 bias_term: false kernel_size: 1 weight_filler { type: "msra" } } } layer { name: "conv13/bn" type: "BatchNorm" bottom: "conv13" top: "conv13" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } } layer { name: "conv13/scale" type: "Scale" bottom: "conv13" top: "conv13" param { lr_mult: 0.1 decay_mult: 0.0 } param { lr_mult: 0.2 decay_mult: 0.0 } scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } } layer { name: "conv13/relu" type: "ReLU" bottom: "conv13" top: "conv13" }

#######################################from vgg16

# ==========CONV4 Backwards for M1======

# reduce conv5_3 channels

layer { name: "conv5_128" type: "Convolution" bottom: "conv11" top: "conv5_128" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 128 pad: 0 kernel_size: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "conv5_128_relu" type: "ReLU" bottom: "conv5_128" top: "conv5_128" }

# Upsample conv5_3

layer { name: "conv5_128_up" type: "Deconvolution" bottom: "conv5_128" top: "conv5_128_up" convolution_param { kernel_size: 4 stride: 2 num_output: 128 group: 128 pad: 1 weight_filler: { type: "bilinear" } bias_term: false } param { lr_mult: 0 decay_mult: 0 } }

layer { name: "conv4_128" type: "Convolution" bottom: "conv5" top: "conv4_128" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 128 pad: 0 kernel_size: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

layer { name: "conv4_128_relu" type: "ReLU" bottom: "conv4_128" top: "conv4_128" }

# Crop conv5_3

layer { name: "conv5_128_crop" type: "Crop" bottom: "conv5_128_up" bottom: "conv4_128" top: "conv5_128_crop" crop_param { axis: 2 offset: 0 } }

# Eltwise summation

layer { name: "conv4_fuse" type: "Eltwise" bottom: "conv5_128_crop" bottom: "conv4_128" top: "conv4_fuse" eltwise_param { operation: SUM } }

# Perform final 3x3 convolution

layer { name: "conv4_fuse_final" type: "Convolution" bottom: "conv4_fuse" top: "conv4_fuse_final" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "conv4_fuse_final_relu" type: "ReLU" bottom: "conv4_fuse_final" top: "conv4_fuse_final" }

# ========== M3@SSH OHEM =========

# pool6 is changed, use mobilenetv2 conv6
# layer {
#   name: "pool6"
#   type: "Pooling"
#   bottom: "conv5_3"
#   top: "pool6"
#   pooling_param {
#     pool: MAX
#     kernel_size: 2
#     stride: 2
#   }
# }

layer { name: "m3@ssh_3x3_ohem" type: "Convolution" bottom: "conv13" top: "m3@ssh_3x3_output_ohem" param {name:'m3@ssh_3x3_param1'} param {name:'m3@ssh_3x3_param2'} convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false }

# Dim red

layer { name: "m3@ssh_dimred_ohem" type: "Convolution" bottom: "conv13" top: "m3@ssh_dimred_output_ohem" param {name: 'm3@ssh_dimred_param1' } param {name: 'm3@ssh_dimred_param2'} convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false } layer { name: "m3@ssh_dimred_relu_ohem" type: "ReLU" bottom: "m3@ssh_dimred_output_ohem" top: "m3@ssh_dimred_output_ohem" propagate_down: false }

# 5x5

layer { name: "m3@ssh_5x5_ohem" type: "Convolution" bottom: "m3@ssh_dimred_output_ohem" top: "m3@ssh_5x5_output_ohem" param {name: 'm3@ssh_5x5_param1'} param {name: 'm3@ssh_5x5_param2'} convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false }

# 7x7

layer { name: "m3@ssh_7x7-1_ohem" type: "Convolution" bottom: "m3@ssh_dimred_output_ohem" top: "m3@ssh_7x7-1_output_ohem" param {name: 'm3@ssh_7x7-1_param1'} param {name: 'm3@ssh_7x7-1_param2'} convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false } layer { name: "m3@ssh_7x7-1_relu_ohem" type: "ReLU" bottom: "m3@ssh_7x7-1_output_ohem" top: "m3@ssh_7x7-1_output_ohem" propagate_down: false }

layer { name: "m3@ssh_7x7" type: "Convolution" bottom: "m3@ssh_7x7-1_output_ohem" top: "m3@ssh_7x7_output_ohem" param {name: 'm3@ssh_7x7_param1'} param {name: 'm3@ssh_7x7_param2'} convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false }

layer{ name: "m3@ssh_output_ohem" type: "Concat" bottom: "m3@ssh_3x3_output_ohem" bottom: "m3@ssh_5x5_output_ohem" bottom: "m3@ssh_7x7_output_ohem" top: "m3@ssh_output_ohem" concat_param{ axis: 1 } propagate_down: false propagate_down: false propagate_down: false }

layer { name: "m3@ssh_output_relu_ohem" type: "ReLU" bottom: "m3@ssh_output_ohem" top: "m3@ssh_output_ohem" propagate_down: false } layer { name: "m3@ssh_cls_score_ohem" type: "Convolution" bottom: "m3@ssh_output_ohem" top: "m3@ssh_cls_score_output_ohem" param {name: 'm3@ssh_cls_score_param1'} param {name: 'm3@ssh_cls_score_param2'} convolution_param { num_output: 4 # 2(bg/fg) * 21(anchors) kernel_size: 1 pad: 0 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false }

layer { bottom: "m3@ssh_cls_score_output_ohem" top: "m3@ssh_cls_score_reshape_output_ohem" name: "m3@ssh_cls_reshape_ohem" type: "Reshape" reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } propagate_down: false }

layer { name: "m3@ssh_cls_prob_ohem" type: "Softmax" bottom: "m3@ssh_cls_score_reshape_output_ohem" top: "m3@ssh_cls_prob_output_ohem" propagate_down: false } layer { name: 'm3@ssh_cls_prob_reshape_ohem' type: 'Reshape' bottom: 'm3@ssh_cls_prob_output_ohem' top: 'm3@ssh_cls_prob_reshape_output_ohem' reshape_param { shape { dim: 0 dim:4 dim: -1 dim: 0 } } propagate_down: false }

# ========== M3@SSH =========

layer { name: "m3@ssh_3x3" type: "Convolution" bottom: "conv13" top: "m3@ssh_3x3_output" param { lr_mult: 1.0 decay_mult: 1.0 name:'m3@ssh_3x3_param1'} param { lr_mult: 2.0 decay_mult: 0 name:'m3@ssh_3x3_param2'} convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

# Dim red

layer { name: "m3@ssh_dimred" type: "Convolution" bottom: "conv13" top: "m3@ssh_dimred_output" param { lr_mult: 1.0 decay_mult: 1.0 name: 'm3@ssh_dimred_param1' } param { lr_mult: 2.0 decay_mult: 0 name: 'm3@ssh_dimred_param2'} convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "m3@ssh_dimred_relu" type: "ReLU" bottom: "m3@ssh_dimred_output" top: "m3@ssh_dimred_output" }

# 5x5

layer { name: "m3@ssh_5x5" type: "Convolution" bottom: "m3@ssh_dimred_output" top: "m3@ssh_5x5_output" param { lr_mult: 1.0 decay_mult: 1.0 name: 'm3@ssh_5x5_param1'} param { lr_mult: 2.0 decay_mult: 0 name: 'm3@ssh_5x5_param2'} convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

# 7x7

layer { name: "m3@ssh_7x7-1" type: "Convolution" bottom: "m3@ssh_dimred_output" top: "m3@ssh_7x7-1_output" param { lr_mult: 1.0 decay_mult: 1.0 name: 'm3@ssh_7x7-1_param1'} param { lr_mult: 2.0 decay_mult: 0 name: 'm3@ssh_7x7-1_param2'} convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "m3@ssh_7x7-1_relu" type: "ReLU" bottom: "m3@ssh_7x7-1_output" top: "m3@ssh_7x7-1_output" }

layer { name: "m3@ssh_7x7" type: "Convolution" bottom: "m3@ssh_7x7-1_output" top: "m3@ssh_7x7_output" param { lr_mult: 1.0 decay_mult: 1.0 name: 'm3@ssh_7x7_param1'} param { lr_mult: 2.0 decay_mult: 0 name: 'm3@ssh_7x7_param2'} convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

layer{ name: "m3@ssh_output" type: "Concat" bottom: "m3@ssh_3x3_output" bottom: "m3@ssh_5x5_output" bottom: "m3@ssh_7x7_output" top: "m3@ssh_output" concat_param{ axis: 1 } }

layer { name: "m3@ssh_output_relu" type: "ReLU" bottom: "m3@ssh_output" top: "m3@ssh_output" } layer { name: "m3@ssh_cls_score" type: "Convolution" bottom: "m3@ssh_output" top: "m3@ssh_cls_score_output" param { lr_mult: 1.0 decay_mult: 1.0 name: 'm3@ssh_cls_score_param1'} param { lr_mult: 2.0 decay_mult: 0 name: 'm3@ssh_cls_score_param2'} convolution_param { num_output: 4 # 2(bg/fg) 21(anchors) kernel_size: 1 pad: 0 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "m3@ssh_bbox_pred" type: "Convolution" bottom: "m3@ssh_output" top: "m3@ssh_bbox_pred_output" param { lr_mult: 1.0 decay_mult: 1.0} param { lr_mult: 2.0 decay_mult: 0} convolution_param { num_output: 8 # 4 21(anchors) kernel_size: 1 pad: 0 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "m3@ssh_cls_score_output" top: "m3@ssh_cls_score_reshape_output" name: "m3@ssh_cls_reshape" type: "Reshape" reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } } layer { name: 'm3@ssh_target_layer' type: 'Python' bottom: 'm3@ssh_cls_score_output' bottom: 'gt_boxes' bottom: 'im_info' bottom: 'data' bottom: 'm3@ssh_cls_prob_reshape_output_ohem' top: 'm3@ssh_anchor_labels' top: 'm3@ssh_reg_tragets' top: 'm3@ssh_reg_inside_weights' top: 'm3@ssh_reg_outside_weights' python_param { module: 'SSH.layers.anchor_target_layer' layer: 'AnchorTargetLayer' param_str: "{'feat_stride': 32,'scales': [16,32], 'ratios':[1,], 'allowed_border': 512}" } } layer { name: "m3@ssh_cls_loss" type: "SoftmaxWithLoss" bottom: "m3@ssh_cls_score_reshape_output" bottom: "m3@ssh_anchor_labels" propagate_down: 1 propagate_down: 0 top: "m3@ssh_cls_loss" loss_weight: 1 loss_param { ignore_label: -1 normalize: true } } layer { name: "m3@ssh_reg_loss" type: "SmoothL1Loss" bottom: "m3@ssh_bbox_pred_output" bottom: "m3@ssh_reg_tragets" bottom: 'm3@ssh_reg_inside_weights' bottom: 'm3@ssh_reg_outside_weights' top: "m3@ssh_reg_loss" loss_weight: 1 smooth_l1_loss_param { sigma: 3.0 } }

# ========= M2@SSH OHEM ==========

layer { name: "m2@ssh_3x3_ohem" type: "Convolution" bottom: "conv11" top: "m2@ssh_3x3_output_ohem" propagate_down: false param {name: "m2@ssh_3x3_param1"} param {name: "m2@ssh_3x3_param2"} convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

# Dim red

layer { name: "m2@ssh_dimred_ohem" type: "Convolution" bottom: "conv11" top: "m2@ssh_dimred_output_ohem" propagate_down: false param {name: "m2@ssh_dimred_param1"} param {name: "m2@ssh_dimred_param2"} convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "m2@ssh_dimred_relu_ohem" type: "ReLU" bottom: "m2@ssh_dimred_output_ohem" top: "m2@ssh_dimred_output_ohem" propagate_down: false }

# 5x5

layer { name: "m2@ssh_5x5_ohem" type: "Convolution" bottom: "m2@ssh_dimred_output_ohem" top: "m2@ssh_5x5_output_ohem" propagate_down: false param {name: "m2@ssh_5x5_param1"} param {name: "m2@ssh_5x5_param2"} convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

# 7x7

layer { name: "m2@ssh_7x7-1_ohem" type: "Convolution" bottom: "m2@ssh_dimred_output_ohem" top: "m2@ssh_7x7-1_output_ohem" propagate_down: false param {name: "m2@ssh_7x7-1_param1"} param {name: "m2@ssh_7x7-1_param2"} convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "m2@ssh_7x7-1_relu_ohem" type: "ReLU" bottom: "m2@ssh_7x7-1_output_ohem" top: "m2@ssh_7x7-1_output_ohem" propagate_down: false }

layer { name: "m2@ssh_7x7" type: "Convolution" bottom: "m2@ssh_7x7-1_output_ohem" top: "m2@ssh_7x7_output_ohem" propagate_down: false param {name: "m2@ssh_7x7_param1"} param {name: "m2@ssh_7x7_param2"} convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

layer{ name: "m2@ssh_output_ohem" type: "Concat" bottom: "m2@ssh_3x3_output_ohem" bottom: "m2@ssh_5x5_output_ohem" bottom: "m2@ssh_7x7_output_ohem" top: "m2@ssh_output_ohem" concat_param{ axis: 1 } propagate_down: false propagate_down: false propagate_down: false }

layer { name: "m2@ssh_output_relu_ohem" type: "ReLU" bottom: "m2@ssh_output_ohem" top: "m2@ssh_output_ohem" propagate_down: false } layer { name: "m2@ssh_cls_score_ohem" type: "Convolution" bottom: "m2@ssh_output_ohem" top: "m2@ssh_cls_score_output_ohem" param {name: "m2@ssh_cls_score_param1"} param {name: "m2@ssh_cls_score_param2"} convolution_param { num_output: 4 # 2(bg/fg) * 21(anchors) kernel_size: 1 pad: 0 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false }

layer { bottom: "m2@ssh_cls_score_output_ohem" top: "m2@ssh_cls_score_reshape_output_ohem" name: "m2@ssh_cls_reshape_ohem" type: "Reshape" reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } propagate_down: false }

layer { name: "m2@ssh_cls_prob_ohem" type: "Softmax" bottom: "m2@ssh_cls_score_reshape_output_ohem" top: "m2@ssh_cls_prob_output_ohem" propagate_down: false } layer { name: 'm2@ssh_cls_prob_reshape_ohem' type: 'Reshape' bottom: 'm2@ssh_cls_prob_output_ohem' top: 'm2@ssh_cls_prob_reshape_output_ohem' reshape_param { shape { dim: 0 dim:4 dim: -1 dim: 0 } } propagate_down: false }

# ========== M2@SSH =========

layer { name: "m2@ssh_3x3" type: "Convolution" bottom: "conv11" top: "m2@ssh_3x3_output" param { lr_mult: 1.0 decay_mult: 1.0 name: "m2@ssh_3x3_param1"} param { lr_mult: 2.0 decay_mult: 0 name: "m2@ssh_3x3_param2"} convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

# Dim red

layer { name: "m2@ssh_dimred" type: "Convolution" bottom: "conv11" top: "m2@ssh_dimred_output" param { lr_mult: 1.0 decay_mult: 1.0 name: "m2@ssh_dimred_param1"} param { lr_mult: 2.0 decay_mult: 0 name: "m2@ssh_dimred_param2"} convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "m2@ssh_dimred_relu" type: "ReLU" bottom: "m2@ssh_dimred_output" top: "m2@ssh_dimred_output" }

# 5x5

layer { name: "m2@ssh_5x5" type: "Convolution" bottom: "m2@ssh_dimred_output" top: "m2@ssh_5x5_output" param { lr_mult: 1.0 decay_mult: 1.0 name: "m2@ssh_5x5_param1"} param { lr_mult: 2.0 decay_mult: 0 name: "m2@ssh_5x5_param2"} convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

# 7x7

layer { name: "m2@ssh_7x7-1" type: "Convolution" bottom: "m2@ssh_dimred_output" top: "m2@ssh_7x7-1_output" param { lr_mult: 1.0 decay_mult: 1.0 name: "m2@ssh_7x7-1_param1"} param { lr_mult: 2.0 decay_mult: 0 name: "m2@ssh_7x7-1_param2"} convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "m2@ssh_7x7-1_relu" type: "ReLU" bottom: "m2@ssh_7x7-1_output" top: "m2@ssh_7x7-1_output" }

layer { name: "m2@ssh_7x7" type: "Convolution" bottom: "m2@ssh_7x7-1_output" top: "m2@ssh_7x7_output" param { lr_mult: 1.0 decay_mult: 1.0 name: "m2@ssh_7x7_param1"} param { lr_mult: 2.0 decay_mult: 0 name: "m2@ssh_7x7_param2"} convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

layer{ name: "m2@ssh_output" type: "Concat" bottom: "m2@ssh_3x3_output" bottom: "m2@ssh_5x5_output" bottom: "m2@ssh_7x7_output" top: "m2@ssh_output" concat_param{ axis: 1 } }

layer { name: "m2@ssh_output_relu" type: "ReLU" bottom: "m2@ssh_output" top: "m2@ssh_output" } layer { name: "m2@ssh_cls_score" type: "Convolution" bottom: "m2@ssh_output" top: "m2@ssh_cls_score_output" param { lr_mult: 1.0 decay_mult: 1.0 name: "m2@ssh_cls_score_param1"} param { lr_mult: 2.0 decay_mult: 0 name: "m2@ssh_cls_score_param2"} convolution_param { num_output: 4 # 2(bg/fg) 21(anchors) kernel_size: 1 pad: 0 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "m2@ssh_bbox_pred" type: "Convolution" bottom: "m2@ssh_output" top: "m2@ssh_bbox_pred_output" param { lr_mult: 1.0 decay_mult: 1.0 } param { lr_mult: 2.0 decay_mult: 0 } convolution_param { num_output: 8 # 4 21(anchors) kernel_size: 1 pad: 0 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "m2@ssh_cls_score_output" top: "m2@ssh_cls_score_reshape_output" name: "m2@ssh_cls_reshape" type: "Reshape" reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } } layer { name: 'm2@ssh_target_layer' type: 'Python' bottom: 'm2@ssh_cls_score_output' bottom: 'gt_boxes' bottom: 'im_info' bottom: 'data' bottom: 'm2@ssh_cls_prob_reshape_output_ohem' top: 'm2@ssh_anchor_labels' top: 'm2@ssh_reg_tragets' top: 'm2@ssh_reg_inside_weights' top: 'm2@ssh_reg_outside_weights' python_param { module: 'SSH.layers.anchor_target_layer' layer: 'AnchorTargetLayer' param_str: "{'feat_stride': 16,'scales': [4,8], 'ratios':[1,]}" } } layer { name: "m2@ssh_cls_loss" type: "SoftmaxWithLoss" bottom: "m2@ssh_cls_score_reshape_output" bottom: "m2@ssh_anchor_labels" propagate_down: 1 propagate_down: 0 top: "m2@ssh_cls_loss" loss_weight: 1 loss_param { ignore_label: -1 normalize: true } } layer { name: "m2@ssh_reg_loss" type: "SmoothL1Loss" bottom: "m2@ssh_bbox_pred_output" bottom: "m2@ssh_reg_tragets" bottom: 'm2@ssh_reg_inside_weights' bottom: 'm2@ssh_reg_outside_weights' top: "m2@ssh_reg_loss" loss_weight: 1 smooth_l1_loss_param { sigma: 3.0 } }

# ========== M1@SSH OHEM =========

layer { name: "m1@ssh_3x3_ohem" type: "Convolution" bottom: "conv4_fuse_final" top: "m1@ssh_3x3_output_ohem" param {name: "m1@ssh_3x3_param1"} param {name: "m1@ssh_3x3_param2"} convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false }

# Dim red

layer { name: "m1@ssh_dimred_ohem" type: "Convolution" bottom: "conv4_fuse_final" top: "m1@ssh_dimred_output_ohem" param {name: "m1@ssh_dimred_param1"} param {name: "m1@ssh_dimred_param2"} convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false } layer { name: "m1@ssh_dimred_relu_ohem" type: "ReLU" bottom: "m1@ssh_dimred_output_ohem" top: "m1@ssh_dimred_output_ohem" propagate_down: false }

# 5x5

layer { name: "m1@ssh_5x5_ohem" type: "Convolution" bottom: "m1@ssh_dimred_output_ohem" top: "m1@ssh_5x5_output_ohem" param {name: "m1@ssh_5x5_param1"} param {name: "m1@ssh_5x5_param2"} convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false }

# 7x7

layer { name: "m1@ssh_7x7-1_ohem" type: "Convolution" bottom: "m1@ssh_dimred_output_ohem" top: "m1@ssh_7x7-1_output_ohem" param {name: "m1@ssh_7x7-1_param1"} param {name: "m1@ssh_7x7-1_param2"} convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false } layer { name: "m1@ssh_7x7-1_relu_ohem" type: "ReLU" bottom: "m1@ssh_7x7-1_output_ohem" top: "m1@ssh_7x7-1_output_ohem" propagate_down: false }

layer { name: "m1@ssh_7x7" type: "Convolution" bottom: "m1@ssh_7x7-1_output_ohem" top: "m1@ssh_7x7_output_ohem" param {name: "m1@ssh_7x7_param1"} param {name: "m1@ssh_7x7_param2"} convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false }

layer{ name: "m1@ssh_output_ohem" type: "Concat" bottom: "m1@ssh_3x3_output_ohem" bottom: "m1@ssh_5x5_output_ohem" bottom: "m1@ssh_7x7_output_ohem" top: "m1@ssh_output_ohem" concat_param{ axis: 1 } propagate_down: false propagate_down: false propagate_down: false } layer { name: "m1@ssh_output_relu_ohem" type: "ReLU" bottom: "m1@ssh_output_ohem" top: "m1@ssh_output_ohem" propagate_down: false } layer { name: "m1@ssh_cls_score_ohem" type: "Convolution" bottom: "m1@ssh_output_ohem" top: "m1@ssh_cls_score_output_ohem" param {name: "m1@ssh_cls_score_param1"} param {name: "m1@ssh_cls_score_param2"} convolution_param { num_output: 4 # 2(bg/fg) * 21(anchors) kernel_size: 1 pad: 0 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } propagate_down: false }

layer { bottom: "m1@ssh_cls_score_output_ohem" top: "m1@ssh_cls_score_reshape_output_ohem" name: "m1@ssh_cls_reshape_ohem" type: "Reshape" reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } propagate_down: false }

layer { name: "m1@ssh_cls_prob_ohem" type: "Softmax" bottom: "m1@ssh_cls_score_reshape_output_ohem" top: "m1@ssh_cls_prob_output_ohem" propagate_down: false } layer { name: 'm1@ssh_cls_prob_reshape_ohem' type: 'Reshape' bottom: 'm1@ssh_cls_prob_output_ohem' top: 'm1@ssh_cls_prob_reshape_output_ohem' reshape_param { shape { dim: 0 dim:4 dim: -1 dim: 0 } } propagate_down: false }

# ========== M1@SSH =========

layer { name: "m1@ssh_3x3" type: "Convolution" bottom: "conv4_fuse_final" top: "m1@ssh_3x3_output" param { lr_mult: 1.0 decay_mult: 1.0 name: "m1@ssh_3x3_param1"} param { lr_mult: 2.0 decay_mult: 0 name: "m1@ssh_3x3_param2"} convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

# Dim red

layer { name: "m1@ssh_dimred" type: "Convolution" bottom: "conv4_fuse_final" top: "m1@ssh_dimred_output" param { lr_mult: 1.0 decay_mult: 1.0 name: "m1@ssh_dimred_param1"} param { lr_mult: 2.0 decay_mult: 0 name: "m1@ssh_dimred_param2"} convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "m1@ssh_dimred_relu" type: "ReLU" bottom: "m1@ssh_dimred_output" top: "m1@ssh_dimred_output" }

# 5x5

layer { name: "m1@ssh_5x5" type: "Convolution" bottom: "m1@ssh_dimred_output" top: "m1@ssh_5x5_output" param { lr_mult: 1.0 decay_mult: 1.0 name: "m1@ssh_5x5_param1"} param { lr_mult: 2.0 decay_mult: 0 name: "m1@ssh_5x5_param2"} convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

# 7x7

layer { name: "m1@ssh_7x7-1" type: "Convolution" bottom: "m1@ssh_dimred_output" top: "m1@ssh_7x7-1_output" param { lr_mult: 1.0 decay_mult: 1.0 name: "m1@ssh_7x7-1_param1"} param { lr_mult: 2.0 decay_mult: 0 name: "m1@ssh_7x7-1_param2"} convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "m1@ssh_7x7-1_relu" type: "ReLU" bottom: "m1@ssh_7x7-1_output" top: "m1@ssh_7x7-1_output" }

layer { name: "m1@ssh_7x7" type: "Convolution" bottom: "m1@ssh_7x7-1_output" top: "m1@ssh_7x7_output" param { lr_mult: 1.0 decay_mult: 1.0 name: "m1@ssh_7x7_param1"} param { lr_mult: 2.0 decay_mult: 0 name: "m1@ssh_7x7_param2"} convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }

layer{ name: "m1@ssh_output" type: "Concat" bottom: "m1@ssh_3x3_output" bottom: "m1@ssh_5x5_output" bottom: "m1@ssh_7x7_output" top: "m1@ssh_output" concat_param{ axis: 1 } } layer { name: "m1@ssh_output_relu" type: "ReLU" bottom: "m1@ssh_output" top: "m1@ssh_output" } layer { name: "m1@ssh_cls_score" type: "Convolution" bottom: "m1@ssh_output" top: "m1@ssh_cls_score_output" param { lr_mult: 1.0 decay_mult: 1.0 name: "m1@ssh_cls_score_param1"} param { lr_mult: 2.0 decay_mult: 0 name: "m1@ssh_cls_score_param2"} convolution_param { num_output: 4 # 2(bg/fg) 21(anchors) kernel_size: 1 pad: 0 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "m1@ssh_bbox_pred" type: "Convolution" bottom: "m1@ssh_output" top: "m1@ssh_bbox_pred_output" param { lr_mult: 1.0 decay_mult: 1.0 } param { lr_mult: 2.0 decay_mult: 0 } convolution_param { num_output: 8 # 4 21(anchors) kernel_size: 1 pad: 0 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { bottom: "m1@ssh_cls_score_output" top: "m1@ssh_cls_score_reshape_output" name: "m1@ssh_cls_reshape" type: "Reshape" reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } } layer { name: 'm1@ssh_target_layer' type: 'Python' bottom: 'm1@ssh_cls_score_output' bottom: 'gt_boxes' bottom: 'im_info' bottom: 'data' bottom: 'm1@ssh_cls_prob_reshape_output_ohem' top: 'm1@ssh_anchor_labels' top: 'm1@ssh_reg_tragets' top: 'm1@ssh_reg_inside_weights' top: 'm1@ssh_reg_outside_weights' python_param { module: 'SSH.layers.anchor_target_layer' layer: 'AnchorTargetLayer' param_str: "{'feat_stride': 8,'scales': [1,2], 'ratios':[1,]}" } } layer { name: "m1@ssh_cls_loss" type: "SoftmaxWithLoss" bottom: "m1@ssh_cls_score_reshape_output" bottom: "m1@ssh_anchor_labels" propagate_down: 1 propagate_down: 0 top: "m1@ssh_cls_loss" loss_weight: 1 loss_param { ignore_label: -1 normalize: true } } layer { name: "m1@ssh_reg_loss" type: "SmoothL1Loss" bottom: "m1@ssh_bbox_pred_output" bottom: "m1@ssh_reg_tragets" bottom: 'm1@ssh_reg_inside_weights' bottom: 'm1@ssh_reg_outside_weights' top: "m1@ssh_reg_loss" loss_weight: 1 smooth_l1_loss_param { sigma: 3.0 } } `

chuanqi305 commented 6 years ago

@yja1 Use my version of depthwise convolution https://github.com/chuanqi305/MobileNetv2-SSDLite/tree/master/src
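If it helps, here is a hedged sketch of doing that swap in bulk with protobuf's text format instead of editing every layer by hand. The target type string 'ConvolutionDepthwise' is an assumption on my part; check how the layer registers itself in MobileNetv2-SSDLite/src and adjust it, and the file paths are placeholders.

```python
# Sketch: point every depthwise layer at a different implementation by
# rewriting its type string in the prototxt.
from caffe.proto import caffe_pb2
from google.protobuf import text_format

net = caffe_pb2.NetParameter()
with open('ssh_mobilenet_train.prototxt') as f:   # placeholder input path
    text_format.Merge(f.read(), net)

for layer in net.layer:
    if layer.type == 'DepthwiseConvolution':      # yonghenglh6's layer type
        layer.type = 'ConvolutionDepthwise'       # assumed type string of the replacement layer
print('rewrote %d layers' % sum(l.type == 'ConvolutionDepthwise' for l in net.layer))

with open('ssh_mobilenet_train_dw.prototxt', 'w') as f:
    f.write(text_format.MessageToString(net))
```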