nqanh / affordance-net

AffordanceNet - Multiclass Instance Segmentation Framework - ICRA 2018
Other
121 stars 54 forks source link

About loss function #16

Open lsj910128 opened 6 years ago

lsj910128 commented 6 years ago

Hi!

I want to change the multinomial cross-entropy loss (based on softmax) for the affordance detection branch into a binary cross-entropy loss based on sigmoid — how can I do this?

I try to change train.prototxt file as follow: `layer { name: "mask_score" type: "Convolution" bottom: "mask_deconv3" # top: "mask_score" param { lr_mult: 1.0 decay_mult: 1.0 } param { lr_mult: 2.0 decay_mult: 0 } convolution_param {

num_output: 10 # 9 affordance classes + 1 background

#num_output: 1# output will be 1x1x14x14 --> for using SigmoidCrossEntropyLoss
num_output: 2#  output will be 1x2x14x14 --> for using Softmax. Actually, binomial cross-entropy loss 
#(sigmoid + cross entropy) = logistic regression = two classes softmax regression
kernel_size: 1 pad: 0 
weight_filler {type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" }
bias_filler { type: "constant" value: 0 }

} }

layer { name: "loss_mask" type: "SoftmaxWithLoss"

bottom: "mask_score_reshape"

bottom: "mask_score" bottom: "mask_targets" top: "loss_mask" loss_weight: 3 loss_param { ignore_label: -1 normalize: true

normalize: false

} propagate_down: true # backprop to prediction propagate_down: false # don't backprop to labels }`

and set the base_lr = 1e-10 (a larger base_lr doesn't work). But the loss is very erratic: sometimes it is as large as 100 and sometimes as small as 6. I can't see a downward trend in the loss.

MartinPlantinga commented 6 years ago

Hi @lsj910128, have you solved this? I'm trying to do the same but cannot figure out how to do it. This is my prototxt file; note that I added an extra Python layer (BinaryMaskLayer) to convert the class values in the array into binary values:

name: "VGG_ILSVRC_16_layers" layer { name: 'input-data' type: 'Python' top: 'data' top: 'im_info' top: 'gt_boxes' top: 'seg_mask_inds' top: 'flipped' python_param { module: 'roi_data_layer.layer' layer: 'RoIDataLayer' param_str: "'num_classes': 3" # 2 obj categories + 1 background } } layer { name: "conv1_1" type: "Convolution" bottom: "data" top: "conv1_1" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 64 pad: 1 kernel_size: 3 } } layer { name: "relu1_1" type: "ReLU" bottom: "conv1_1" top: "conv1_1" } layer { name: "conv1_2" type: "Convolution" bottom: "conv1_1" top: "conv1_2" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 64 pad: 1 kernel_size: 3 } } layer { name: "relu1_2" type: "ReLU" bottom: "conv1_2" top: "conv1_2" } layer { name: "pool1" type: "Pooling" bottom: "conv1_2" top: "pool1" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv2_1" type: "Convolution" bottom: "pool1" top: "conv2_1" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 } } layer { name: "relu2_1" type: "ReLU" bottom: "conv2_1" top: "conv2_1" } layer { name: "conv2_2" type: "Convolution" bottom: "conv2_1" top: "conv2_2" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 } } layer { name: "relu2_2" type: "ReLU" bottom: "conv2_2" top: "conv2_2" } layer { name: "pool2" type: "Pooling" bottom: "conv2_2" top: "pool2" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv3_1" type: "Convolution" bottom: "pool2" top: "conv3_1" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 } } layer { name: "relu3_1" type: "ReLU" bottom: "conv3_1" top: "conv3_1" } layer { name: "conv3_2" type: "Convolution" bottom: "conv3_1" top: "conv3_2" param { lr_mult: 1 } 
param { lr_mult: 2 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 } } layer { name: "relu3_2" type: "ReLU" bottom: "conv3_2" top: "conv3_2" } layer { name: "conv3_3" type: "Convolution" bottom: "conv3_2" top: "conv3_3" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 } } layer { name: "relu3_3" type: "ReLU" bottom: "conv3_3" top: "conv3_3" } layer { name: "pool3" type: "Pooling" bottom: "conv3_3" top: "pool3" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv4_1" type: "Convolution" bottom: "pool3" top: "conv4_1" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 } } layer { name: "relu4_1" type: "ReLU" bottom: "conv4_1" top: "conv4_1" } layer { name: "conv4_2" type: "Convolution" bottom: "conv4_1" top: "conv4_2" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 } } layer { name: "relu4_2" type: "ReLU" bottom: "conv4_2" top: "conv4_2" } layer { name: "conv4_3" type: "Convolution" bottom: "conv4_2" top: "conv4_3" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 } } layer { name: "relu4_3" type: "ReLU" bottom: "conv4_3" top: "conv4_3" } layer { name: "pool4" type: "Pooling" bottom: "conv4_3" top: "pool4" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv5_1" type: "Convolution" bottom: "pool4" top: "conv5_1" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 } } layer { name: "relu5_1" type: "ReLU" bottom: "conv5_1" top: "conv5_1" } layer { name: "conv5_2" type: "Convolution" bottom: "conv5_1" top: "conv5_2" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 } } layer { name: "relu5_2" type: "ReLU" bottom: "conv5_2" top: "conv5_2" } layer { name: "conv5_3" type: "Convolution" bottom: "conv5_2" top: "conv5_3" param 
{ lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 } } layer { name: "relu5_3" type: "ReLU" bottom: "conv5_3" top: "conv5_3" }

========= RPN ============

layer { name: "rpn_conv/3x3" type: "Convolution" bottom: "conv5_3" top: "rpn/output" param { lr_mult: 1.0 } param { lr_mult: 2.0 } convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "rpn_relu/3x3" type: "ReLU" bottom: "rpn/output" top: "rpn/output" }

layer { name: "rpn_cls_score" type: "Convolution" bottom: "rpn/output" top: "rpn_cls_score" param { lr_mult: 1.0 } param { lr_mult: 2.0 } convolution_param {

num_output: 24

num_output: 30 # 2(bg/fg) * 15(n_anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }

} }

layer { name: "rpn_bbox_pred" type: "Convolution" bottom: "rpn/output" top: "rpn_bbox_pred" param { lr_mult: 1.0 } param { lr_mult: 2.0 } convolution_param {

num_output: 48 # 4 * 12(anchors)

num_output: 60   # 4 * 15(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }

} }

layer { bottom: "rpn_cls_score" top: "rpn_cls_score_reshape" name: "rpn_cls_score_reshape" type: "Reshape" reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } }

layer { name: 'rpn-data' type: 'Python' bottom: 'rpn_cls_score' bottom: 'gt_boxes' bottom: 'im_info' bottom: 'data' top: 'rpn_labels' top: 'rpn_bbox_targets' top: 'rpn_bbox_inside_weights' top: 'rpn_bbox_outside_weights' python_param { module: 'rpn.anchor_target_layer' layer: 'AnchorTargetLayer'

param_str: "'feat_stride': 16 \n'scales': !!python/tuple [4, 8, 16, 32]"

param_str: "'feat_stride': 16 \n'scales': !!python/tuple [2, 4, 8, 16, 32]" 

} }

layer { name: "rpn_loss_cls" type: "SoftmaxWithLoss" bottom: "rpn_cls_score_reshape" bottom: "rpn_labels" propagate_down: 1 propagate_down: 0 top: "rpn_cls_loss" loss_weight: 1 loss_param { ignore_label: -1 normalize: true } }

layer { name: "rpn_loss_bbox" type: "SmoothL1Loss" bottom: "rpn_bbox_pred" bottom: "rpn_bbox_targets" bottom: 'rpn_bbox_inside_weights' bottom: 'rpn_bbox_outside_weights' top: "rpn_loss_bbox" loss_weight: 1 smooth_l1_loss_param { sigma: 3.0 } }

========= RoI Proposal ============

layer { name: "rpn_cls_prob" type: "Softmax" bottom: "rpn_cls_score_reshape" top: "rpn_cls_prob" }

layer { name: 'rpn_cls_prob_reshape' type: 'Reshape' bottom: 'rpn_cls_prob' top: 'rpn_cls_prob_reshape'

reshape_param { shape { dim: 0 dim: 24 dim: -1 dim: 0 } }

reshape_param { shape { dim: 0 dim: 30 dim: -1 dim: 0 } } }

layer { name: 'proposal' type: 'Python' bottom: 'rpn_cls_prob_reshape' bottom: 'rpn_bbox_pred' bottom: 'im_info' top: 'rpn_rois' python_param { module: 'rpn.proposal_layer' layer: 'ProposalLayer'

param_str: "'feat_stride': 16 \n'scales': !!python/tuple [4, 8, 16, 32]"

param_str: "'feat_stride': 16 \n'scales': !!python/tuple [2, 4, 8, 16, 32]"

} }

layer { name: 'roi-data' type: 'Python' bottom: 'rpn_rois' bottom: 'gt_boxes' bottom: 'im_info' bottom: 'seg_mask_inds' bottom: 'flipped' top: 'rois' top: 'labels' top: 'bbox_targets' top: 'bbox_inside_weights' top: 'bbox_outside_weights' top: 'mask_targets' top: 'rois_pos' top: 'label_for_mask' python_param { module: 'rpn.proposal_target_layer_ppsigmoid' layer: 'ProposalTargetLayer' param_str: "'num_classes': 3" } }

========= RCNN ============

layer { name: "roi_pool5"

type: "ROIPooling"

type: "ROIAlignment2"

type: "ROIAlignment" bottom: "conv5_3" #bottom[0] bottom: "rois" #bottom[1] top: "pool5"

roi_pooling_param {

roi_alignment2_param {

roi_alignment_param { pooled_w: 7 pooled_h: 7 spatial_scale: 0.0625 } } layer { name: "fc6" type: "InnerProduct" bottom: "pool5" top: "fc6" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 4096 } } layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6" } layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 4096 } } layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7" } layer { name: "cls_score" type: "InnerProduct" bottom: "fc7" top: "cls_score" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output:3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "bbox_pred" type: "InnerProduct" bottom: "fc7" top: "bbox_pred" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 12 # = 4 * 3, i.e., box coordinate for each class weight_filler { type: "gaussian" std: 0.001 } bias_filler { type: "constant" value: 0 } } } layer { name: "loss_cls" type: "SoftmaxWithLoss" bottom: "cls_score" bottom: "labels" propagate_down: 1 propagate_down: 0 top: "loss_cls" loss_weight: 3 } layer { name: "loss_bbox" type: "SmoothL1Loss" bottom: "bbox_pred" bottom: "bbox_targets" bottom: "bbox_inside_weights" bottom: "bbox_outside_weights" top: "loss_bbox" loss_weight: 2 }

##############Mask branch#################################### layer { name: "roi_pool5_2"

type: "ROIPooling"

type: "ROIAlignment2"

type: "ROIAlignment" bottom: "conv5_3" bottom: "rois_pos" top: "pool5_2"

roi_pooling_param {

roi_alignment2_param{

roi_alignment_param{ pooled_w: 7 pooled_h: 7 spatial_scale: 0.0625 # 1/16 } }

Conv-Relu 1

layer { name: "pool5_2_conv" type: "Convolution" bottom: "pool5_2" top: "pool5_2_conv" param { lr_mult: 1.0 decay_mult: 1.0} param { lr_mult: 2.0 decay_mult: 0} convolution_param { num_output: 512 kernel_size: 1 pad: 0 #kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } }

layer { name: "pool5_2_conv_relu" type: "ReLU" bottom: "pool5_2_conv" top: "pool5_2_conv_relu" }

Conv-Relu 2

layer { name: "pool5_2_conv2" type: "Convolution" bottom: "pool5_2_conv_relu" top: "pool5_2_conv2" param { lr_mult: 1.0 decay_mult: 1.0} param { lr_mult: 2.0 decay_mult: 0} convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1#kernel_size: 1 pad: 0 #kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } }

layer { name: "pool5_2_conv2_relu" type: "ReLU" bottom: "pool5_2_conv2" top: "pool5_2_conv2_relu" }

Deconv 1

layer { name: "mask_deconv1" type: "Deconvolution"

bottom: "pool5_2_conv_relu"

bottom: "pool5_2_conv2_relu" top: "mask_deconv1" param { lr_mult: 1 decay_mult: 1.0 } param { lr_mult: 2 decay_mult: 0} convolution_param { num_output: 256

pad: 1 stride: 2 kernel_size: 4 # 14x14

#pad: 1 stride: 3 kernel_size: 6  # 22x22
pad: 1 stride: 4 kernel_size: 8 # 30x30
group: 256 #apply independently
weight_filler { type: "bilinear" }
#bias_filler { type: "constant" value: 1 }

} }

Conv-Relu 3

layer { name: "pool5_2_conv3" type: "Convolution" bottom: "mask_deconv1" top: "pool5_2_conv3" param { lr_mult: 1.0 decay_mult: 1.0} param { lr_mult: 2.0 decay_mult: 0} convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1#kernel_size: 1 pad: 0 #kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } }

layer { name: "pool5_2_conv3_relu" type: "ReLU" bottom: "pool5_2_conv3" top: "pool5_2_conv3_relu" }

Conv-Relu 4

layer { name: "pool5_2_conv4" type: "Convolution" bottom: "pool5_2_conv3_relu" top: "pool5_2_conv4" param { lr_mult: 1.0 decay_mult: 1.0} param { lr_mult: 2.0 decay_mult: 0} convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1#kernel_size: 1 pad: 0 #kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } }

layer { name: "pool5_2_conv4_relu" type: "ReLU" bottom: "pool5_2_conv4" top: "pool5_2_conv4_relu" }

Deconv 2

layer { name: "mask_deconv2" type: "Deconvolution" bottom: "pool5_2_conv4_relu" top: "mask_deconv2" param { lr_mult: 1 decay_mult: 1.0 } param { lr_mult: 2 decay_mult: 0} convolution_param { num_output: 256

pad: 1 stride: 2 kernel_size: 4 # 28x28

#pad: 1 stride: 8 kernel_size: 16 # 490x490 
pad: 1 stride: 4 kernel_size: 8
group: 256 #apply independently
weight_filler { type: "bilinear" }
#bias_filler { type: "constant" value: 1 }

} }

Conv-Relu 5

layer { name: "pool5_2_conv5" type: "Convolution" bottom: "mask_deconv2" top: "pool5_2_conv5" param { lr_mult: 1.0 decay_mult: 1.0} param { lr_mult: 2.0 decay_mult: 0} convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1#kernel_size: 1 pad: 0 #kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } }

layer { name: "pool5_2_conv5_relu" type: "ReLU" bottom: "pool5_2_conv5" top: "pool5_2_conv5_relu" }

Conv-Relu 6

layer { name: "pool5_2_conv6" type: "Convolution" bottom: "pool5_2_conv5_relu" top: "pool5_2_conv6" param { lr_mult: 1.0 decay_mult: 1.0} param { lr_mult: 2.0 decay_mult: 0} convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1#kernel_size: 1 pad: 0 #kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } }

layer { name: "pool5_2_conv6_relu" type: "ReLU" bottom: "pool5_2_conv6" top: "pool5_2_conv6_relu" }

Deconv 3

layer { name: "mask_deconv3" type: "Deconvolution" bottom: "pool5_2_conv6_relu" top: "mask_deconv3" param { lr_mult: 1 decay_mult: 1.0 } param { lr_mult: 2 decay_mult: 0} convolution_param { num_output: 256 pad: 1 stride: 2 kernel_size: 4

pad: 1 stride: 8 kernel_size: 16

#pad: 1 stride: 4 kernel_size: 8
group: 256 #apply independently
weight_filler { type: "bilinear" }
#bias_filler { type: "constant" value: 1 }

} }

layer { name: "mask_score" type: "Convolution" bottom: "mask_deconv3" # top: "mask_score" param { lr_mult: 1.0 decay_mult: 1.0 } param { lr_mult: 2.0 decay_mult: 0 } convolution_param { num_output: 3 # 2 classes + 1 background kernel_size: 1 pad: 0 weight_filler {type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } }

layer { name: 'binary-mask' type: 'Python' bottom: 'mask_score' bottom: 'mask_targets' #from lib/rpn/proposal_target_layer.py roi-data bottom: 'label_for_mask' #from lib/rpn/proposal_target_layer.py roi-data top: 'mask_score2' top: 'binary_mask' python_param { module: 'rpn.binary_mask' layer: 'BinaryMaskLayer' param_str: "'num_classes': 3" } }

layer { name: "loss_mask" type: "SigmoidCrossEntropyLoss" bottom: 'mask_score2' bottom: "binary_mask" top: "loss_mask" loss_weight: 0.0003 propagate_down: true # backprop to prediction propagate_down: false # don't backprop to labels }