seetaface / SeetaFaceEngine

Creating a Caffe model from the paper #201

Open · rediceli opened this issue 6 years ago

rediceli commented 6 years ago

The paper's description of the model mentions a fast normalization layer and ReLU, but parsing the model out of the FaceIdentification (FaceId) code actually gives the structure shown below (the Eltwise layers were merged directly into the preceding Conv layers as their BIAS, the pad layers were treated as the Conv layers' PAD, and the first mean-subtraction step is inferred from the code logic: it is actually an Eltwise BIAS_ADD whose values, judging from the data, appear to be the mean):

[Image: diagram of the model structure as parsed from the FaceIdentification code]
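For context, merging an Eltwise BIAS_ADD into the preceding Conv is mathematically safe because convolution is affine in its bias term. A minimal numpy sketch with hypothetical shapes (not code from the repository):

import numpy as np

# conv(x) + b_extra is the same as conv with bias (b_conv + b_extra),
# so an Eltwise BIAS_ADD can be folded into the Conv layer's bias.
conv_bias    = np.zeros(48, dtype=np.float32)       # e.g. conv1 with 48 outputs
eltwise_bias = np.full(48, 0.1, dtype=np.float32)   # per-channel BIAS_ADD term
merged_bias  = conv_bias + eltwise_bias             # bias of the merged Conv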

The ReLU layers mentioned in the paper are nowhere to be seen; in fact, the FaceIdentification project does not implement ReLU at all. My original plan was to analyze the code, trace the model data to infer the actual network structure, and then build a Caffe model to train my own version, but the recovered structure differs from the paper.
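If the graph really contained no nonlinearity between the conv layers, then (max pooling aside) consecutive convolutions would collapse into a single linear map, making most of the depth pointless. An illustrative numpy check of that collapse, using fully connected layers for simplicity:

import numpy as np

# Two stacked linear layers with no activation in between are
# equivalent to a single linear layer.
rng = np.random.default_rng(0)
W1 = rng.standard_normal((8, 16))
W2 = rng.standard_normal((4, 8))
x  = rng.standard_normal(16)
assert np.allclose(W2 @ (W1 @ x), (W2 @ W1) @ x)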

According to the paper, every conv layer should be followed by a ReLU, with the Fast Normalization layer placed before the ReLU. Substituting BN for FN (and adding dropout, as the paper describes), the paper's model should look like the prototxt below, which differs clearly from the diagram above. The prototxt below is the training model; the absence of dropout at deploy time is understandable, but can it really be correct to have no ReLU? (See the note after the prototxt.)

name: "SeetaFace"
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    mirror: true
    crop_size: 228
    mean_file: "data/face_mean.binaryproto"
  }
  data_param {
    source: "data/face_train_lmdb"
    batch_size: 60
    backend: LMDB
  }
}
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    mirror: false
    crop_size: 228
    mean_file: "data/face_mean.binaryproto"
  }
  data_param {
    source: "data/face_val_lmdb"
    batch_size: 60
    backend: LMDB
  }
}

layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 48
    kernel_size: 9
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layer {
    bottom: "conv1"
    top: "conv1"
    name: "bn_conv1"
    type: "BatchNorm"
}
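# Note: Caffe's BatchNorm layer only normalizes, (x - mean) / sqrt(var + eps);
# a Scale layer with bias_term: true is usually added after each BatchNorm
# (here and for bn_conv2 .. bn_conv7) to learn an affine gamma/beta.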

layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}

layer {
    bottom: "conv2"
    top: "conv2"
    name: "bn_conv2"
    type: "BatchNorm"
}

layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}

layer {
  name: "conv3"
  type: "Convolution"
  bottom: "conv2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layer {
    bottom: "conv3"
    top: "conv3"
    name: "bn_conv3"
    type: "BatchNorm"
}

layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv3"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

layer {
  name: "conv4"
  type: "Convolution"
  bottom: "pool2"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}

layer {
    bottom: "conv4"
    top: "conv4"
    name: "bn_conv4"
    type: "BatchNorm"
}

layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 192
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
    bottom: "conv5"
    top: "conv5"
    name: "bn_conv5"
    type: "BatchNorm"
}

layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}

layer {
  name: "conv6"
  type: "Convolution"
  bottom: "conv5"
  top: "conv6"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 192
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
    bottom: "conv6"
    top: "conv6"
    name: "bn_conv6"
    type: "BatchNorm"
}

layer {
  name: "relu6"
  type: "ReLU"
  bottom: "conv6"
  top: "conv6"
}

layer {
  name: "conv7"
  type: "Convolution"
  bottom: "conv6"
  top: "conv7"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}

layer {
    bottom: "conv7"
    top: "conv7"
    name: "bn_conv7"
    type: "BatchNorm"
}

layer {
  name: "relu7"
  type: "ReLU"
  bottom: "conv7"
  top: "conv7"
}

layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv7"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

layer {
  name: "fc1"
  type: "InnerProduct"
  bottom: "pool3"
  top: "fc1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc1"
  top: "fc1"
}
layer {
  name: "drop1"
  type: "Dropout"
  bottom: "fc1"
  top: "fc1"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "fc2"
  type: "InnerProduct"
  bottom: "fc1"
  top: "fc2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 2048
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc2"
  top: "fc2"
}
layer {
  name: "drop2"
  type: "Dropout"
  bottom: "fc2"
  top: "fc2"
  dropout_param {
    dropout_ratio: 0.5
  }
}

layer {
  name: "fc3"
  type: "InnerProduct"
  bottom: "fc2"
  top: "fc3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 10575
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc3"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc3"
  bottom: "label"
  top: "loss"
}
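On the deploy question: a normalization layer can legitimately disappear from a deploy model, because a BatchNorm (together with the gamma/beta of its Scale layer) folds into the preceding Conv's weights and bias. A numpy sketch of that folding, with assumed shapes and names:

import numpy as np

# Fold a BatchNorm (plus affine gamma/beta) into the preceding Conv
# for deployment; W has shape (out, in, kh, kw), all others (out,).
def fold_bn_into_conv(W, b, mean, var, gamma, beta, eps=1e-5):
    s = gamma / np.sqrt(var + eps)          # per-output-channel scale
    W_folded = W * s[:, None, None, None]   # rescale each output filter
    b_folded = (b - mean) * s + beta        # shift the bias accordingly
    return W_folded, b_folded

ReLU(x) = max(x, 0) is not affine, however, so it cannot be folded away like this; a deploy model without BN and dropout is expected, but one without any ReLU remains hard to explain.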