MhLiao / TextBoxes_plusplus

TextBoxes++: A Single-Shot Oriented Scene Text Detector
953 stars 279 forks source link

Prototext for pre-trained model #62

Closed shankarj67 closed 5 years ago

shankarj67 commented 6 years ago

Hi @MhLiao, thank you for writing this code.

I am converting this code into a Keras tutorial.

First, I need to convert the pre-trained weights in model_pre_train_syn.caffemodel to HDF5 format so that I can use them in Keras. To convert the .caffemodel to .hdf5, I need the corresponding .prototxt file, which I cannot find. There is a deploy.prototxt which, if I am not mistaken, is used on top of this pre-trained model.

Is there any way to convert or use these caffemodel weights in Keras and then add layers on top of them?

MhLiao commented 6 years ago

@shankarj67 This is my train.prototxt. I hope it is helpful to you.

name: "VGG_text_text_polygon_precise_384x384_train"
layer {
  name: "data"
  type: "AnnotatedData"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    mirror: false
    mean_value: 104.0
    mean_value: 117.0
    mean_value: 123.0
    resize_param {
      prob: 1.0
      resize_mode: WARP
      height: 384
      width: 384
      interp_mode: LINEAR
      interp_mode: AREA
      interp_mode: NEAREST
      interp_mode: CUBIC
      interp_mode: LANCZOS4
    }
    emit_constraint {
      emit_type: CENTER
    }
    distort_param {
      brightness_prob: 0.5
      brightness_delta: 32.0
      contrast_prob: 0.5
      contrast_lower: 0.5
      contrast_upper: 1.5
      hue_prob: 0.5
      hue_delta: 18.0
      saturation_prob: 0.5
      saturation_lower: 0.5
      saturation_upper: 1.5
      random_order_prob: 0.0
    }
    expand_param {
      prob: 0.5
      max_expand_ratio: 4.0
    }
  }
  data_param {
    source: "/home/mhliao/data/syn_polygon_precise/lmdb/text_train_polygon_lmdb"
    batch_size: 32
    backend: LMDB
  }
  annotated_data_param {
    batch_sampler {
      max_sample: 1
      max_trials: 1
    }
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint {
        min_object_coverage: 0.10000000149
      }
      max_sample: 1
      max_trials: 50
    }
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint {
        min_object_coverage: 0.300000011921
      }
      max_sample: 1
      max_trials: 50
    }
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint {
        min_object_coverage: 0.5
      }
      max_sample: 1
      max_trials: 50
    }
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint {
        min_object_coverage: 0.699999988079
      }
      max_sample: 1
      max_trials: 50
    }
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint {
        min_object_coverage: 0.899999976158
      }
      max_sample: 1
      max_trials: 50
    }
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint {
        max_jaccard_overlap: 1.0
      }
      max_sample: 1
      max_trials: 50
    }
    label_map_file: "data/text/labelmap_voc.prototxt"
  }
}
layer {
  name: "conv1_1"
  type: "Convolution"
  bottom: "data"
  top: "conv1_1"
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "relu1_1"
  type: "ReLU"
  bottom: "conv1_1"
  top: "conv1_1"
}
layer {
  name: "conv1_2"
  type: "Convolution"
  bottom: "conv1_1"
  top: "conv1_2"
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "relu1_2"
  type: "ReLU"
  bottom: "conv1_2"
  top: "conv1_2"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1_2"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2_1"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2_1"
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "relu2_1"
  type: "ReLU"
  bottom: "conv2_1"
  top: "conv2_1"
}
layer {
  name: "conv2_2"
  type: "Convolution"
  bottom: "conv2_1"
  top: "conv2_2"
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "relu2_2"
  type: "ReLU"
  bottom: "conv2_2"
  top: "conv2_2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2_2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv3_1"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "relu3_1"
  type: "ReLU"
  bottom: "conv3_1"
  top: "conv3_1"
}
layer {
  name: "conv3_2"
  type: "Convolution"
  bottom: "conv3_1"
  top: "conv3_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "relu3_2"
  type: "ReLU"
  bottom: "conv3_2"
  top: "conv3_2"
}
layer {
  name: "conv3_3"
  type: "Convolution"
  bottom: "conv3_2"
  top: "conv3_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "relu3_3"
  type: "ReLU"
  bottom: "conv3_3"
  top: "conv3_3"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3_3"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv4_1"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "relu4_1"
  type: "ReLU"
  bottom: "conv4_1"
  top: "conv4_1"
}
layer {
  name: "conv4_2"
  type: "Convolution"
  bottom: "conv4_1"
  top: "conv4_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "relu4_2"
  type: "ReLU"
  bottom: "conv4_2"
  top: "conv4_2"
}
layer {
  name: "conv4_3"
  type: "Convolution"
  bottom: "conv4_2"
  top: "conv4_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "relu4_3"
  type: "ReLU"
  bottom: "conv4_3"
  top: "conv4_3"
}
layer {
  name: "pool4"
  type: "Pooling"
  bottom: "conv4_3"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv5_1"
  type: "Convolution"
  bottom: "pool4"
  top: "conv5_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    dilation: 1
  }
}
layer {
  name: "relu5_1"
  type: "ReLU"
  bottom: "conv5_1"
  top: "conv5_1"
}
layer {
  name: "conv5_2"
  type: "Convolution"
  bottom: "conv5_1"
  top: "conv5_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    dilation: 1
  }
}
layer {
  name: "relu5_2"
  type: "ReLU"
  bottom: "conv5_2"
  top: "conv5_2"
}
layer {
  name: "conv5_3"
  type: "Convolution"
  bottom: "conv5_2"
  top: "conv5_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    dilation: 1
  }
}
layer {
  name: "relu5_3"
  type: "ReLU"
  bottom: "conv5_3"
  top: "conv5_3"
}
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5_3"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 1
    pad: 1
  }
}
layer {
  name: "fc6"
  type: "Convolution"
  bottom: "pool5"
  top: "fc6"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 1024
    pad: 6
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    dilation: 6
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "fc7"
  type: "Convolution"
  bottom: "fc6"
  top: "fc7"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 1024
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "conv6_1"
  type: "Convolution"
  bottom: "fc7"
  top: "conv6_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "conv6_1_relu"
  type: "ReLU"
  bottom: "conv6_1"
  top: "conv6_1"
}
layer {
  name: "conv6_2"
  type: "Convolution"
  bottom: "conv6_1"
  top: "conv6_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "conv6_2_relu"
  type: "ReLU"
  bottom: "conv6_2"
  top: "conv6_2"
}
layer {
  name: "conv7_1"
  type: "Convolution"
  bottom: "conv6_2"
  top: "conv7_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "conv7_1_relu"
  type: "ReLU"
  bottom: "conv7_1"
  top: "conv7_1"
}
layer {
  name: "conv7_2"
  type: "Convolution"
  bottom: "conv7_1"
  top: "conv7_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "conv7_2_relu"
  type: "ReLU"
  bottom: "conv7_2"
  top: "conv7_2"
}
layer {
  name: "conv8_1"
  type: "Convolution"
  bottom: "conv7_2"
  top: "conv8_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "conv8_1_relu"
  type: "ReLU"
  bottom: "conv8_1"
  top: "conv8_1"
}
layer {
  name: "conv8_2"
  type: "Convolution"
  bottom: "conv8_1"
  top: "conv8_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 0
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "conv8_2_relu"
  type: "ReLU"
  bottom: "conv8_2"
  top: "conv8_2"
}
layer {
  name: "conv9_1"
  type: "Convolution"
  bottom: "conv8_2"
  top: "conv9_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "conv9_1_relu"
  type: "ReLU"
  bottom: "conv9_1"
  top: "conv9_1"
}
layer {
  name: "conv9_2"
  type: "Convolution"
  bottom: "conv9_1"
  top: "conv9_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 0
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}
layer {
  name: "conv9_2_relu"
  type: "ReLU"
  bottom: "conv9_2"
  top: "conv9_2"
}
layer {
  name: "conv4_3_norm"
  type: "Normalize"
  bottom: "conv4_3"
  top: "conv4_3_norm"
  norm_param {
    across_spatial: false
    scale_filler {
      type: "constant"
      value: 20.0
    }
    channel_shared: false
  }
}
layer {
  name: "conv4_3_norm_mbox_loc"
  type: "Convolution"
  bottom: "conv4_3_norm"
  top: "conv4_3_norm_mbox_loc"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 240
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    pad_h: 1
    pad_w: 2
    kernel_h: 3
    kernel_w: 5
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "conv4_3_norm_mbox_loc_perm"
  type: "Permute"
  bottom: "conv4_3_norm_mbox_loc"
  top: "conv4_3_norm_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
layer {
  name: "conv4_3_norm_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv4_3_norm_mbox_loc_perm"
  top: "conv4_3_norm_mbox_loc_flat"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "conv4_3_norm_mbox_conf"
  type: "Convolution"
  bottom: "conv4_3_norm"
  top: "conv4_3_norm_mbox_conf"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 40
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    pad_h: 1
    pad_w: 2
    kernel_h: 3
    kernel_w: 5
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "conv4_3_norm_mbox_conf_perm"
  type: "Permute"
  bottom: "conv4_3_norm_mbox_conf"
  top: "conv4_3_norm_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
layer {
  name: "conv4_3_norm_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv4_3_norm_mbox_conf_perm"
  top: "conv4_3_norm_mbox_conf_flat"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "conv4_3_norm_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv4_3_norm"
  bottom: "data"
  top: "conv4_3_norm_mbox_priorbox"
  prior_box_param {
    min_size: 30.0
    max_size: 60.0
    aspect_ratio: 2.0
    aspect_ratio: 3.0
    aspect_ratio: 4.0
    aspect_ratio: 5.0
    flip: true
    clip: false
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
    step: 8.0
    offset: 0.5
    denser_prior_boxes: true
  }
}
layer {
  name: "fc7_mbox_loc"
  type: "Convolution"
  bottom: "fc7"
  top: "fc7_mbox_loc"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 240
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    pad_h: 1
    pad_w: 2
    kernel_h: 3
    kernel_w: 5
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "fc7_mbox_loc_perm"
  type: "Permute"
  bottom: "fc7_mbox_loc"
  top: "fc7_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
layer {
  name: "fc7_mbox_loc_flat"
  type: "Flatten"
  bottom: "fc7_mbox_loc_perm"
  top: "fc7_mbox_loc_flat"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "fc7_mbox_conf"
  type: "Convolution"
  bottom: "fc7"
  top: "fc7_mbox_conf"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 40
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    pad_h: 1
    pad_w: 2
    kernel_h: 3
    kernel_w: 5
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "fc7_mbox_conf_perm"
  type: "Permute"
  bottom: "fc7_mbox_conf"
  top: "fc7_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
layer {
  name: "fc7_mbox_conf_flat"
  type: "Flatten"
  bottom: "fc7_mbox_conf_perm"
  top: "fc7_mbox_conf_flat"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "fc7_mbox_priorbox"
  type: "PriorBox"
  bottom: "fc7"
  bottom: "data"
  top: "fc7_mbox_priorbox"
  prior_box_param {
    min_size: 30.0
    max_size: 90.0
    aspect_ratio: 2.0
    aspect_ratio: 3.0
    aspect_ratio: 4.0
    aspect_ratio: 5.0
    flip: true
    clip: false
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
    step: 16.0
    offset: 0.5
    denser_prior_boxes: true
  }
}
layer {
  name: "conv6_2_mbox_loc"
  type: "Convolution"
  bottom: "conv6_2"
  top: "conv6_2_mbox_loc"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 240
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    pad_h: 1
    pad_w: 2
    kernel_h: 3
    kernel_w: 5
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "conv6_2_mbox_loc_perm"
  type: "Permute"
  bottom: "conv6_2_mbox_loc"
  top: "conv6_2_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
layer {
  name: "conv6_2_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv6_2_mbox_loc_perm"
  top: "conv6_2_mbox_loc_flat"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "conv6_2_mbox_conf"
  type: "Convolution"
  bottom: "conv6_2"
  top: "conv6_2_mbox_conf"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 40
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    pad_h: 1
    pad_w: 2
    kernel_h: 3
    kernel_w: 5
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "conv6_2_mbox_conf_perm"
  type: "Permute"
  bottom: "conv6_2_mbox_conf"
  top: "conv6_2_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
layer {
  name: "conv6_2_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv6_2_mbox_conf_perm"
  top: "conv6_2_mbox_conf_flat"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "conv6_2_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv6_2"
  bottom: "data"
  top: "conv6_2_mbox_priorbox"
  prior_box_param {
    min_size: 90.0
    max_size: 150.0
    aspect_ratio: 2.0
    aspect_ratio: 3.0
    aspect_ratio: 4.0
    aspect_ratio: 5.0
    flip: true
    clip: false
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
    step: 32.0
    offset: 0.5
    denser_prior_boxes: true
  }
}
layer {
  name: "conv7_2_mbox_loc"
  type: "Convolution"
  bottom: "conv7_2"
  top: "conv7_2_mbox_loc"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 240
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    pad_h: 1
    pad_w: 2
    kernel_h: 3
    kernel_w: 5
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "conv7_2_mbox_loc_perm"
  type: "Permute"
  bottom: "conv7_2_mbox_loc"
  top: "conv7_2_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
layer {
  name: "conv7_2_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv7_2_mbox_loc_perm"
  top: "conv7_2_mbox_loc_flat"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "conv7_2_mbox_conf"
  type: "Convolution"
  bottom: "conv7_2"
  top: "conv7_2_mbox_conf"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 40
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    pad_h: 1
    pad_w: 2
    kernel_h: 3
    kernel_w: 5
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "conv7_2_mbox_conf_perm"
  type: "Permute"
  bottom: "conv7_2_mbox_conf"
  top: "conv7_2_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
layer {
  name: "conv7_2_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv7_2_mbox_conf_perm"
  top: "conv7_2_mbox_conf_flat"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "conv7_2_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv7_2"
  bottom: "data"
  top: "conv7_2_mbox_priorbox"
  prior_box_param {
    min_size: 150.0
    max_size: 210.0
    aspect_ratio: 2.0
    aspect_ratio: 3.0
    aspect_ratio: 4.0
    aspect_ratio: 5.0
    flip: true
    clip: false
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
    step: 64.0
    offset: 0.5
    denser_prior_boxes: true
  }
}
layer {
  name: "conv8_2_mbox_loc"
  type: "Convolution"
  bottom: "conv8_2"
  top: "conv8_2_mbox_loc"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 240
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    pad_h: 1
    pad_w: 2
    kernel_h: 3
    kernel_w: 5
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "conv8_2_mbox_loc_perm"
  type: "Permute"
  bottom: "conv8_2_mbox_loc"
  top: "conv8_2_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
layer {
  name: "conv8_2_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv8_2_mbox_loc_perm"
  top: "conv8_2_mbox_loc_flat"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "conv8_2_mbox_conf"
  type: "Convolution"
  bottom: "conv8_2"
  top: "conv8_2_mbox_conf"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 40
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    pad_h: 1
    pad_w: 2
    kernel_h: 3
    kernel_w: 5
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "conv8_2_mbox_conf_perm"
  type: "Permute"
  bottom: "conv8_2_mbox_conf"
  top: "conv8_2_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
layer {
  name: "conv8_2_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv8_2_mbox_conf_perm"
  top: "conv8_2_mbox_conf_flat"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "conv8_2_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv8_2"
  bottom: "data"
  top: "conv8_2_mbox_priorbox"
  prior_box_param {
    min_size: 210.0
    max_size: 270.0
    aspect_ratio: 2.0
    aspect_ratio: 3.0
    aspect_ratio: 4.0
    aspect_ratio: 5.0
    flip: true
    clip: false
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
    step: 100.0
    offset: 0.5
    denser_prior_boxes: true
  }
}
layer {
  name: "conv9_2_mbox_loc"
  type: "Convolution"
  bottom: "conv9_2"
  top: "conv9_2_mbox_loc"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 240
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    pad_h: 1
    pad_w: 2
    kernel_h: 3
    kernel_w: 5
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "conv9_2_mbox_loc_perm"
  type: "Permute"
  bottom: "conv9_2_mbox_loc"
  top: "conv9_2_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
layer {
  name: "conv9_2_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv9_2_mbox_loc_perm"
  top: "conv9_2_mbox_loc_flat"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "conv9_2_mbox_conf"
  type: "Convolution"
  bottom: "conv9_2"
  top: "conv9_2_mbox_conf"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 40
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    pad_h: 1
    pad_w: 2
    kernel_h: 3
    kernel_w: 5
    stride_h: 1
    stride_w: 1
  }
}
layer {
  name: "conv9_2_mbox_conf_perm"
  type: "Permute"
  bottom: "conv9_2_mbox_conf"
  top: "conv9_2_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
layer {
  name: "conv9_2_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv9_2_mbox_conf_perm"
  top: "conv9_2_mbox_conf_flat"
  flatten_param {
    axis: 1
  }
}
layer {
  name: "conv9_2_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv9_2"
  bottom: "data"
  top: "conv9_2_mbox_priorbox"
  prior_box_param {
    min_size: 270.0
    max_size: 330.0
    aspect_ratio: 2.0
    aspect_ratio: 3.0
    aspect_ratio: 4.0
    aspect_ratio: 5.0
    flip: true
    clip: false
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
    step: 300.0
    offset: 0.5
    denser_prior_boxes: true
  }
}
layer {
  name: "mbox_loc"
  type: "Concat"
  bottom: "conv4_3_norm_mbox_loc_flat"
  bottom: "fc7_mbox_loc_flat"
  bottom: "conv6_2_mbox_loc_flat"
  bottom: "conv7_2_mbox_loc_flat"
  bottom: "conv8_2_mbox_loc_flat"
  bottom: "conv9_2_mbox_loc_flat"
  top: "mbox_loc"
  concat_param {
    axis: 1
  }
}
layer {
  name: "mbox_conf"
  type: "Concat"
  bottom: "conv4_3_norm_mbox_conf_flat"
  bottom: "fc7_mbox_conf_flat"
  bottom: "conv6_2_mbox_conf_flat"
  bottom: "conv7_2_mbox_conf_flat"
  bottom: "conv8_2_mbox_conf_flat"
  bottom: "conv9_2_mbox_conf_flat"
  top: "mbox_conf"
  concat_param {
    axis: 1
  }
}
layer {
  name: "mbox_priorbox"
  type: "Concat"
  bottom: "conv4_3_norm_mbox_priorbox"
  bottom: "fc7_mbox_priorbox"
  bottom: "conv6_2_mbox_priorbox"
  bottom: "conv7_2_mbox_priorbox"
  bottom: "conv8_2_mbox_priorbox"
  bottom: "conv9_2_mbox_priorbox"
  top: "mbox_priorbox"
  concat_param {
    axis: 2
  }
}
layer {
  name: "mbox_loss"
  type: "MultiBoxLoss"
  bottom: "mbox_loc"
  bottom: "mbox_conf"
  bottom: "mbox_priorbox"
  bottom: "label"
  top: "mbox_loss"
  include {
    phase: TRAIN
  }
  propagate_down: true
  propagate_down: true
  propagate_down: false
  propagate_down: false
  loss_param {
    normalization: VALID
  }
  multibox_loss_param {
    loc_loss_type: SMOOTH_L1
    conf_loss_type: SOFTMAX
    loc_weight: 0.20000000298
    num_classes: 2
    share_location: true
    match_type: PER_PREDICTION
    overlap_threshold: 0.5
    use_prior_for_matching: true
    background_label_id: 0
    use_difficult_gt: false
    neg_pos_ratio: 3.0
    neg_overlap: 0.5
    code_type: CENTER_SIZE
    ignore_cross_boundary_bbox: false
    mining_type: MAX_NEGATIVE
    use_polygon: true
  }
}
shankarj67 commented 6 years ago

Hi @MhLiao

Thank you for the train.prototxt.

I have one question about deploy.prototxt and train.prototxt: could you please tell me whether the two files are the same or different?

MhLiao commented 6 years ago

The train.prototxt contains the loss layers, while the deploy.prototxt does not; the deploy.prototxt is only used for testing.

shankarj67 commented 6 years ago

Hi @MhLiao

You have trained your model on ICDAR 2015 Incidental Text, and the resulting weights are in model_icdar15.caffemodel.

I just wanted to confirm: if I convert these weights to a Keras model, would I be able to do detection and recognition without any further training?

DecentMakeover commented 5 years ago

@shankarj67 Did you manage to convert this model to Keras?