kevinlin311tw / caffe-multilabel

Doubts about the Latent layer and num_output #1

Open Amschel opened 7 years ago

Amschel commented 7 years ago

Hi @kevinlin311tw,

Could you please explain the last layers of your train_val.prototxt:

layer {
  name: "latent"
  type: "InnerProduct"
  bottom: "fc7"
  top: "latent"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 48
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  bottom: "latent"
  top: "latent_sigmoid"
  name: "latent_sigmoid"
  type: "Sigmoid"
}
layer {
  name: "fc9"
  type: "InnerProduct"
  bottom: "latent_sigmoid"
  top: "fc9"
  param { lr_mult: 10 decay_mult: 1 }
  param { lr_mult: 20 decay_mult: 0 }
  inner_product_param {
    num_output: 4
    weight_filler { type: "gaussian" std: 0.2 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "accuracy"
  type: "MultiLabelAccuracy"
  bottom: "fc9"
  bottom: "label"
  top: "accuracy"
  include { phase: TEST }
}
layer {
  name: "loss"
  type: "MultiLabelSigmoidLoss"
  bottom: "fc9"
  bottom: "label"
  top: "loss: multi-class-classfication-error"
  loss_weight: 1
}

Why does the latent layer have num_output = 48?

Also, I have 10 labels/image, but at testing I get:

I0314 12:02:45.114591 16476 solver.cpp:398] Test net output #0: accuracy = 0.106192
I0314 12:02:45.114686 16476 solver.cpp:398] Test net output #1: accuracy = 0.497902
I0314 12:02:45.114693 16476 solver.cpp:398] Test net output #2: accuracy = -nan
I0314 12:02:45.114698 16476 solver.cpp:398] Test net output #3: accuracy = 6.10033e-05
I0314 12:02:45.114704 16476 solver.cpp:398] Test net output #4: accuracy = 0.000121898

Shouldn't I have 10 outputs?

This is how the layer looks in my settings:

layer {
  name: "fc9"
  type: "InnerProduct"
  bottom: "latent_sigmoid"
  top: "fc9"
  param { lr_mult: 10 decay_mult: 1 }
  param { lr_mult: 20 decay_mult: 0 }
  inner_product_param {
    num_output: 10
    weight_filler { type: "gaussian" std: 0.2 }
    bias_filler { type: "constant" value: 0 }
  }
}

Thank you!

kevinlin311tw commented 7 years ago

Hmm... You can remove the 48-node latent layer. Adding that layer was just for my homework.
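
For reference, one quick way to confirm the wiring after removing the latent layer (and pointing the last InnerProduct at fc7 directly) is to load the edited prototxt in Python and inspect the blob shapes. This is only a sketch, not part of the repo: it assumes pycaffe is built for this fork, that the two-argument Net(prototxt, phase) constructor from mainline Caffe is available, and that the final layer is still named fc9.

import caffe

# Hypothetical check; assumes pycaffe is built for this fork and that the
# edited train_val.prototxt still names its final InnerProduct layer "fc9".
caffe.set_mode_cpu()
net = caffe.Net('train_val.prototxt', caffe.TEST)

# Expect one score per label, e.g. (batch_size, 10) for 10 labels per image.
print(net.blobs['fc9'].data.shape)
print(net.blobs['label'].data.shape)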

Amschel commented 7 years ago

Hi @kevinlin311tw,

Thank you!

I'm looking at the code, but I still don't understand the accuracy output on validation. Let me give more details so you can understand what I'm trying to do:

My train file has the following structure:

path_image1 1 -1 -1 -1 1 1 -1 -1 -1 -1
...
path_imageN 1 1 1 -1 1 1 -1 -1 -1 -1

So I have 10 labels for every image.
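
As a quick sanity check on that list file, a small Python snippet like the one below can verify that each line has one path plus exactly 10 labels in {-1, 1}, matching label_size: 10 in the ImageData layers. The file name train.txt is taken from the prototxt below; the rest is a hypothetical helper, not part of the repo.

# Hypothetical sanity check for a multi-label list file; not part of the repo.
# Expected format (as shown above): <image_path> l1 l2 ... l10, labels in {-1, 1}.
NUM_LABELS = 10  # must match label_size in the ImageData layers

with open("train.txt") as f:
    for line_no, line in enumerate(f, 1):
        fields = line.split()
        if not fields:
            continue  # ignore blank lines
        labels = fields[1:]
        if len(labels) != NUM_LABELS:
            print("line %d: expected %d labels, got %d" % (line_no, NUM_LABELS, len(labels)))
        elif any(l not in ("-1", "1") for l in labels):
            print("line %d: labels must be -1 or 1" % line_no)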

Here is my network architecture:

name: "multi-class-alexnet@Credits: https://github.com/kevinlin311tw/caffe-multilabel"
layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    mirror: true
    crop_size: 227
  }
  image_data_param {
    source: "train.txt"
    batch_size: 256
    new_height: 256
    new_width: 256
    label_size: 10
  }
}

layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    mirror: false
    crop_size: 227
  }
  image_data_param {
    source: "val.txt"
    batch_size: 64
    new_height: 256
    new_width: 256
    label_size: 10
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm1"
  type: "LRN"
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "norm1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm2"
  type: "LRN"
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "norm2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  bottom: "fc7"
  top: "latent_sigmoid"
  name: "latent_sigmoid"
  type: "Sigmoid"
}
layer {
  name: "fc8-1"
  type: "InnerProduct"
  bottom: "latent_sigmoid"
  top: "fc8-1"
  param {
    lr_mult: 2
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "gaussian"
      std: 0.2
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "accuracy"
  type: "MultiLabelAccuracy"
  bottom: "fc8-1"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "MultiLabelSigmoidLoss"
  bottom: "fc8-1"
  bottom: "label"
  top: "loss: multi-class-classfication-error"
  loss_weight: 1
}

Everything works fine during training, but when I run validation, the net output is the following:

I0314 12:02:45.114591 16476 solver.cpp:398] Test net output #0: accuracy = 0.106192
I0314 12:02:45.114686 16476 solver.cpp:398] Test net output #1: accuracy = 0.497902
I0314 12:02:45.114693 16476 solver.cpp:398] Test net output #2: accuracy = -nan
I0314 12:02:45.114698 16476 solver.cpp:398] Test net output #3: accuracy = 6.10033e-05
I0314 12:02:45.114704 16476 solver.cpp:398] Test net output #4: accuracy = 0.000121898

So, I have two questions. First, is the network prototxt that I'm using OK? Second, why do I get five outputs?

Thank you!

kevinlin311tw commented 7 years ago
  1. I don't quite understand why you want to add ReLU + dropout + sigmoid in your network. Removing that sigmoid layer would be better.
  2. The five outputs have different meanings, such as precision, recall, F1 score, and others. Check ./src/caffe/layers/multi_label_accuracy_layer.cpp for more details; a rough illustration of such metrics follows below.
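
For intuition only, here is a rough Python sketch of how precision, recall, and F1 fall out of raw fc scores and ±1 labels. It is not the layer's code, and the five reported values may differ in definition and order from what this sketch computes; multi_label_accuracy_layer.cpp is the authority. It also shows how a -nan can appear when a denominator (for example, the number of positive labels in a test batch) happens to be zero.

import numpy as np

def multilabel_metrics(scores, labels):
    # Rough illustration: precision/recall/F1 from raw scores (fc outputs) and
    # ground-truth labels in {-1, +1}. A score > 0 (sigmoid > 0.5) counts as a
    # positive prediction.
    pred_pos = scores > 0
    true_pos = labels > 0
    tp = np.sum(pred_pos & true_pos)
    fp = np.sum(pred_pos & ~true_pos)
    fn = np.sum(~pred_pos & true_pos)
    precision = tp / float(tp + fp) if tp + fp > 0 else float('nan')
    recall = tp / float(tp + fn) if tp + fn > 0 else float('nan')
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall > 0 else float('nan'))
    return precision, recall, f1

# Example: 10 labels for one image, as in the train file above.
scores = np.array([2.1, -0.3, -1.2, 0.4, 1.7, -0.9, -2.0, 0.1, -0.5, -1.1])
labels = np.array([1, -1, -1, -1, 1, 1, -1, -1, -1, -1])
print(multilabel_metrics(scores, labels))
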
Amschel commented 7 years ago

Hi @kevinlin311tw,

Thank you for your assistance. One more thing: at test time, which layer should I use to get the probability for each class?