deep0learning opened this issue 6 years ago
@deep0learning good observation. For SVHN ---> MNIST using LeNet without adaptation, the accuracy is 60%.
And using CORAL it should be 79%.
The Alexnet (one weird trick) used in this repository is different from the Alexnet (original) used by CORAL. Also the better accuracy in the paper is due to an additional mean loss function they used in the Caffe prototxt:
layer { name: "mean_loss" type: "EuclideanLoss" bottom: "mean_source" bottom: "mean_target" top: "mean_loss" loss_weight: 0 include { phase: TRAIN } }
@debasmitdas
How did you find this information?
In D-CORAL they did not use any mean loss / EuclideanLoss.
Check the .prototxt file in https://github.com/VisionLearningGroup/CORAL/tree/master/code after unzipping.
I did not find the loss that you mentioned. See the .prototxt file:
layer { name: "data" type: "ImageData" top: "data" top: "label" include { phase: TRAIN } transform_param { mirror: true crop_size: 227 mean_value: 104.0 mean_value: 116.7 mean_value: 122.7 } image_data_param { source: "examples/office31/amazon.txt" batch_size: 128 shuffle: true new_width: 256 new_height: 256 } }
layer { name: "data_t" type: "ImageData" top: "data_t" top: "label_t" include { phase: TRAIN } transform_param { mirror: true crop_size: 227 mean_value: 104.0 mean_value: 116.7 mean_value: 122.7 } image_data_param { source: "examples/office31/webcam.txt" batch_size: 128 shuffle: true new_width: 256 new_height: 256 } }
layer { name: "silence_target_label" type: "Silence" bottom: "label_t" include { phase: TRAIN } }
layer { name: "data" type: "ImageData" top: "data" top: "label" include { phase: TEST } transform_param { mirror: true crop_size: 227 mean_value: 104.0 mean_value: 116.7 mean_value: 122.7 } image_data_param { source: "examples/office31/webcam.txt" batch_size: 53 # 53*15 = 795 size of the target dataset shuffle: true new_width: 256 new_height: 256 } }
layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { name: "sharedweights_conv1" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv1" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 96 pad: 0 kernel_size: 11 group: 1 stride: 4 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "conv1_t" type: "Convolution" bottom: "data_t" top: "conv1_t" include { phase: TRAIN } param { name: "sharedweights_conv1" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv1" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 96 pad: 0 kernel_size: 11 group: 1 stride: 4 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }
layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1"
} layer { name: "relu1_t" type: "ReLU" bottom: "conv1_t" top: "conv1_t" include { phase: TRAIN }
}
layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1"
pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "pool1_t" type: "Pooling" bottom: "conv1_t" top: "pool1_t" include { phase: TRAIN }
pooling_param { pool: MAX kernel_size: 3 stride: 2 } }
layer { name: "norm1" type: "LRN" bottom: "pool1" top: "norm1"
lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "norm1_t" type: "LRN" bottom: "pool1_t" top: "norm1_t" include { phase: TRAIN }
lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } }
layer { name: "conv2" type: "Convolution" bottom: "norm1" top: "conv2" param { name: "sharedweights_conv2" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv2" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 256 pad: 2 kernel_size: 5 group: 2 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "conv2_t" type: "Convolution" bottom: "norm1_t" top: "conv2_t" include { phase: TRAIN } param { name: "sharedweights_conv2" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv2" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 256 pad: 2 kernel_size: 5 group: 2 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }
layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2"
} layer { name: "relu2_t" type: "ReLU" bottom: "conv2_t" top: "conv2_t" include { phase: TRAIN }
}
layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2"
pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "pool2_t" type: "Pooling" bottom: "conv2_t" top: "pool2_t" include { phase: TRAIN }
pooling_param { pool: MAX kernel_size: 3 stride: 2 } }
layer { name: "norm2" type: "LRN" bottom: "pool2" top: "norm2"
lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "norm2_t" type: "LRN" bottom: "pool2_t" top: "norm2_t" include { phase: TRAIN }
lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } }
layer { name: "conv3" type: "Convolution" bottom: "norm2" top: "conv3" param { name: "sharedweights_conv3" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv3" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 1 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "conv3_t" type: "Convolution" bottom: "norm2_t" top: "conv3_t" include { phase: TRAIN } param { name: "sharedweights_conv3" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv3" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 1 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }
layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3"
} layer { name: "relu3_t" type: "ReLU" bottom: "conv3_t" top: "conv3_t" include { phase: TRAIN }
}
layer { name: "conv4" type: "Convolution" bottom: "conv3" top: "conv4" param { name: "sharedweights_conv4" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv4" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 2 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "conv4_t" type: "Convolution" bottom: "conv3_t" top: "conv4_t" include { phase: TRAIN } param { name: "sharedweights_conv4" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv4" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 2 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }
layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4"
} layer { name: "relu4_t" type: "ReLU" bottom: "conv4_t" top: "conv4_t" include { phase: TRAIN }
}
layer { name: "conv5" type: "Convolution" bottom: "conv4" top: "conv5" param { name: "sharedweights_conv5" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv5" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 group: 2 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "conv5_t" type: "Convolution" bottom: "conv4_t" top: "conv5_t" include { phase: TRAIN } param { name: "sharedweights_conv5" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv5" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 group: 2 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }
layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5"
} layer { name: "relu5_t" type: "ReLU" bottom: "conv5_t" top: "conv5_t" include { phase: TRAIN }
}
layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5"
pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "pool5_t" type: "Pooling" bottom: "conv5_t" top: "pool5_t" include { phase: TRAIN }
pooling_param { pool: MAX kernel_size: 3 stride: 2 } }
layer { name: "fc6" type: "InnerProduct" bottom: "pool5" top: "fc6" param { name: "sharedweights_fc6" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_fc6" lr_mult: 2.0 decay_mult: 0.0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "fc6_t" type: "InnerProduct" bottom: "pool5_t" top: "fc6_t" include { phase: TRAIN } param { name: "sharedweights_fc6" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_fc6" lr_mult: 2.0 decay_mult: 0.0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }
layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6"
} layer { name: "relu6_t" type: "ReLU" bottom: "fc6_t" top: "fc6_t" include { phase: TRAIN }
}
layer { name: "drop6" type: "Dropout" bottom: "fc6" top: "fc6"
dropout_param { dropout_ratio: 0.5 } } layer { name: "drop6_t" type: "Dropout" bottom: "fc6_t" top: "fc6_t" include { phase: TRAIN }
dropout_param { dropout_ratio: 0.5 } }
layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" param { name: "sharedweights_fc7" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_fc7" lr_mult: 2.0 decay_mult: 0.0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "fc7_t" type: "InnerProduct" bottom: "fc6_t" top: "fc7_t" include { phase: TRAIN } param { name: "sharedweights_fc7" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_fc7" lr_mult: 2.0 decay_mult: 0.0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }
layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7"
} layer { name: "relu7_t" type: "ReLU" bottom: "fc7_t" top: "fc7_t" include { phase: TRAIN }
}
layer { name: "drop7" type: "Dropout" bottom: "fc7" top: "fc7"
dropout_param { dropout_ratio: 0.5 } } layer { name: "drop7_t" type: "Dropout" bottom: "fc7_t" top: "fc7_t" include { phase: TRAIN }
dropout_param { dropout_ratio: 0.5 } }
layer { name: "fc8_office" type: "InnerProduct" bottom: "fc7" top: "fc8_office" param { name: "sharedweights_fc8" lr_mult: 10.0 decay_mult: 1.0 } param { name: "sharedbias_fc8" lr_mult: 20.0 decay_mult: 0.0 } inner_product_param { num_output: 31 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "fc8_office_t" type: "InnerProduct" bottom: "fc7_t" top: "fc8_office_t" include { phase: TRAIN } param { name: "sharedweights_fc8" lr_mult: 10.0 decay_mult: 1.0 } param { name: "sharedbias_fc8" lr_mult: 20.0 decay_mult: 0.0 } inner_product_param { num_output: 31 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }
layer { type: 'CORALLoss' name: 'coral_fc8' top: 'coral_fc8' bottom: 'fc8_office' bottom: 'fc8_office_t' include { phase: TRAIN } loss_weight: 0.8 }
layer { name: "silence_target_fc8" type: "Silence" bottom: "fc8_office_t" include { phase: TRAIN } }
layer { name: "classification_loss" top: "classification_loss" type: "SoftmaxWithLoss" bottom: "fc8_office" bottom: "label" loss_weight: 1 include { phase: TRAIN } } layer { name: "accuracy" type: "Accuracy" bottom: "fc8_office" bottom: "label" top: "accuracy" }
I got this accuracy for SVHN -----> MNIST:
Test Source: Epoch: 2460, avg_loss: 0.0003, Accuracy: 73255/73257 (100.00%)
Test Target: Epoch: 2460, avg_loss: 21.5493, Accuracy: 35242/60000 (58.74%)
That means data loading does not matter for getting good accuracy, because for SVHN ----> MNIST I just used the default data processing.