SSARCandy / DeepCORAL

🧠 A PyTorch implementation of 'Deep CORAL: Correlation Alignment for Deep Domain Adaptation.', ECCV 2016
https://ssarcandy.tw/2017/10/31/deep-coral/

Low Accuracy for SVHN ---> MNIST #9

Open deep0learning opened 6 years ago

deep0learning commented 6 years ago

I got this accuracy for SVHN -----> MNIST:

Test Source: Epoch: 2460, avg_loss: 0.0003, Accuracy: 73255/73257 (100.00%)

Test Target: Epoch: 2460, avg_loss: 21.5493, Accuracy: 35242/60000 (58.74%)

That suggests the data loading is not the problem, because for SVHN ----> MNIST I just used the default data processing.

redhat12345 commented 6 years ago

@deep0learning Good observation. For SVHN ---> MNIST, using LeNet without adaptation the accuracy is about 60%.

With CORAL it should be around 79%.

debasmitdas commented 6 years ago

The AlexNet variant ("one weird trick") used in this repository is different from the original AlexNet used by CORAL. Also, the better accuracy in the paper is due to an additional mean loss they used in the Caffe prototxt:

layer { name: "mean_loss" type: "EuclideanLoss" bottom: "mean_source" bottom: "mean_target" top: "mean_loss" loss_weight: 0 include { phase: TRAIN } }

redhat12345 commented 6 years ago

@debasmitdas

How did you find this information?

In D-CORAL they did not use any mean loss / EuclideanLoss.

debasmitdas commented 6 years ago

Check the .prototxt file in https://github.com/VisionLearningGroup/CORAL/tree/master/code after unzipping.

deep0learning commented 6 years ago

I did not find the loss that you mentioned. See the .prototxt file:

labelled source data

layer { name: "data" type: "ImageData" top: "data" top: "label" include { phase: TRAIN } transform_param { mirror: true crop_size: 227 mean_value: 104.0 mean_value: 116.7 mean_value: 122.7 } image_data_param { source: "examples/office31/amazon.txt" batch_size: 128 shuffle: true new_width: 256 new_height: 256 } }

unlabelled target data

layer { name: "data_t" type: "ImageData" top: "data_t" top: "label_t" include { phase: TRAIN } transform_param { mirror: true crop_size: 227 mean_value: 104.0 mean_value: 116.7 mean_value: 122.7 } image_data_param { source: "examples/office31/webcam.txt" batch_size: 128 shuffle: true new_width: 256 new_height: 256 } }

add silence to suppress output of labels

layer { name: "silence_target_label" type: "Silence" bottom: "label_t" include { phase: TRAIN } }

target data for testing

layer { name: "data" type: "ImageData" top: "data" top: "label" include { phase: TEST } transform_param { mirror: true crop_size: 227 mean_value: 104.0 mean_value: 116.7 mean_value: 122.7 } image_data_param { source: "examples/office31/webcam.txt" batch_size: 53 # 53*15 = 795 size of the target dataset shuffle: true new_width: 256 new_height: 256 } }

conv1

layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { name: "sharedweights_conv1" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv1" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 96 pad: 0 kernel_size: 11 group: 1 stride: 4 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "conv1_t" type: "Convolution" bottom: "data_t" top: "conv1_t" include { phase: TRAIN } param { name: "sharedweights_conv1" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv1" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 96 pad: 0 kernel_size: 11 group: 1 stride: 4 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }

relu1

layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1"

param: "sharedweights_relu1"

} layer { name: "relu1_t" type: "ReLU" bottom: "conv1_t" top: "conv1_t" include { phase: TRAIN }

param: "sharedweights_relu1"

}

pool1

layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1"

param: "sharedweights_pool1"

pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "pool1_t" type: "Pooling" bottom: "conv1_t" top: "pool1_t" include { phase: TRAIN }

param: "sharedweights_pool1"

pooling_param { pool: MAX kernel_size: 3 stride: 2 } }

norm1

layer { name: "norm1" type: "LRN" bottom: "pool1" top: "norm1"

param: "sharedweights_norm1"

lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "norm1_t" type: "LRN" bottom: "pool1_t" top: "norm1_t" include { phase: TRAIN }

param: "sharedweights_norm1"

lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } }

conv2

layer { name: "conv2" type: "Convolution" bottom: "norm1" top: "conv2" param { name: "sharedweights_conv2" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv2" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 256 pad: 2 kernel_size: 5 group: 2 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "conv2_t" type: "Convolution" bottom: "norm1_t" top: "conv2_t" include { phase: TRAIN } param { name: "sharedweights_conv2" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv2" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 256 pad: 2 kernel_size: 5 group: 2 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }

relu2

layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2"

param: "sharedweights_relu2"

} layer { name: "relu2_t" type: "ReLU" bottom: "conv2_t" top: "conv2_t" include { phase: TRAIN }

param: "sharedweights_relu2"

}

pool2

layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2"

param: "sharedweights_pool2"

pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "pool2_t" type: "Pooling" bottom: "conv2_t" top: "pool2_t" include { phase: TRAIN }

param: "sharedweights_pool2"

pooling_param { pool: MAX kernel_size: 3 stride: 2 } }

norm2

layer { name: "norm2" type: "LRN" bottom: "pool2" top: "norm2"

param: "sharedweights_norm2"

lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } } layer { name: "norm2_t" type: "LRN" bottom: "pool2_t" top: "norm2_t" include { phase: TRAIN }

param: "sharedweights_norm2"

lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } }

conv3

layer { name: "conv3" type: "Convolution" bottom: "norm2" top: "conv3" param { name: "sharedweights_conv3" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv3" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 1 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "conv3_t" type: "Convolution" bottom: "norm2_t" top: "conv3_t" include { phase: TRAIN } param { name: "sharedweights_conv3" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv3" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 1 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }

relu3

layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3"

param: "sharedweights_relu3"

} layer { name: "relu3_t" type: "ReLU" bottom: "conv3_t" top: "conv3_t" include { phase: TRAIN }

param: "sharedweights_relu3"

}

conv4

layer { name: "conv4" type: "Convolution" bottom: "conv3" top: "conv4" param { name: "sharedweights_conv4" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv4" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 2 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "conv4_t" type: "Convolution" bottom: "conv3_t" top: "conv4_t" include { phase: TRAIN } param { name: "sharedweights_conv4" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv4" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 384 pad: 1 kernel_size: 3 group: 2 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }

relu4

layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4"

param: "sharedweights_relu4"

} layer { name: "relu4_t" type: "ReLU" bottom: "conv4_t" top: "conv4_t" include { phase: TRAIN }

param: "sharedweights_relu4"

}

conv5

layer { name: "conv5" type: "Convolution" bottom: "conv4" top: "conv5" param { name: "sharedweights_conv5" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv5" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 group: 2 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "conv5_t" type: "Convolution" bottom: "conv4_t" top: "conv5_t" include { phase: TRAIN } param { name: "sharedweights_conv5" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_conv5" lr_mult: 2.0 decay_mult: 0.0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 group: 2 stride: 1 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }

relu5

layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5"

param: "sharedweights_relu5"

} layer { name: "relu5_t" type: "ReLU" bottom: "conv5_t" top: "conv5_t" include { phase: TRAIN }

param: "sharedweights_relu5"

}

pool5

layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5"

param: "sharedweights_pool5"

pooling_param { pool: MAX kernel_size: 3 stride: 2 } } layer { name: "pool5_t" type: "Pooling" bottom: "conv5_t" top: "pool5_t" include { phase: TRAIN }

param: "sharedweights_pool5"

pooling_param { pool: MAX kernel_size: 3 stride: 2 } }

fc6

layer { name: "fc6" type: "InnerProduct" bottom: "pool5" top: "fc6" param { name: "sharedweights_fc6" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_fc6" lr_mult: 2.0 decay_mult: 0.0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "fc6_t" type: "InnerProduct" bottom: "pool5_t" top: "fc6_t" include { phase: TRAIN } param { name: "sharedweights_fc6" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_fc6" lr_mult: 2.0 decay_mult: 0.0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }

relu6

layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6"

param: "sharedweights_relu6"

} layer { name: "relu6_t" type: "ReLU" bottom: "fc6_t" top: "fc6_t" include { phase: TRAIN }

param: "sharedweights_relu6"

}

drop6

layer { name: "drop6" type: "Dropout" bottom: "fc6" top: "fc6"

param: "sharedweights_drop6"

dropout_param { dropout_ratio: 0.5 } } layer { name: "drop6_t" type: "Dropout" bottom: "fc6_t" top: "fc6_t" include { phase: TRAIN }

param: "sharedweights_drop6"

dropout_param { dropout_ratio: 0.5 } }

fc7

layer { name: "fc7" type: "InnerProduct" bottom: "fc6" top: "fc7" param { name: "sharedweights_fc7" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_fc7" lr_mult: 2.0 decay_mult: 0.0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "fc7_t" type: "InnerProduct" bottom: "fc6_t" top: "fc7_t" include { phase: TRAIN } param { name: "sharedweights_fc7" lr_mult: 1.0 decay_mult: 1.0 } param { name: "sharedbias_fc7" lr_mult: 2.0 decay_mult: 0.0 } inner_product_param { num_output: 4096 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }

relu7

layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7"

param: "sharedweights_relu7"

} layer { name: "relu7_t" type: "ReLU" bottom: "fc7_t" top: "fc7_t" include { phase: TRAIN }

param: "sharedweights_relu7"

}

drop7

layer { name: "drop7" type: "Dropout" bottom: "fc7" top: "fc7"

param: "sharedweights_drop7"

dropout_param { dropout_ratio: 0.5 } } layer { name: "drop7_t" type: "Dropout" bottom: "fc7_t" top: "fc7_t" include { phase: TRAIN }

param: "sharedweights_drop7"

dropout_param { dropout_ratio: 0.5 } }

fc8

layer { name: "fc8_office" type: "InnerProduct" bottom: "fc7" top: "fc8_office" param { name: "sharedweights_fc8" lr_mult: 10.0 decay_mult: 1.0 } param { name: "sharedbias_fc8" lr_mult: 20.0 decay_mult: 0.0 } inner_product_param { num_output: 31 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } } layer { name: "fc8_office_t" type: "InnerProduct" bottom: "fc7_t" top: "fc8_office_t" include { phase: TRAIN } param { name: "sharedweights_fc8" lr_mult: 10.0 decay_mult: 1.0 } param { name: "sharedbias_fc8" lr_mult: 20.0 decay_mult: 0.0 } inner_product_param { num_output: 31 weight_filler { type: "gaussian" std: 0.005 } bias_filler { type: "constant" value: 0.0 } } }

layer { type: 'CORALLoss' name: 'coral_fc8' top: 'coral_fc8' bottom: 'fc8_office' bottom: 'fc8_office_t' include { phase: TRAIN } loss_weight: 0.8 }

add silence to suppress output of fc8_office_t

layer { name: "silence_target_fc8" type: "Silence" bottom: "fc8_office_t" include { phase: TRAIN } }

Classification Loss

layer { name: "classification_loss" top: "classification_loss" type: "SoftmaxWithLoss" bottom: "fc8_office" bottom: "label" loss_weight: 1 include { phase: TRAIN } } layer { name: "accuracy" type: "Accuracy" bottom: "fc8_office" bottom: "label" top: "accuracy" }