xialuxi / arcface-caffe

insightface-caffe
MIT License

Loss stays constant during training, hoping for some pointers #32

Open · jinpeifei2015 opened this issue 5 years ago

jinpeifei2015 commented 5 years ago

The model definition is as follows:

```
name: "ArcFace"
layer {
  name: "data" type: "Data"
  top: "data" top: "label"
  include { phase: TRAIN }
  transform_param {
    resize_param {
      prob: 1 resize_mode: WARP height: 128 width: 128
      interp_mode: LINEAR interp_mode: AREA interp_mode: CUBIC interp_mode: LANCZOS4
    }
    mirror: true
    crop_h: 128
    crop_w: 128
    distort_param {
      #  brightness_prob: 0.5
      #  brightness_delta: 32
      #  contrast_prob: 0.5
      #  contrast_lower: 0.5
      #  contrast_upper: 1.5
      #  hue_prob: 0.5
      #  hue_delta: 18
      #  saturation_prob: 0.5
      #  saturation_lower: 0.5
      #  saturation_upper: 1.5
      #  random_order_prob: 0.
    }
  }
  data_param {
    source: "/media/zz/7c333a37-0503-4f81-8103-0ef7e776f6fb/Face_Data/casia_extract_aligned_train_9204cls_lmdb"
    batch_size: 512
    backend: LMDB
  }
}
layer {
  name: "data" type: "Data"
  top: "data" top: "label"
  include { phase: TEST }
  transform_param {
    resize_param { prob: 1 resize_mode: WARP height: 128 width: 128 interp_mode: LINEAR }
    crop_h: 128
    crop_w: 128
  }
  data_param {
    source: "/media/zz/7c333a37-0503-4f81-8103-0ef7e776f6fb/Face_Data/casia_extract_aligned_test_9204cls_lmdb"
    batch_size: 2
    backend: LMDB
  }
}
############## CNN Architecture ###############
layer { name: "data/bias" type: "Bias" bottom: "data" top: "data/bias" param { lr_mult: 0 decay_mult: 0 } bias_param { filler { type: "constant" value: -128 } } }
layer { name: "conv1" type: "Convolution" bottom: "data/bias" top: "conv1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 32 kernel_size: 7 pad: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }
layer { name: "conv1_bn" type: "BatchNorm" bottom: "conv1" top: "conv1" }
layer { name: "conv1_scale" type: "Scale" bottom: "conv1" top: "conv1" scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } }
layer { name: "conv1_relu" type: "ReLU" bottom: "conv1" top: "conv1" }
layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 2 stride: 2 } }
layer { name: "pool1_1" type: "Pooling" bottom: "pool1" top: "pool1_1" pooling_param { pool: MAX kernel_size: 2 stride: 2 } }
layer { name: "conv2_1" type: "Convolution" bottom: "pool1_1" top: "conv2_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 32 kernel_size: 1 stride: 1 pad: 0 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }
layer { name: "conv2_1_bn" type: "BatchNorm" bottom: "conv2_1" top: "conv2_1" }
layer { name: "conv2_1_scale" type: "Scale" bottom: "conv2_1" top: "conv2_1" scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } }
layer { name: "conv2_1_relu" type: "ReLU" bottom: "conv2_1" top: "conv2_1" }
layer { name: "conv2_2" type: "Convolution" bottom: "conv2_1" top: "conv2_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 64 kernel_size: 3 stride: 1 pad: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }
layer { name: "conv2_2_bn" type: "BatchNorm" bottom: "conv2_2" top: "conv2_2" }
layer { name: "conv2_2_scale" type: "Scale" bottom: "conv2_2" top: "conv2_2" scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } }
layer { name: "conv2_2_relu" type: "ReLU" bottom: "conv2_2" top: "conv2_2" }
layer { name: "pool2" type: "Pooling" bottom: "conv2_2" top: "pool2" pooling_param { pool: MAX kernel_size: 2 stride: 2 } }
##############################################
layer { name: "conv3_1" type: "Convolution" bottom: "pool2" top: "conv3_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 64 kernel_size: 1 pad: 0 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }
layer { name: "conv3_1_bn" type: "BatchNorm" bottom: "conv3_1" top: "conv3_1" }
layer { name: "conv3_1_scale" type: "Scale" bottom: "conv3_1" top: "conv3_1" scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } }
layer { name: "conv3_1_relu" type: "ReLU" bottom: "conv3_1" top: "conv3_1" }
layer { name: "conv3_2" type: "Convolution" bottom: "conv3_1" top: "conv3_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 96 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }
layer { name: "conv3_2_bn" type: "BatchNorm" bottom: "conv3_2" top: "conv3_2" }
layer { name: "conv3_2_scale" type: "Scale" bottom: "conv3_2" top: "conv3_2" scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } }
layer { name: "conv3_2_relu" type: "ReLU" bottom: "conv3_2" top: "conv3_2" }
layer { name: "conv4_1" type: "Convolution" bottom: "conv3_2" top: "conv4_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 96 kernel_size: 1 pad: 0 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }
layer { name: "conv4_1_bn" type: "BatchNorm" bottom: "conv4_1" top: "conv4_1" }
layer { name: "conv4_1_scale" type: "Scale" bottom: "conv4_1" top: "conv4_1" scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } }
layer { name: "conv4_1_relu" type: "ReLU" bottom: "conv4_1" top: "conv4_1" }
layer { name: "conv4_2" type: "Convolution" bottom: "conv4_1" top: "conv4_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }
layer { name: "conv4_2_bn" type: "BatchNorm" bottom: "conv4_2" top: "conv4_2" }
layer { name: "conv4_2_scale" type: "Scale" bottom: "conv4_2" top: "conv4_2" scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } }
layer { name: "conv4_2_relu" type: "ReLU" bottom: "conv4_2" top: "conv4_2" }
################################################
layer { name: "conv5_1" type: "Convolution" bottom: "conv4_2" top: "conv5_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 96 kernel_size: 1 pad: 0 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } }
layer { name: "conv5_1_bn" type: "BatchNorm" bottom: "conv5_1" top: "conv5_1" }
layer { name: "conv5_1_scale" type: "Scale" bottom: "conv5_1" top: "conv5_1" scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } }
layer { name: "conv5_1_relu" type: "ReLU" bottom: "conv5_1" top: "conv5_1" }
layer { name: "pool3" type: "Pooling" bottom: "conv5_1" top: "pool3" pooling_param { pool: MAX kernel_size: 2 stride: 2 } }
#########################################
layer { name: "fc1" type: "InnerProduct" bottom: "pool3" top: "fc1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 1024 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } }
layer { name: "fc1_bn" type: "BatchNorm" bottom: "fc1" top: "fc1" }
layer { name: "fc1_scale" type: "Scale" bottom: "fc1" top: "fc1" scale_param { filler { value: 1 } bias_term: true bias_filler { value: 0 } } }
layer { name: "fc1_relu" type: "ReLU" bottom: "fc1" top: "fc1" }
layer { name: "fc2" type: "InnerProduct" bottom: "fc1" top: "fc2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } inner_product_param { num_output: 128 weight_filler { type: "xavier" } bias_filler { type: "constant" value: 0 } } }
layer { name: "fc2_norm" type: "NormalizeJin" bottom: "fc2" top: "fc2_norm" norm_jin_param { across_spatial: true scale_filler { type: "constant" value: 1.0 } channel_shared: true } }
############### Arc-Softmax Loss ##############
layer { name: "fc6_changed" type: "InnerProduct" bottom: "fc2_norm" top: "fc6" inner_product_param { num_output: 9204 normalize: true weight_filler { type: "xavier" } bias_term: false } }
layer { name: "cosin_add_m" type: "CosinAddm" bottom: "fc6" bottom: "label" top: "fc6_margin" cosin_add_m_param { m: 0.1 } include { phase: TRAIN } }
layer { name: "fc6_margin_scale" type: "Scale" bottom: "fc6_margin" top: "fc6_margin_scale" param { lr_mult: 0 decay_mult: 0 } scale_param { filler { type: "constant" value: 64 } } include { phase: TRAIN } }
layer { name: "softmax_loss" type: "SoftmaxWithLoss" bottom: "fc6_margin_scale" bottom: "label" top: "softmax_loss" loss_weight: 1 include { phase: TRAIN } }
layer { name: "Accuracy" type: "Accuracy" bottom: "fc6" bottom: "label" top: "accuracy" include { phase: TEST } }
```

The loss log looks like this:

```
I0627 17:38:58.567371  6757 solver.cpp:224] Iteration 450 (2.13816 iter/s, 4.67691s/10 iters), loss = 87.3365
I0627 17:38:58.567402  6757 solver.cpp:243]     Train net output #0: softmax_loss = 87.3365 (* 1 = 87.3365 loss)
I0627 17:38:58.567409  6757 sgd_solver.cpp:137] Iteration 450, lr = 0.00314
I0627 17:39:03.256306  6757 solver.cpp:224] Iteration 460 (2.13288 iter/s, 4.6885s/10 iters), loss = 87.3365
I0627 17:39:03.256340  6757 solver.cpp:243]     Train net output #0: softmax_loss = 87.3365 (* 1 = 87.3365 loss)
I0627 17:39:03.256347  6757 sgd_solver.cpp:137] Iteration 460, lr = 0.00314
I0627 17:39:07.941520  6757 solver.cpp:224] Iteration 470 (2.13457 iter/s, 4.68478s/10 iters), loss = 87.3365
I0627 17:39:07.941551  6757 solver.cpp:243]     Train net output #0: softmax_loss = 87.3365 (* 1 = 87.3365 loss)
I0627 17:39:07.941558  6757 sgd_solver.cpp:137] Iteration 470, lr = 0.00314
I0627 17:39:12.623337  6757 solver.cpp:224] Iteration 480 (2.13612 iter/s, 4.68139s/10 iters), loss = 87.3365
I0627 17:39:12.623456  6757 solver.cpp:243]     Train net output #0: softmax_loss = 87.3365 (* 1 = 87.3365 loss)
```

How should I fix this?
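A note on that number: Caffe's `SoftmaxWithLoss` clamps the predicted probability at `FLT_MIN` before taking the log, and

$$
-\ln(\mathrm{FLT\_MIN}) = -\ln(1.17549435 \times 10^{-38}) \approx 87.3365,
$$

so a loss pinned at exactly 87.3365 means the probability of the true class has underflowed to zero, i.e. the scaled logits have diverged rather than merely plateaued. That usually points to a learning rate that is too high for the fixed s = 64 scale.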

raywang1992 commented 5 years ago

Did you ever solve this problem?

changgongcheng commented 4 years ago

Same problem here.

vaan2010 commented 3 years ago

Try lowering the learning rate in solver.prototxt. I used 0.0005 or 0.0001 and training converged normally.
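For anyone hitting the same wall, a minimal solver.prototxt sketch with the lowered rate; only `base_lr` reflects the suggestion above, while the net path, schedule, and iteration counts are placeholders to adapt:

```
net: "arcface_train_val.prototxt"   # placeholder: path to the net posted above
base_lr: 0.0005                     # 0.0005 or 0.0001 reportedly converge; 0.00314 diverged in the log above
lr_policy: "step"
gamma: 0.1
stepsize: 100000                    # placeholder schedule
momentum: 0.9
weight_decay: 0.0005
display: 10
max_iter: 400000                    # placeholder
snapshot: 10000                     # placeholder
snapshot_prefix: "snapshots/arcface"
solver_mode: GPU
```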