fundamentalvision / Deformable-DETR

Deformable DETR: Deformable Transformers for End-to-End Object Detection.
Apache License 2.0

Loss stays at ~24 when I retrain with the COCO 2017 dataset #138

Open gmstyy opened 2 years ago

gmstyy commented 2 years ago

I am trying to retrain the model, but the loss does not converge and does not match the training log provided by the author. My environment is as follows:

- torch 1.10.1
- CUDA 11.1 (RTX 3090)
- torchvision 0.11.0
- backbone: resnet50
- batch_size: 2
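
As a sanity check on this setup, a minimal sketch along the following lines can confirm the versions and that the compiled deformable-attention operator is actually importable. It assumes the CUDA operators under `./models/ops` were built with `sh ./make.sh`, so the extension imports as `MultiScaleDeformableAttention` (that name is assumed from `models/ops/setup.py`; adjust it if your build differs).

```python
# Minimal environment check (sketch). Assumes the models/ops CUDA extension
# was compiled and is importable as MultiScaleDeformableAttention.
import torch

print("torch:", torch.__version__)                  # 1.10.1 here
print("CUDA available:", torch.cuda.is_available())
print("CUDA (torch build):", torch.version.cuda)    # 11.x build

if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))                       # RTX 3090
    print("compute capability:", torch.cuda.get_device_capability(0))  # (8, 6)

try:
    # Extension name assumed from models/ops/setup.py.
    import MultiScaleDeformableAttention as MSDA
    print("deformable attention op:", MSDA.__file__)
except ImportError as err:
    print("deformable attention op not built:", err)
```

If I read the README correctly, there is also a unit test for the compiled operators (`python test.py` inside `./models/ops`) that should report all checks as True.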

log: {"train_lr": 0.00019999999999975943, "train_class_error": 75.72402580607138, "train_grad_norm": 174.16096408082132, "train_loss": 24.92455884112105, "train_loss_ce": 1.5516803443323013, "train_loss_bbox": 1.2958867718301648, "train_loss_giou": 1.3179215813784224, "train_loss_ce_0": 1.555705033393809, "train_loss_bbox_0": 1.2454274639411556, "train_loss_giou_0": 1.3100825519216728, "train_loss_ce_1": 1.5533387946742339, "train_loss_bbox_1": 1.2817698136110973, "train_loss_giou_1": 1.314233873036744, "train_loss_ce_2": 1.553454900913335, "train_loss_bbox_2": 1.2827665426917714, "train_loss_giou_2": 1.3160993929237823, "train_loss_ce_3": 1.5536564171927232, "train_loss_bbox_3": 1.2971249588876763, "train_loss_giou_3": 1.3202824098258321, "train_loss_ce_4": 1.5495921454574335, "train_loss_bbox_4": 1.3055195952253518, "train_loss_giou_4": 1.32001628112563, "train_loss_ce_unscaled": 0.7758401721661506, "train_class_error_unscaled": 75.72402580607138, "train_loss_bbox_unscaled": 0.2591773542356366, "train_loss_giou_unscaled": 0.6589607906892112, "train_cardinality_error_unscaled": 93.417772029048, "train_loss_ce_0_unscaled": 0.7778525166969045, "train_loss_bbox_0_unscaled": 0.24908549271966943, "train_loss_giou_0_unscaled": 0.6550412759608364, "train_cardinality_error_0_unscaled": 93.41770439693289, "train_loss_ce_1_unscaled": 0.7766693973371169, "train_loss_bbox_1_unscaled": 0.25635396260547877, "train_loss_giou_1_unscaled": 0.657116936518372, "train_cardinality_error_1_unscaled": 93.41465249773856, "train_loss_ce_2_unscaled": 0.7767274504566675, "train_loss_bbox_2_unscaled": 0.2565533084156676, "train_loss_giou_2_unscaled": 0.6580496964618912, "train_cardinality_error_2_unscaled": 93.41772130496166, "train_loss_ce_3_unscaled": 0.7768282085963616, "train_loss_bbox_3_unscaled": 0.25942499168076155, "train_loss_giou_3_unscaled": 0.6601412049129161, "train_cardinality_error_3_unscaled": 93.41781429911994, "train_loss_ce_4_unscaled": 0.7747960727287168, "train_loss_bbox_4_unscaled": 0.2611039189051818, "train_loss_giou_4_unscaled": 0.660008140562815, "train_cardinality_error_4_unscaled": 93.41767903488972, "test_class_error": 75.51222823181152, "test_loss": 24.403997676754, "test_loss_ce": 1.5397335748314858, "test_loss_bbox": 1.1809757440328599, "test_loss_giou": 1.328350156122446, "test_loss_ce_0": 1.5380387696385383, "test_loss_bbox_0": 1.172936351659894, "test_loss_giou_0": 1.343787284964323, "test_loss_ce_1": 1.5389538348913192, "test_loss_bbox_1": 1.1948208822458983, "test_loss_giou_1": 1.3359532308876514, "test_loss_ce_2": 1.5372176666259765, "test_loss_bbox_2": 1.2015455790698528, "test_loss_giou_2": 1.3387366657078266, "test_loss_ce_3": 1.5392194417476655, "test_loss_bbox_3": 1.199291840606928, "test_loss_giou_3": 1.3283177320718764, "test_loss_ce_4": 1.533316547882557, "test_loss_bbox_4": 1.2135228402704001, "test_loss_giou_4": 1.3392795690715313, "test_loss_ce_unscaled": 0.7698667874157429, "test_class_error_unscaled": 75.51222823181152, "test_loss_bbox_unscaled": 0.2361951487019658, "test_loss_giou_unscaled": 0.664175078061223, "test_cardinality_error_unscaled": 92.733, "test_loss_ce_0_unscaled": 0.7690193848192691, "test_loss_bbox_0_unscaled": 0.23458727022111417, "test_loss_giou_0_unscaled": 0.6718936424821615, "test_cardinality_error_0_unscaled": 92.733, "test_loss_ce_1_unscaled": 0.7694769174456596, "test_loss_bbox_1_unscaled": 0.238964176338166, "test_loss_giou_1_unscaled": 0.6679766154438257, "test_cardinality_error_1_unscaled": 92.733, "test_loss_ce_2_unscaled": 
0.7686088333129882, "test_loss_bbox_2_unscaled": 0.2403091155819595, "test_loss_giou_2_unscaled": 0.6693683328539133, "test_cardinality_error_2_unscaled": 92.733, "test_loss_ce_3_unscaled": 0.7696097208738327, "test_loss_bbox_3_unscaled": 0.23985836801975965, "test_loss_giou_3_unscaled": 0.6641588660359382, "test_cardinality_error_3_unscaled": 92.733, "test_loss_ce_4_unscaled": 0.7666582739412785, "test_loss_bbox_4_unscaled": 0.24270456793904305, "test_loss_giou_4_unscaled": 0.6696397845357657, "test_cardinality_error_4_unscaled": 92.733, "test_coco_eval_bbox": [0.0002048392732184109, 0.0005285462681054293, 0.00013577367190890753, 1.2430999737141855e-05, 0.0002637642099466402, 0.0003463450909022725, 0.007298797791140646, 0.009057064828664984, 0.009277789520137565, 1.1025998142989787e-05, 0.0004416553407773862, 0.015021164785423511], "epoch": 0, "n_parameters": 39744865} {"train_lr": 0.00019999999999975943, "train_class_error": 75.3560462770805, "train_grad_norm": 142.0692724874927, "train_loss": 24.122603570650938, "train_loss_ce": 1.5379108967752102, "train_loss_bbox": 1.195225195155382, "train_loss_giou": 1.2811618591005554, "train_loss_ce_0": 1.5364077599920238, "train_loss_bbox_0": 1.2116031657610835, "train_loss_giou_0": 1.2864278346535445, "train_loss_ce_1": 1.535675086740946, "train_loss_bbox_1": 1.1998680469908387, "train_loss_giou_1": 1.2795544117798732, "train_loss_ce_2": 1.536828916156664, "train_loss_bbox_2": 1.1951256065902403, "train_loss_giou_2": 1.2760442153680194, "train_loss_ce_3": 1.5350048682565043, "train_loss_bbox_3": 1.2051689024240115, "train_loss_giou_3": 1.279889775380999, "train_loss_ce_4": 1.53271080879491, "train_loss_bbox_4": 1.2148458232750474, "train_loss_giou_4": 1.283150424541141, "train_loss_ce_unscaled": 0.7689554483876051, "train_class_error_unscaled": 75.3560462770805, "train_loss_bbox_unscaled": 0.2390450389381701, "train_loss_giou_unscaled": 0.6405809295502777, "train_cardinality_error_unscaled": 93.41684208746523, "train_loss_ce_0_unscaled": 0.7682038799960119, "train_loss_bbox_0_unscaled": 0.24232063305968832, "train_loss_giou_0_unscaled": 0.6432139173267722, "train_cardinality_error_0_unscaled": 93.41684208746523, "train_loss_ce_1_unscaled": 0.767837543370473, "train_loss_bbox_1_unscaled": 0.239973609310439, "train_loss_giou_1_unscaled": 0.6397772058899366, "train_cardinality_error_1_unscaled": 93.41684208746523, "train_loss_ce_2_unscaled": 0.768414458078332, "train_loss_bbox_2_unscaled": 0.2390251211945551, "train_loss_giou_2_unscaled": 0.6380221076840097, "train_cardinality_error_2_unscaled": 93.41684208746523, "train_loss_ce_3_unscaled": 0.7675024341282521, "train_loss_bbox_3_unscaled": 0.2410337803686852, "train_loss_giou_3_unscaled": 0.6399448876904995, "train_cardinality_error_3_unscaled": 93.41684208746523, "train_loss_ce_4_unscaled": 0.766355404397455, "train_loss_bbox_4_unscaled": 0.2429691645347856, "train_loss_giou_4_unscaled": 0.6415752122705705, "train_cardinality_error_4_unscaled": 93.41684208746523, "test_class_error": 75.48261064224243, "test_loss": 24.344096414089204, "test_loss_ce": 1.5468226409077643, "test_loss_bbox": 1.1731176999658346, "test_loss_giou": 1.3290783358454705, "test_loss_ce_0": 1.5543853159308434, "test_loss_bbox_0": 1.2061218362808228, "test_loss_giou_0": 1.3365924843072892, "test_loss_ce_1": 1.5475848024010659, "test_loss_bbox_1": 1.1663360624969006, "test_loss_giou_1": 1.3246267874121667, "test_loss_ce_2": 1.5483648885130883, "test_loss_bbox_2": 1.1765344133526088, "test_loss_giou_2": 1.32678790307045, 
"test_loss_ce_3": 1.5492910379886626, "test_loss_bbox_3": 1.1780831864148378, "test_loss_giou_3": 1.322745217180252, "test_loss_ce_4": 1.5443842419743539, "test_loss_bbox_4": 1.1829419850468637, "test_loss_giou_4": 1.330297611796856, "test_loss_ce_unscaled": 0.7734113204538822, "test_class_error_unscaled": 75.48261064224243, "test_loss_bbox_unscaled": 0.23462353963404894, "test_loss_giou_unscaled": 0.6645391679227353, "test_cardinality_error_unscaled": 92.733, "test_loss_ce_0_unscaled": 0.7771926579654217, "test_loss_bbox_0_unscaled": 0.2412243671797216, "test_loss_giou_0_unscaled": 0.6682962421536446, "test_cardinality_error_0_unscaled": 92.733, "test_loss_ce_1_unscaled": 0.7737924012005329, "test_loss_bbox_1_unscaled": 0.23326721237376333, "test_loss_giou_1_unscaled": 0.6623133937060833, "test_cardinality_error_1_unscaled": 92.733, "test_loss_ce_2_unscaled": 0.7741824442565441, "test_loss_bbox_2_unscaled": 0.2353068825148046, "test_loss_giou_2_unscaled": 0.663393951535225, "test_cardinality_error_2_unscaled": 92.733, "test_loss_ce_3_unscaled": 0.7746455189943313, "test_loss_bbox_3_unscaled": 0.23561663719937206, "test_loss_giou_3_unscaled": 0.661372608590126, "test_cardinality_error_3_unscaled": 92.733, "test_loss_ce_4_unscaled": 0.7721921209871769, "test_loss_bbox_4_unscaled": 0.2365883969374001, "test_loss_giou_4_unscaled": 0.665148805898428, "test_cardinality_error_4_unscaled": 92.733, "test_coco_eval_bbox": [0.0003188124072810938, 0.0007965338558096675, 0.0002306547702929801, 1.3061712041704372e-05, 0.00028090844323296, 0.0004697613562911153, 0.006288050152131976, 0.008526836116606751, 0.008766002860598586, 1.8570102135561747e-05, 0.0005306292865200669, 0.015147890169300137], "epoch": 1, "n_parameters": 39744865}