Sense-X / Co-DETR

[ICCV 2023] DETRs with Collaborative Hybrid Assignments Training
MIT License

Is it normal for training to take this long? #192

Open · Xuxiaoxiaohaha opened this issue 4 days ago

Xuxiaoxiaohaha commented 4 days ago

On 4x RTX 3090 GPUs with the batch size set to 1, training a model with a Swin-L backbone (backbone frozen), one COCO epoch takes 9 hours. Is that normal?

TempleX98 commented 20 hours ago

Could you share your config?

Xuxiaoxiaohaha commented 14 hours ago

> Could you share your config?

Thanks for your reply! Below is my training configuration. I am using co_dino_5scale_swin_large_16e_o365tococo.py, training on 8x RTX 3090 GPUs with the backbone frozen; one epoch takes about 13 hours. Here is the full config:

```python
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='AutoAugment',
        policies=[
            [{
                'type': 'Resize',
                'img_scale': [(480, 2048), (512, 2048), (544, 2048), (576, 2048), (608, 2048), (640, 2048), (672, 2048), (704, 2048), (736, 2048), (768, 2048), (800, 2048), (832, 2048), (864, 2048), (896, 2048), (928, 2048), (960, 2048), (992, 2048), (1024, 2048), (1056, 2048), (1088, 2048), (1120, 2048), (1152, 2048), (1184, 2048), (1216, 2048), (1248, 2048), (1280, 2048), (1312, 2048), (1344, 2048), (1376, 2048), (1408, 2048), (1440, 2048), (1472, 2048), (1504, 2048), (1536, 2048)],
                'multiscale_mode': 'value',
                'keep_ratio': True
            }],
            [{
                'type': 'Resize',
                'img_scale': [(400, 4200), (500, 4200), (600, 4200)],
                'multiscale_mode': 'value',
                'keep_ratio': True
            }, {
                'type': 'RandomCrop',
                'crop_type': 'absolute_range',
                'crop_size': (384, 600),
                'allow_negative_crop': True
            }, {
                'type': 'Resize',
                'img_scale': [(480, 2048), (512, 2048), (544, 2048), (576, 2048), (608, 2048), (640, 2048), (672, 2048), (704, 2048), (736, 2048), (768, 2048), (800, 2048), (832, 2048), (864, 2048), (896, 2048), (928, 2048), (960, 2048), (992, 2048), (1024, 2048), (1056, 2048), (1088, 2048), (1120, 2048), (1152, 2048), (1184, 2048), (1216, 2048), (1248, 2048), (1280, 2048), (1312, 2048), (1344, 2048), (1376, 2048), (1408, 2048), (1440, 2048), (1472, 2048), (1504, 2048), (1536, 2048)],
                'multiscale_mode': 'value',
                'override': True,
                'keep_ratio': True
            }]
        ]),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1280),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
# Note: the dumped config repeats train_pipeline / test_pipeline verbatim inside
# data.train / data.val / data.test; they are written as references here for
# readability (the resolved config is identical).
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=1,
    train=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_train2017.json',
        img_prefix='data/coco/train2017/',
        pipeline=train_pipeline,
        filter_empty_gt=False),
    val=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=test_pipeline),
    test=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='bbox')
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = 'models/co_dino_5scale_swin_large_22e_o365.pth'
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
auto_scale_lr = dict(enable=False, base_batch_size=16)
num_dec_layer = 6
lambda_2 = 2.0
model = dict(
    type='CoDETR',
    backbone=dict(
        type='SwinTransformerV1',
        embed_dim=192,
        depths=[2, 2, 18, 2],
        num_heads=[6, 12, 24, 48],
        out_indices=(0, 1, 2, 3),
        window_size=12,
        ape=False,
        drop_path_rate=0.3,
        patch_norm=True,
        use_checkpoint=True,
        pretrained=None),
    neck=dict(
        type='ChannelMapper',
        in_channels=[192, 384, 768, 1536],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32),
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=4,
            scales_per_octave=3,
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=12.0),
        loss_bbox=dict(type='L1Loss', loss_weight=12.0)),
    query_head=dict(
        type='CoDINOHead',
        num_query=900,
        num_classes=80,
        num_feature_levels=5,
        in_channels=2048,
        sync_cls_avg_factor=True,
        as_two_stage=True,
        with_box_refine=True,
        mixed_selection=True,
        dn_cfg=dict(
            type='CdnQueryGenerator',
            noise_scale=dict(label=0.5, box=0.4),
            group_cfg=dict(dynamic=True, num_groups=None, num_dn_queries=500)),
        transformer=dict(
            type='CoDinoTransformer',
            with_pos_coord=True,
            with_coord_feat=False,
            num_co_heads=2,
            num_feature_levels=5,
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                with_cp=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=dict(
                        type='MultiScaleDeformableAttention',
                        embed_dims=256,
                        num_levels=5,
                        dropout=0.0),
                    feedforward_channels=2048,
                    ffn_dropout=0.0,
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DinoTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.0),
                        dict(
                            type='MultiScaleDeformableAttention',
                            embed_dims=256,
                            num_levels=5,
                            dropout=0.0)
                    ],
                    feedforward_channels=2048,
                    ffn_dropout=0.0,
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            temperature=20,
            normalize=True),
        loss_cls=dict(
            type='QualityFocalLoss', use_sigmoid=True, beta=2.0, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    roi_head=[
        dict(
            type='CoStandardRoIHead',
            bbox_roi_extractor=dict(
                type='SingleRoIExtractor',
                roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
                out_channels=256,
                featmap_strides=[4, 8, 16, 32, 64],
                finest_scale=56),
            bbox_head=dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=False,
                reg_decoded_bbox=True,
                loss_cls=dict(
                    type='CrossEntropyLoss', use_sigmoid=False, loss_weight=12.0),
                loss_bbox=dict(type='GIoULoss', loss_weight=120.0)))
    ],
    bbox_head=[
        dict(
            type='CoATSSHead',
            num_classes=80,
            in_channels=256,
            stacked_convs=1,
            feat_channels=256,
            anchor_generator=dict(
                type='AnchorGenerator',
                ratios=[1.0],
                octave_base_scale=8,
                scales_per_octave=1,
                strides=[4, 8, 16, 32, 64, 128]),
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            loss_cls=dict(
                type='FocalLoss',
                use_sigmoid=True,
                gamma=2.0,
                alpha=0.25,
                loss_weight=12.0),
            loss_bbox=dict(type='GIoULoss', loss_weight=24.0),
            loss_centerness=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=12.0))
    ],
    train_cfg=[
        dict(
            assigner=dict(
                type='HungarianAssigner',
                cls_cost=dict(type='FocalLossCost', weight=2.0),
                reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
                iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
        dict(
            rpn=dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.3,
                    min_pos_iou=0.3,
                    match_low_quality=True,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=256,
                    pos_fraction=0.5,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=False),
                allowed_border=-1,
                pos_weight=-1,
                debug=False),
            rpn_proposal=dict(
                nms_pre=4000,
                max_per_img=1000,
                nms=dict(type='nms', iou_threshold=0.7),
                min_bbox_size=0),
            rcnn=dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.5,
                    min_pos_iou=0.5,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                pos_weight=-1,
                debug=False)),
        dict(
            assigner=dict(type='ATSSAssigner', topk=9),
            allowed_border=-1,
            pos_weight=-1,
            debug=False)
    ],
    test_cfg=[
        dict(max_per_img=300, nms=dict(type='soft_nms', iou_threshold=0.8)),
        dict(
            rpn=dict(
                nms_pre=1000,
                max_per_img=1000,
                nms=dict(type='nms', iou_threshold=0.7),
                min_bbox_size=0),
            rcnn=dict(
                score_thr=0.0,
                nms=dict(type='nms', iou_threshold=0.5),
                max_per_img=100)),
        dict(
            nms_pre=1000,
            min_bbox_size=0,
            score_thr=0.0,
            nms=dict(type='nms', iou_threshold=0.6),
            max_per_img=100)
    ])
optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.0001, paramwise_cfg=None)
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
lr_config = dict(policy='step', step=[8])
runner = dict(type='EpochBasedRunner', max_epochs=1)
pretrained = None
work_dir = 'xqx_exp'
auto_resume = False
gpu_ids = range(0, 8)
```
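
For readers trying to judge whether this runtime is expected, the settings that make each iteration expensive are all visible in the dump above: multi-scale training with image scales up to (1536, 2048), five feature levels, 900 queries plus denoising queries, two auxiliary heads, and activation checkpointing (`use_checkpoint=True` in the backbone, `with_cp=6` in the encoder), which trades extra compute for lower memory. Below is a minimal sketch for pulling those fields out programmatically; it assumes an mmcv 1.x environment and that the dump above has been saved to a file (the filename `my_co_dino_config.py` is only an example).

```python
# Minimal sketch: read back the dumped config and print the settings that
# dominate per-iteration cost. Assumes mmcv 1.x; the filename is an example.
from mmcv import Config

cfg = Config.fromfile('my_co_dino_config.py')  # the dump above, saved to disk

auto_augment = cfg.train_pipeline[3]  # the AutoAugment step in the pipeline above
print('largest train scale:', auto_augment['policies'][0][0]['img_scale'][-1])
print('samples_per_gpu    :', cfg.data.samples_per_gpu)
print('feature levels     :', cfg.model.query_head.num_feature_levels)
print('num queries        :', cfg.model.query_head.num_query)
print('backbone ckpting   :', cfg.model.backbone.use_checkpoint)  # recomputes activations in backward
print('encoder with_cp    :', cfg.model.query_head.transformer.encoder.with_cp)
```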

Here is part of the training log:

```
2024-11-17 18:03:39,272 - mmdet - INFO - Epoch [1][50/14786] lr: 1.000e-04, eta: 13:34:21, time: 3.316, data_time: 0.196, memory: 15704, enc_loss_cls: 1.2139, enc_loss_bbox: 0.6363, enc_loss_iou: 1.0582, loss_cls: 1.9909, loss_bbox: 0.3628, loss_iou: 0.6634, d0.loss_cls: 1.7273, d0.loss_bbox: 0.5007, d0.loss_iou: 0.8603, d1.loss_cls: 1.9407, d1.loss_bbox: 0.4114, d1.loss_iou: 0.7481, d2.loss_cls: 1.9556, d2.loss_bbox: 0.3778, d2.loss_iou: 0.6961, d3.loss_cls: 1.9539, d3.loss_bbox: 0.3660, d3.loss_iou: 0.6720, d4.loss_cls: 1.9604, d4.loss_bbox: 0.3633, d4.loss_iou: 0.6658, dn_loss_cls: 2.5820, dn_loss_bbox: 0.2440, dn_loss_iou: 0.2946, d0.dn_loss_cls: 2.4022, d0.dn_loss_bbox: 0.3781, d0.dn_loss_iou: 0.3923, d1.dn_loss_cls: 2.6242, d1.dn_loss_bbox: 0.2796, d1.dn_loss_iou: 0.3249, d2.dn_loss_cls: 2.6785, d2.dn_loss_bbox: 0.2771, d2.dn_loss_iou: 0.3206, d3.dn_loss_cls: 2.6738, d3.dn_loss_bbox: 0.2535, d3.dn_loss_iou: 0.3030, d4.dn_loss_cls: 2.6191, d4.dn_loss_bbox: 0.2440, d4.dn_loss_iou: 0.2945, loss_rpn_cls: 0.2260, loss_rpn_bbox: 0.1392, loss_cls0: 39.4641, acc0: 65.6475, loss_bbox0: 5.8203, loss_cls1: 14.5885, loss_bbox1: 3.9017, loss_centerness1: 7.1724, loss_cls_aux0: 3.3491, loss_bbox_aux0: 0.0526, loss_iou_aux0: 0.0865, d0.loss_cls_aux0: 3.2025, d0.loss_bbox_aux0: 0.1475, d0.loss_iou_aux0: 0.2275, d1.loss_cls_aux0: 3.5376, d1.loss_bbox_aux0: 0.0700, d1.loss_iou_aux0: 0.1160, d2.loss_cls_aux0: 3.4812, d2.loss_bbox_aux0: 0.0575, d2.loss_iou_aux0: 0.0933, d3.loss_cls_aux0: 3.5043, d3.loss_bbox_aux0: 0.0525, d3.loss_iou_aux0: 0.0856, d4.loss_cls_aux0: 3.4005, d4.loss_bbox_aux0: 0.0526, d4.loss_iou_aux0: 0.0860, loss_cls_aux1: 2.8697, loss_bbox_aux1: 0.0967, loss_iou_aux1: 0.1962, d0.loss_cls_aux1: 2.9972, d0.loss_bbox_aux1: 0.1558, d0.loss_iou_aux1: 0.2856, d1.loss_cls_aux1: 3.1244, d1.loss_bbox_aux1: 0.1053, d1.loss_iou_aux1: 0.2086, d2.loss_cls_aux1: 3.0108, d2.loss_bbox_aux1: 0.0991, d2.loss_iou_aux1: 0.2005, d3.loss_cls_aux1: 3.0209, d3.loss_bbox_aux1: 0.0967, d3.loss_iou_aux1: 0.1961, d4.loss_cls_aux1: 2.9191, d4.loss_bbox_aux1: 0.0967, d4.loss_iou_aux1: 0.1961, loss: 153.1018, grad_norm: 196.0960 2024-11-17 18:06:20,327 - mmdet - INFO - Epoch [1][100/14786] lr: 1.000e-04, eta: 13:19:55, time: 3.220, data_time: 0.117, memory: 16025, enc_loss_cls: 1.6181, enc_loss_bbox: 0.6048, enc_loss_iou: 0.9506, loss_cls: 1.3082, loss_bbox: 0.4486, loss_iou: 0.7642, d0.loss_cls: 1.7871, d0.loss_bbox: 0.5076, d0.loss_iou: 0.8289, d1.loss_cls: 1.4863, d1.loss_bbox: 0.4592, d1.loss_iou: 0.7886, d2.loss_cls: 1.3779, d2.loss_bbox: 0.4550, d2.loss_iou: 0.7694, d3.loss_cls: 1.3403, d3.loss_bbox: 0.4495, d3.loss_iou: 0.7637, d4.loss_cls: 1.3090, d4.loss_bbox: 0.4493, d4.loss_iou: 0.7654, dn_loss_cls: 1.8914, dn_loss_bbox: 0.1955, dn_loss_iou: 0.2684, d0.dn_loss_cls: 2.3265, d0.dn_loss_bbox: 0.3002, d0.dn_loss_iou: 0.3480, d1.dn_loss_cls: 2.2063, d1.dn_loss_bbox: 0.2115, d1.dn_loss_iou: 0.2813, d2.dn_loss_cls: 2.0915, d2.dn_loss_bbox: 0.1987, d2.dn_loss_iou: 0.2702, d3.dn_loss_cls: 2.0149, d3.dn_loss_bbox: 0.1955, d3.dn_loss_iou: 0.2687, d4.dn_loss_cls: 1.9194, d4.dn_loss_bbox: 0.1955, d4.dn_loss_iou: 0.2684, loss_rpn_cls: 0.2443, loss_rpn_bbox: 0.1697, loss_cls0: 8.8559, acc0: 86.4912, loss_bbox0: 5.8426, loss_cls1: 12.6179, loss_bbox1: 4.1113, loss_centerness1: 7.2117, loss_cls_aux0: 2.0065, loss_bbox_aux0: 0.0730, loss_iou_aux0: 0.1322, d0.loss_cls_aux0: 2.5614, d0.loss_bbox_aux0: 0.1508, d0.loss_iou_aux0: 0.2481, d1.loss_cls_aux0: 2.4448, d1.loss_bbox_aux0: 0.0861, d1.loss_iou_aux0: 0.1539, 
d2.loss_cls_aux0: 2.1940, d2.loss_bbox_aux0: 0.0746, d2.loss_iou_aux0: 0.1337, d3.loss_cls_aux0: 2.1443, d3.loss_bbox_aux0: 0.0726, d3.loss_iou_aux0: 0.1315, d4.loss_cls_aux0: 2.0452, d4.loss_bbox_aux0: 0.0729, d4.loss_iou_aux0: 0.1319, loss_cls_aux1: 1.7300, loss_bbox_aux1: 0.1020, loss_iou_aux1: 0.2297, d0.loss_cls_aux1: 2.3659, d0.loss_bbox_aux1: 0.1584, d0.loss_iou_aux1: 0.3179, d1.loss_cls_aux1: 2.1757, d1.loss_bbox_aux1: 0.1092, d1.loss_iou_aux1: 0.2401, d2.loss_cls_aux1: 1.9127, d2.loss_bbox_aux1: 0.1032, d2.loss_iou_aux1: 0.2309, d3.loss_cls_aux1: 1.8566, d3.loss_bbox_aux1: 0.1018, d3.loss_iou_aux1: 0.2292, d4.loss_cls_aux1: 1.7649, d4.loss_bbox_aux1: 0.1019, d4.loss_iou_aux1: 0.2296, loss: 102.5545, grad_norm: 114.1702 2024-11-17 18:08:59,055 - mmdet - INFO - Epoch [1][150/14786] lr: 1.000e-04, eta: 13:09:38, time: 3.175, data_time: 0.111, memory: 16025, enc_loss_cls: 1.3520, enc_loss_bbox: 0.8484, enc_loss_iou: 1.1565, loss_cls: 0.9212, loss_bbox: 0.6415, loss_iou: 0.9409, d0.loss_cls: 1.3208, d0.loss_bbox: 0.7412, d0.loss_iou: 1.0477, d1.loss_cls: 1.1533, d1.loss_bbox: 0.6322, d1.loss_iou: 0.9894, d2.loss_cls: 1.0584, d2.loss_bbox: 0.6551, d2.loss_iou: 0.9611, d3.loss_cls: 0.9874, d3.loss_bbox: 0.6378, d3.loss_iou: 0.9455, d4.loss_cls: 0.9289, d4.loss_bbox: 0.6422, d4.loss_iou: 0.9406, dn_loss_cls: 1.4107, dn_loss_bbox: 0.2269, dn_loss_iou: 0.2615, d0.dn_loss_cls: 2.1103, d0.dn_loss_bbox: 0.2977, d0.dn_loss_iou: 0.3199, d1.dn_loss_cls: 1.9796, d1.dn_loss_bbox: 0.2366, d1.dn_loss_iou: 0.2708, d2.dn_loss_cls: 1.7715, d2.dn_loss_bbox: 0.2252, d2.dn_loss_iou: 0.2602, d3.dn_loss_cls: 1.6045, d3.dn_loss_bbox: 0.2267, d3.dn_loss_iou: 0.2616, d4.dn_loss_cls: 1.4564, d4.dn_loss_bbox: 0.2269, d4.dn_loss_iou: 0.2615, loss_rpn_cls: 0.1861, loss_rpn_bbox: 0.1548, loss_cls0: 6.6524, acc0: 88.6562, loss_bbox0: 4.7933, loss_cls1: 8.9403, loss_bbox1: 4.0416, loss_centerness1: 7.2015, loss_cls_aux0: 1.4616, loss_bbox_aux0: 0.0773, loss_iou_aux0: 0.1402, d0.loss_cls_aux0: 2.0407, d0.loss_bbox_aux0: 0.1610, d0.loss_iou_aux0: 0.2510, d1.loss_cls_aux0: 2.0394, d1.loss_bbox_aux0: 0.0910, d1.loss_iou_aux0: 0.1602, d2.loss_cls_aux0: 1.8006, d2.loss_bbox_aux0: 0.0785, d2.loss_iou_aux0: 0.1408, d3.loss_cls_aux0: 1.6759, d3.loss_bbox_aux0: 0.0770, d3.loss_iou_aux0: 0.1394, d4.loss_cls_aux0: 1.5250, d4.loss_bbox_aux0: 0.0772, d4.loss_iou_aux0: 0.1398, loss_cls_aux1: 1.2320, loss_bbox_aux1: 0.1110, loss_iou_aux1: 0.2341, d0.loss_cls_aux1: 1.8496, d0.loss_bbox_aux1: 0.1661, d0.loss_iou_aux1: 0.3170, d1.loss_cls_aux1: 1.7439, d1.loss_bbox_aux1: 0.1184, d1.loss_iou_aux1: 0.2443, d2.loss_cls_aux1: 1.5058, d2.loss_bbox_aux1: 0.1122, d2.loss_iou_aux1: 0.2353, d3.loss_cls_aux1: 1.4012, d3.loss_bbox_aux1: 0.1111, d3.loss_iou_aux1: 0.2341, d4.loss_cls_aux1: 1.2821, d4.loss_bbox_aux1: 0.1110, d4.loss_iou_aux1: 0.2340, loss: 88.1999, grad_norm: 128.0138 2024-11-17 18:11:42,105 - mmdet - INFO - Epoch [1][200/14786] lr: 1.000e-04, eta: 13:08:20, time: 3.260, data_time: 0.118, memory: 16064, enc_loss_cls: 1.4798, enc_loss_bbox: 0.6104, enc_loss_iou: 0.9526, loss_cls: 0.9570, loss_bbox: 0.4254, loss_iou: 0.7425, d0.loss_cls: 1.5349, d0.loss_bbox: 0.5125, d0.loss_iou: 0.8346, d1.loss_cls: 1.2955, d1.loss_bbox: 0.4415, d1.loss_iou: 0.7871, d2.loss_cls: 1.1326, d2.loss_bbox: 0.4386, d2.loss_iou: 0.7575, d3.loss_cls: 1.0224, d3.loss_bbox: 0.4260, d3.loss_iou: 0.7434, d4.loss_cls: 0.9641, d4.loss_bbox: 0.4261, d4.loss_iou: 0.7389, dn_loss_cls: 1.1259, dn_loss_bbox: 0.1937, dn_loss_iou: 0.2456, d0.dn_loss_cls: 1.9997, 
d0.dn_loss_bbox: 0.2624, d0.dn_loss_iou: 0.2989, d1.dn_loss_cls: 1.8400, d1.dn_loss_bbox: 0.2020, d1.dn_loss_iou: 0.2527, d2.dn_loss_cls: 1.5509, d2.dn_loss_bbox: 0.1925, d2.dn_loss_iou: 0.2436, d3.dn_loss_cls: 1.3651, d3.dn_loss_bbox: 0.1934, d3.dn_loss_iou: 0.2450, d4.dn_loss_cls: 1.1847, d4.dn_loss_bbox: 0.1936, d4.dn_loss_iou: 0.2453, loss_rpn_cls: 0.1531, loss_rpn_bbox: 0.1612, loss_cls0: 6.0062, acc0: 88.2285, loss_bbox0: 4.6982, loss_cls1: 6.6675, loss_bbox1: 3.9361, loss_centerness1: 7.1796, loss_cls_aux0: 1.1931, loss_bbox_aux0: 0.0812, loss_iou_aux0: 0.1410, d0.loss_cls_aux0: 1.9257, d0.loss_bbox_aux0: 0.1607, d0.loss_iou_aux0: 0.2472, d1.loss_cls_aux0: 1.8776, d1.loss_bbox_aux0: 0.0947, d1.loss_iou_aux0: 0.1614, d2.loss_cls_aux0: 1.5794, d2.loss_bbox_aux0: 0.0828, d2.loss_iou_aux0: 0.1427, d3.loss_cls_aux0: 1.4271, d3.loss_bbox_aux0: 0.0809, d3.loss_iou_aux0: 0.1405, d4.loss_cls_aux0: 1.2613, d4.loss_bbox_aux0: 0.0811, d4.loss_iou_aux0: 0.1408, loss_cls_aux1: 1.0524, loss_bbox_aux1: 0.1169, loss_iou_aux1: 0.2194, d0.loss_cls_aux1: 1.7433, d0.loss_bbox_aux1: 0.1820, d0.loss_iou_aux1: 0.3081, d1.loss_cls_aux1: 1.6614, d1.loss_bbox_aux1: 0.1279, d1.loss_iou_aux1: 0.2317, d2.loss_cls_aux1: 1.3910, d2.loss_bbox_aux1: 0.1188, d2.loss_iou_aux1: 0.2214, d3.loss_cls_aux1: 1.2552, d3.loss_bbox_aux1: 0.1168, d3.loss_iou_aux1: 0.2191, d4.loss_cls_aux1: 1.1087, d4.loss_bbox_aux1: 0.1169, d4.loss_iou_aux1: 0.2193, loss: 79.0895, grad_norm: 159.2715 2024-11-17 18:14:27,719 - mmdet - INFO - Epoch [1][250/14786] lr: 1.000e-04, eta: 13:08:59, time: 3.312, data_time: 0.116, memory: 16064, enc_loss_cls: 1.5725, enc_loss_bbox: 0.5161, enc_loss_iou: 0.7747, loss_cls: 0.9259, loss_bbox: 0.3696, loss_iou: 0.6119, d0.loss_cls: 1.6728, d0.loss_bbox: 0.4214, d0.loss_iou: 0.6599, d1.loss_cls: 1.3148, d1.loss_bbox: 0.3663, d1.loss_iou: 0.6326, d2.loss_cls: 1.0869, d2.loss_bbox: 0.3683, d2.loss_iou: 0.6189, d3.loss_cls: 0.9726, d3.loss_bbox: 0.3663, d3.loss_iou: 0.6066, d4.loss_cls: 0.9280, d4.loss_bbox: 0.3686, d4.loss_iou: 0.6102, dn_loss_cls: 0.9011, dn_loss_bbox: 0.2003, dn_loss_iou: 0.2432, d0.dn_loss_cls: 1.8692, d0.dn_loss_bbox: 0.2558, d0.dn_loss_iou: 0.2911, d1.dn_loss_cls: 1.5837, d1.dn_loss_bbox: 0.2077, d1.dn_loss_iou: 0.2491, d2.dn_loss_cls: 1.2577, d2.dn_loss_bbox: 0.1994, d2.dn_loss_iou: 0.2433, d3.dn_loss_cls: 1.0792, d3.dn_loss_bbox: 0.2002, d3.dn_loss_iou: 0.2432, d4.dn_loss_cls: 0.9271, d4.dn_loss_bbox: 0.2003, d4.dn_loss_iou: 0.2431, loss_rpn_cls: 0.1274, loss_rpn_bbox: 0.1473, loss_cls0: 4.2539, acc0: 90.2422, loss_bbox0: 4.1732, loss_cls1: 4.8264, loss_bbox1: 3.7824, loss_centerness1: 7.1801, loss_cls_aux0: 0.8892, loss_bbox_aux0: 0.0885, loss_iou_aux0: 0.1358, d0.loss_cls_aux0: 1.7620, d0.loss_bbox_aux0: 0.1680, d0.loss_iou_aux0: 0.2375, d1.loss_cls_aux0: 1.6054, d1.loss_bbox_aux0: 0.0985, d1.loss_iou_aux0: 0.1505, d2.loss_cls_aux0: 1.2574, d2.loss_bbox_aux0: 0.0887, d2.loss_iou_aux0: 0.1357, d3.loss_cls_aux0: 1.0955, d3.loss_bbox_aux0: 0.0884, d3.loss_iou_aux0: 0.1350, d4.loss_cls_aux0: 0.9400, d4.loss_bbox_aux0: 0.0884, d4.loss_iou_aux0: 0.1354, loss_cls_aux1: 0.7746, loss_bbox_aux1: 0.1262, loss_iou_aux1: 0.2188, d0.loss_cls_aux1: 1.5667, d0.loss_bbox_aux1: 0.1817, d0.loss_iou_aux1: 0.2933, d1.loss_cls_aux1: 1.3784, d1.loss_bbox_aux1: 0.1326, d1.loss_iou_aux1: 0.2250, d2.loss_cls_aux1: 1.0775, d2.loss_bbox_aux1: 0.1272, d2.loss_iou_aux1: 0.2194, d3.loss_cls_aux1: 0.9485, d3.loss_bbox_aux1: 0.1262, d3.loss_iou_aux1: 0.2185, d4.loss_cls_aux1: 0.8146, d4.loss_bbox_aux1: 0.1262, 
d4.loss_iou_aux1: 0.2186, loss: 68.5244, grad_norm: 140.0803
```
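
For reference, the wall-clock time follows directly from the numbers in this log: at `samples_per_gpu=1` on 8 GPUs, one pass over COCO train2017 (118,287 images) is 14786 iterations, and at roughly 3.2 s per iteration that works out to about 13 hours, matching the printed ETA. A quick back-of-the-envelope check in plain Python, with the iteration time taken from the log entries above:

```python
# Back-of-the-envelope epoch-time estimate from the log above.
coco_train_images = 118_287            # size of COCO train2017
gpus, samples_per_gpu = 8, 1           # per the config posted above
iters_per_epoch = -(-coco_train_images // (gpus * samples_per_gpu))  # ceil -> 14786
sec_per_iter = 3.2                     # "time: 3.2-3.3" in the log entries
print(iters_per_epoch, round(iters_per_epoch * sec_per_iter / 3600, 1))  # 14786, ~13.1 h
```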