SwinTransformer / Swin-Transformer-Object-Detection

This is an official implementation for "Swin Transformer: Hierarchical Vision Transformer using Shifted Windows" on Object Detection and Instance Segmentation.
https://arxiv.org/abs/2103.14030
Apache License 2.0
1.79k stars 378 forks source link

Gradient overflow. Skipping step, loss scaler 0 reducing loss scale to 2.926047721682624e-98 #24

Closed liulangjita closed 3 years ago

liulangjita commented 3 years ago

I trained my custom data from cascade_mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_3x_coco.py. Because my data only has bbox mark, without mask mask, so I change all with_mask to False in the config files and remove mask_head FCNMaskHead at cascade_mask_rcnn_swin_fpn.py to make train execute successfully. when I trained for about 10 miniutes, the loss begin to nan. 2021-04-29 15:20:07,805 - mmdet - INFO - Epoch [1][1550/9370] lr: 1.000e-04, eta: 2 days, 13:28:02, time: 0.569, data_time: 0.005, memory: 6632, loss_rpn_cls: nan, loss_rpn_bbox: nan, s0.loss_cls: nan, s0.acc: 3.3333, s0.loss_bbox: nan, s1.loss_cls: nan, s1.acc: 3.3333, s1.loss_bbox: nan, s2.loss_cls: nan, s2.acc: 3.3333, s2.loss_bbox: nan, loss: nan Is there something wrong when my trained?

model = dict( type='CascadeRCNN', pretrained='swin_tiny_patch4_window7_224.pth', backbone=dict( type='SwinTransformer', embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], window_size=7, mlp_ratio=4.0, qkv_bias=True, qk_scale=None, drop_rate=0.0, attn_drop_rate=0.0, drop_path_rate=0.2, ape=False, patch_norm=True, out_indices=(0, 1, 2, 3), use_checkpoint=False), neck=dict( type='FPN', in_channels=[96, 192, 384, 768], out_channels=256, num_outs=5), rpn_head=dict( type='RPNHead', in_channels=256, feat_channels=256, anchor_generator=dict( type='AnchorGenerator', scales=[8], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0]), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict( type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)), roi_head=dict( type='CascadeRoIHead', num_stages=3, stage_loss_weights=[1, 0.5, 0.25], bbox_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=[ dict( type='ConvFCBBoxHead', num_shared_convs=4, num_shared_fcs=1, in_channels=256, conv_out_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=26, bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.1, 0.1, 0.2, 0.2]), reg_class_agnostic=False, reg_decoded_bbox=True, norm_cfg=dict(type='BN', requires_grad=True), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='GIoULoss', loss_weight=10.0)), dict( type='ConvFCBBoxHead', num_shared_convs=4, num_shared_fcs=1, in_channels=256, conv_out_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=26, bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.05, 0.05, 0.1, 0.1]), reg_class_agnostic=False, reg_decoded_bbox=True, norm_cfg=dict(type='BN', requires_grad=True), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='GIoULoss', loss_weight=10.0)), dict( type='ConvFCBBoxHead', num_shared_convs=4, num_shared_fcs=1, in_channels=256, conv_out_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=26, bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.033, 0.033, 0.067, 0.067]), reg_class_agnostic=False, reg_decoded_bbox=True, norm_cfg=dict(type='BN', requires_grad=True), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='GIoULoss', loss_weight=10.0)) ], mask_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32])), train_cfg=dict( rpn=dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.7, neg_iou_thr=0.3, min_pos_iou=0.3, match_low_quality=True, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=256, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False), allowed_border=0, pos_weight=-1, debug=False), rpn_proposal=dict( nms_across_levels=False, nms_pre=2000, nms_post=2000, max_per_img=2000, nms=dict(type='nms', iou_threshold=0.7), min_bbox_size=0), rcnn=[ dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.5, match_low_quality=False, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), mask_size=28, pos_weight=-1, debug=False), dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6, match_low_quality=False, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), mask_size=28, pos_weight=-1, debug=False), dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.7, neg_iou_thr=0.7, min_pos_iou=0.7, match_low_quality=False, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), mask_size=28, pos_weight=-1, debug=False) ]), test_cfg=dict( rpn=dict( nms_across_levels=False, nms_pre=1000, nms_post=1000, max_per_img=1000, nms=dict(type='nms', iou_threshold=0.7), min_bbox_size=0), rcnn=dict( score_thr=0.05, nms=dict(type='nms', iou_threshold=0.5), max_per_img=100, mask_thr_binary=0.5))) dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True, with_mask=False), dict(type='RandomFlip', flip_ratio=0.5), dict( type='AutoAugment', policies=[[{ 'type': 'Resize', 'img_scale': [(480, 1333), (512, 1333), (544, 1333), (576, 1333), (608, 1333), (640, 1333), (672, 1333), (704, 1333), (736, 1333), (768, 1333), (800, 1333)], 'multiscale_mode': 'value', 'keep_ratio': True }], [{ 'type': 'Resize', 'img_scale': [(400, 1333), (500, 1333), (600, 1333)], 'multiscale_mode': 'value', 'keep_ratio': True }, { 'type': 'RandomCrop', 'crop_type': 'absolute_range', 'crop_size': (384, 600), 'allow_negative_crop': True }, { 'type': 'Resize', 'img_scale': [(480, 1333), (512, 1333), (544, 1333), (576, 1333), (608, 1333), (640, 1333), (672, 1333), (704, 1333), (736, 1333), (768, 1333), (800, 1333)], 'multiscale_mode': 'value', 'override': True, 'keep_ratio': True }]]), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ] test_pipeline = [ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(800, 400), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ] data = dict( samples_per_gpu=2, workers_per_gpu=2, train=dict( type='CocoDataset', ann_file='data/coco/annotations/instances_train2017.json', img_prefix='data/coco/train2017/', pipeline=[ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True, with_mask=False), dict(type='RandomFlip', flip_ratio=0.5), dict( type='AutoAugment', policies=[[{ 'type': 'Resize', 'img_scale': [(480, 1333), (512, 1333), (544, 1333), (576, 1333), (608, 1333), (640, 1333), (672, 1333), (704, 1333), (736, 1333), (768, 1333), (800, 1333)], 'multiscale_mode': 'value', 'keep_ratio': True }], [{ 'type': 'Resize', 'img_scale': [(400, 1333), (500, 1333), (600, 1333)], 'multiscale_mode': 'value', 'keep_ratio': True }, { 'type': 'RandomCrop', 'crop_type': 'absolute_range', 'crop_size': (384, 600), 'allow_negative_crop': True }, { 'type': 'Resize', 'img_scale': [(480, 1333), (512, 1333), (544, 1333), (576, 1333), (608, 1333), (640, 1333), (672, 1333), (704, 1333), (736, 1333), (768, 1333), (800, 1333)], 'multiscale_mode': 'value', 'override': True, 'keep_ratio': True }]]), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ]), val=dict( type='CocoDataset', ann_file='data/coco/annotations/instances_val2017.json', img_prefix='data/coco/val2017/', pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(800, 400), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ]), test=dict( type='CocoDataset', ann_file='data/coco/annotations/instances_val2017.json', img_prefix='data/coco/val2017/', pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(800, 400), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ])) evaluation = dict(metric=['bbox', 'segm']) optimizer = dict( type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, paramwise_cfg=dict( custom_keys=dict( absolute_pos_embed=dict(decay_mult=0.0), relative_position_bias_table=dict(decay_mult=0.0), norm=dict(decay_mult=0.0)))) optimizer_config = dict( grad_clip=None, type='DistOptimizerHook', update_interval=1, coalesce=True, bucket_size_mb=-1, use_fp16=True) lr_config = dict( policy='step', warmup='linear', warmup_iters=500, warmup_ratio=0.001, step=[27, 33]) runner = dict(type='EpochBasedRunnerAmp', max_epochs=36) checkpoint_config = dict(interval=1) log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')]) custom_hooks = [dict(type='NumClassCheckHook')] dist_params = dict(backend='nccl') log_level = 'INFO' load_from = None resume_from = None workflow = [('train', 1)] fp16 = None work_dir = 'work_dirs' gpu_ids = [1] Thanks very much!

liulangjita commented 3 years ago

I remove apex, and this problem resolved.

Kunlei-Hong commented 3 years ago

I remove apex, and this problem resolved. how to remove apex,Could you give me some tips?

SinaMohseni commented 3 years ago

I remove apex, and this problem resolved. how to remove apex,Could you give me some tips?

go to the config file and change runner type from EpochBasedRunnerAmp to EpochBasedRunner and comment out the optimizer_config see more on here