open-mmlab / mmrotate

OpenMMLab Rotated Object Detection Toolbox and Benchmark
https://mmrotate.readthedocs.io/en/latest/
Apache License 2.0
1.84k stars 541 forks source link

[Docs] A question about the config of rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py #752

Open yangtian6781 opened 1 year ago

yangtian6781 commented 1 year ago

Branch

1.x branch https://mmrotate.readthedocs.io/en/1.x/

📚 The doc issue

I noticed these two lines in the Python config file:

batch_size = (2 GPUs) x (4 samples per GPU) = 8

train_dataloader = dict(batch_size=4, num_workers=4)

我用一张3090进行训练,有如下问题: 1.上面的代码是否意味着我用的是batchsize=4,那行注释是什么意思?是否意味着我用两张显卡训练的时候batchsize自动设置成8? 2.如果我用的是batchsize=4,为了复现出论文的结果,是否应该按照‘batch/2的时候,学习率也要同时除以2’的原则把学习率除以二?

下面是我的详细配置:

System environment: sys.platform: linux Python: 3.7.13 (default, Mar 29 2022, 02:18:16) [GCC 7.5.0] CUDA available: True numpy_random_seed: 1783001956 GPU 0: NVIDIA GeForce RTX 3090 CUDA_HOME: /usr/local/cuda GCC: gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 PyTorch: 1.13.1+cu117 PyTorch compiling details: PyTorch built with:

Runtime environment: cudnn_benchmark: False mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0} dist_cfg: {'backend': 'nccl'} seed: None Distributed launcher: none Distributed training: False GPU number: 1

2023/02/28 09:44:26 - mmengine - INFO - Config: default_scope = 'mmrotate' default_hooks = dict( timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=50), param_scheduler=dict(type='ParamSchedulerHook'), checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3), sampler_seed=dict(type='DistSamplerSeedHook'), visualization=dict(type='mmdet.DetVisualizationHook')) env_cfg = dict( cudnn_benchmark=False, mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), dist_cfg=dict(backend='nccl')) vis_backends = [dict(type='LocalVisBackend')] visualizer = dict( type='RotLocalVisualizer', vis_backends=[dict(type='LocalVisBackend')], name='visualizer') log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) log_level = 'INFO' load_from = None resume = False custom_hooks = [ dict(type='mmdet.NumClassCheckHook'), dict( type='EMAHook', ema_type='mmdet.ExpMomentumEMA', momentum=0.0002, update_buffers=True, priority=49) ] max_epochs = 36 base_lr = 0.00025 interval = 12 train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=36, val_interval=12) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') param_scheduler = [ dict( type='LinearLR', start_factor=1e-05, by_epoch=False, begin=0, end=1000), dict( type='CosineAnnealingLR', eta_min=1.25e-05, begin=18, end=36, T_max=18, by_epoch=True, convert_to_iter_based=True) ] optim_wrapper = dict( type='OptimWrapper', optimizer=dict(type='AdamW', lr=0.00025, weight_decay=0.05), paramwise_cfg=dict( norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) dataset_type = 'DOTADataset' data_root = '/home/ljy/mmrotate1/dota_to_val_ms_1024/' file_client_args = dict(backend='disk') train_pipeline = [ dict( type='mmdet.LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), dict( 
type='mmdet.RandomFlip', prob=0.75, direction=['horizontal', 'vertical', 'diagonal']), dict( type='RandomRotate', prob=0.5, angle_range=180, rect_obj_labels=[9, 11]), dict( type='mmdet.Pad', size=(1024, 1024), pad_val=dict(img=(114, 114, 114))), dict(type='mmdet.PackDetInputs') ] val_pipeline = [ dict( type='mmdet.LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), dict( type='mmdet.Pad', size=(1024, 1024), pad_val=dict(img=(114, 114, 114))), dict( type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')) ] test_pipeline = [ dict( type='mmdet.LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), dict( type='mmdet.Pad', size=(1024, 1024), pad_val=dict(img=(114, 114, 114))), dict( type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')) ] train_dataloader = dict( batch_size=4, num_workers=4, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=True), batch_sampler=None, pin_memory=False, dataset=dict( type='DOTADataset', data_root='/home/ljy/mmrotate1/dota_to_val_ms_1024/', ann_file='train/annfiles/', data_prefix=dict(img_path='train/images/'), img_shape=(1024, 1024), filter_cfg=dict(filter_empty_gt=True), pipeline=[ dict( type='mmdet.LoadImageFromFile', file_client_args=dict(backend='disk')), dict( type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), dict( type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), dict( type='mmdet.RandomFlip', prob=0.75, direction=['horizontal', 'vertical', 'diagonal']), dict( type='RandomRotate', prob=0.5, angle_range=180, rect_obj_labels=[9, 11]), dict( 
type='mmdet.Pad', size=(1024, 1024), pad_val=dict(img=(114, 114, 114))), dict(type='mmdet.PackDetInputs') ])) val_dataloader = dict( batch_size=1, num_workers=2, persistent_workers=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False), dataset=dict( type='DOTADataset', data_root='/home/ljy/mmrotate1/dota_to_val_ms_1024/', ann_file='val/annfiles/', data_prefix=dict(img_path='val/images/'), img_shape=(1024, 1024), test_mode=True, pipeline=[ dict( type='mmdet.LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), dict( type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), dict( type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), dict( type='mmdet.Pad', size=(1024, 1024), pad_val=dict(img=(114, 114, 114))), dict( type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')) ])) test_dataloader = dict( batch_size=1, num_workers=2, persistent_workers=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False), dataset=dict( type='DOTADataset', data_root='/home/ljy/mmrotate1/dota_to_val_ms_1024/', ann_file='val/annfiles/', data_prefix=dict(img_path='val/images/'), img_shape=(1024, 1024), test_mode=True, pipeline=[ dict( type='mmdet.LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), dict( type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), dict( type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), dict( type='mmdet.Pad', size=(1024, 1024), pad_val=dict(img=(114, 114, 114))), dict( type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')) ])) val_evaluator = dict(type='DOTAMetric', metric='mAP') test_evaluator = dict(type='DOTAMetric', metric='mAP') checkpoint = 
'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' angle_version = 'le90' model = dict( type='mmdet.RTMDet', data_preprocessor=dict( type='mmdet.DetDataPreprocessor', mean=[103.53, 116.28, 123.675], std=[57.375, 57.12, 58.395], bgr_to_rgb=False, boxtype2tensor=False, batch_augments=None), backbone=dict( type='mmdet.CSPNeXt', arch='P5', expand_ratio=0.5, deepen_factor=1, widen_factor=1, channel_attention=True, norm_cfg=dict(type='SyncBN'), act_cfg=dict(type='SiLU'), init_cfg=dict( type='Pretrained', prefix='backbone.', checkpoint= 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_l_8xb32-300e_coco/rtmdet_l_8xb32-300e_coco_20220719_112030-5a0be7c4.pth' )), neck=dict( type='mmdet.CSPNeXtPAFPN', in_channels=[256, 512, 1024], out_channels=256, num_csp_blocks=3, expand_ratio=0.5, norm_cfg=dict(type='SyncBN'), act_cfg=dict(type='SiLU'), init_cfg=dict( type='Pretrained', prefix='neck.', checkpoint= 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_l_8xb32-300e_coco/rtmdet_l_8xb32-300e_coco_20220719_112030-5a0be7c4.pth' )), bbox_head=dict( type='RotatedRTMDetSepBNHead', num_classes=15, in_channels=256, stacked_convs=2, feat_channels=256, angle_version='le90', anchor_generator=dict( type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), bbox_coder=dict(type='DistanceAnglePointCoder', angle_version='le90'), loss_cls=dict( type='mmdet.QualityFocalLoss', use_sigmoid=True, beta=2.0, loss_weight=1.0), loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), with_objectness=False, exp_on_reg=True, share_conv=True, pred_kernel_size=1, use_hbbox_loss=False, scale_angle=False, loss_angle=None, norm_cfg=dict(type='SyncBN'), act_cfg=dict(type='SiLU'), init_cfg=dict( type='Pretrained', prefix='bbox_head.', checkpoint= 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_l_8xb32-300e_coco/rtmdet_l_8xb32-300e_coco_20220719_112030-5a0be7c4.pth' )), 
train_cfg=dict( assigner=dict( type='mmdet.DynamicSoftLabelAssigner', iou_calculator=dict(type='RBboxOverlaps2D'), topk=13), allowed_border=-1, pos_weight=-1, debug=False), test_cfg=dict( nms_pre=2000, min_bbox_size=0, score_thr=0.05, nms=dict(type='nms_rotated', iou_threshold=0.1), max_per_img=2000)) coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_l_8xb32-300e_coco/rtmdet_l_8xb32-300e_coco_20220719_112030-5a0be7c4.pth' launcher = 'none' work_dir = '/home/ljy/mmrotate1/work_dirs/rtmdet_dota'

Suggest a potential alternative/fix

No response

yangtian6781 commented 1 year ago

I noticed that the official log for rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py is a little different from my log. At the same epoch, my loss is a little larger than the loss in the official log.

zytx121 commented 1 year ago

Hi @mkzhcak, you can try changing the learning rate to half of the original. However, the DOTA dataset is much smaller than COCO, and linear learning rate scaling is only sometimes useful. According to our experience: 1x8b>2x4b>1x4b

yangtian6781 commented 1 year ago

Thanks for your reply. During training, I used only the train set (with multi-scale training) and then tested on the val set. The mAP is only 70.7, which may be because the batch size is too small. I will use the rtmdet_l model and increase the batch size to 8, hoping to reproduce the official results. I have another question: what is an acceptable gap between the results on the val set and the test set?

yangtian6781 commented 1 year ago

Hi @mkzhcak, you can try changing the learning rate to half of the original. However, the DOTA dataset is much smaller than COCO, and linear learning rate scaling is only sometimes useful. According to our experience: 1x8b>2x4b>1x4b

您好,我不清楚为什么我把batch加到8仍然无法复现官方给出的结果,是train集比trainval集小的缘故吗,但似乎也不应该相差十个点,下面是我的详细配置,您可以帮我看看吗? 2023/03/02 15:36:52 - mmengine - INFO -

System environment: sys.platform: linux Python: 3.7.13 (default, Mar 29 2022, 02:18:16) [GCC 7.5.0] CUDA available: True numpy_random_seed: 2085261137 GPU 0: NVIDIA GeForce RTX 3090 CUDA_HOME: /usr/local/cuda GCC: gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 PyTorch: 1.13.1+cu117 PyTorch compiling details: PyTorch built with:

Runtime environment: cudnn_benchmark: False mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0} dist_cfg: {'backend': 'nccl'} seed: None Distributed launcher: none Distributed training: False GPU number: 1

2023/03/02 15:36:53 - mmengine - INFO - Config: default_scope = 'mmrotate' default_hooks = dict( timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=50), param_scheduler=dict(type='ParamSchedulerHook'), checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3), sampler_seed=dict(type='DistSamplerSeedHook'), visualization=dict(type='mmdet.DetVisualizationHook')) env_cfg = dict( cudnn_benchmark=False, mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), dist_cfg=dict(backend='nccl')) vis_backends = [dict(type='LocalVisBackend')] visualizer = dict( type='RotLocalVisualizer', vis_backends=[dict(type='LocalVisBackend')], name='visualizer') log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) log_level = 'INFO' load_from = None resume = False custom_hooks = [ dict(type='mmdet.NumClassCheckHook'), dict( type='EMAHook', ema_type='mmdet.ExpMomentumEMA', momentum=0.0002, update_buffers=True, priority=49) ] max_epochs = 36 base_lr = 0.00025 interval = 12 train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=36, val_interval=12) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') param_scheduler = [ dict( type='LinearLR', start_factor=1e-05, by_epoch=False, begin=0, end=1000), dict( type='CosineAnnealingLR', eta_min=1.25e-05, begin=18, end=36, T_max=18, by_epoch=True, convert_to_iter_based=True) ] optim_wrapper = dict( type='OptimWrapper', optimizer=dict(type='AdamW', lr=0.00025, weight_decay=0.05), paramwise_cfg=dict( norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) dataset_type = 'DOTADataset' data_root = '/home/ljy/mmrotate1/dota_to_val_ms_1024/' file_client_args = dict(backend='disk') train_pipeline = [ dict( type='mmdet.LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), dict( 
type='mmdet.RandomFlip', prob=0.75, direction=['horizontal', 'vertical', 'diagonal']), dict( type='RandomRotate', prob=0.5, angle_range=180, rect_obj_labels=[9, 11]), dict( type='mmdet.Pad', size=(1024, 1024), pad_val=dict(img=(114, 114, 114))), dict(type='mmdet.PackDetInputs') ] val_pipeline = [ dict( type='mmdet.LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), dict( type='mmdet.Pad', size=(1024, 1024), pad_val=dict(img=(114, 114, 114))), dict( type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')) ] test_pipeline = [ dict( type='mmdet.LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), dict( type='mmdet.Pad', size=(1024, 1024), pad_val=dict(img=(114, 114, 114))), dict( type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')) ] train_dataloader = dict( batch_size=8, num_workers=8, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=True), batch_sampler=None, pin_memory=False, dataset=dict( type='DOTADataset', data_root='/home/ljy/mmrotate1/dota_to_val_ms_1024/', ann_file='train/annfiles/', data_prefix=dict(img_path='train/images/'), img_shape=(1024, 1024), filter_cfg=dict(filter_empty_gt=True), pipeline=[ dict( type='mmdet.LoadImageFromFile', file_client_args=dict(backend='disk')), dict( type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), dict( type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), dict( type='mmdet.RandomFlip', prob=0.75, direction=['horizontal', 'vertical', 'diagonal']), dict( type='RandomRotate', prob=0.5, angle_range=180, rect_obj_labels=[9, 11]), dict( 
type='mmdet.Pad', size=(1024, 1024), pad_val=dict(img=(114, 114, 114))), dict(type='mmdet.PackDetInputs') ])) val_dataloader = dict( batch_size=1, num_workers=2, persistent_workers=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False), dataset=dict( type='DOTADataset', data_root='/home/ljy/mmrotate1/dota_to_val_ms_1024/', ann_file='val/annfiles/', data_prefix=dict(img_path='val/images/'), img_shape=(1024, 1024), test_mode=True, pipeline=[ dict( type='mmdet.LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), dict( type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), dict( type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), dict( type='mmdet.Pad', size=(1024, 1024), pad_val=dict(img=(114, 114, 114))), dict( type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')) ])) test_dataloader = dict( batch_size=1, num_workers=2, persistent_workers=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False), dataset=dict( type='DOTADataset', data_root='/home/ljy/mmrotate1/dota_to_val_ms_1024/', ann_file='val/annfiles/', data_prefix=dict(img_path='val/images/'), img_shape=(1024, 1024), test_mode=True, pipeline=[ dict( type='mmdet.LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), dict( type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), dict( type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), dict( type='mmdet.Pad', size=(1024, 1024), pad_val=dict(img=(114, 114, 114))), dict( type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')) ])) val_evaluator = dict(type='DOTAMetric', metric='mAP') test_evaluator = dict(type='DOTAMetric', metric='mAP') checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' 
angle_version = 'le90' model = dict( type='mmdet.RTMDet', data_preprocessor=dict( type='mmdet.DetDataPreprocessor', mean=[103.53, 116.28, 123.675], std=[57.375, 57.12, 58.395], bgr_to_rgb=False, boxtype2tensor=False, batch_augments=None), backbone=dict( type='mmdet.CSPNeXt', arch='P5', expand_ratio=0.5, deepen_factor=0.33, widen_factor=0.5, channel_attention=True, norm_cfg=dict(type='SyncBN'), act_cfg=dict(type='SiLU'), init_cfg=dict( type='Pretrained', prefix='backbone.', checkpoint= 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' )), neck=dict( type='mmdet.CSPNeXtPAFPN', in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1, expand_ratio=0.5, norm_cfg=dict(type='SyncBN'), act_cfg=dict(type='SiLU')), bbox_head=dict( type='RotatedRTMDetSepBNHead', num_classes=15, in_channels=128, stacked_convs=2, feat_channels=128, angle_version='le90', anchor_generator=dict( type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), bbox_coder=dict(type='DistanceAnglePointCoder', angle_version='le90'), loss_cls=dict( type='mmdet.QualityFocalLoss', use_sigmoid=True, beta=2.0, loss_weight=1.0), loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), with_objectness=False, exp_on_reg=False, share_conv=True, pred_kernel_size=1, use_hbbox_loss=False, scale_angle=False, loss_angle=None, norm_cfg=dict(type='SyncBN'), act_cfg=dict(type='SiLU')), train_cfg=dict( assigner=dict( type='mmdet.DynamicSoftLabelAssigner', iou_calculator=dict(type='RBboxOverlaps2D'), topk=13), allowed_border=-1, pos_weight=-1, debug=False), test_cfg=dict( nms_pre=2000, min_bbox_size=0, score_thr=0.05, nms=dict(type='nms_rotated', iou_threshold=0.1), max_per_img=2000)) launcher = 'none' work_dir = '/home/ljy/mmrotate1/work_dirs/rtmdet_dota_s'

yangtian6781 commented 1 year ago

@zytx121

zytx121 commented 1 year ago

Hi @mkzhcak, the results on the val and test sets can vary greatly. It is recommended to use the trainval set for training and the test set for testing.