open-mmlab / mmrotate

OpenMMLab Rotated Object Detection Toolbox and Benchmark
https://mmrotate.readthedocs.io/en/latest/
Apache License 2.0
1.84k stars 541 forks source link

[Bug] Unable to reproduce the training of s2anet model #939

Open promisekoloer opened 1 year ago

promisekoloer commented 1 year ago

Prerequisite

Task

I'm using the official example scripts/configs for the officially supported tasks/models/datasets.

Branch

1.x branch https://github.com/open-mmlab/mmrotate/tree/1.x

Environment

sys.platform: linux Python: 3.8.8 (default, Feb 24 2021, 21:46:12) [GCC 7.3.0] CUDA available: True numpy_random_seed: 2147483648 GPU 0: NVIDIA TITAN Xp CUDA_HOME: /usr/local/cuda NVCC: Cuda compilation tools, release 11.1, V11.1.105 GCC: gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0 PyTorch: 1.8.1 PyTorch compiling details: PyTorch built with:

TorchVision: 0.9.1 OpenCV: 4.8.0 MMEngine: 0.8.4 MMRotate: 1.0.0rc1+

Reproduces the problem - code sample

''' angle_version = 'le135' backend_args = None data_root = 'data/split_ss_dota/' dataset_type = 'DOTADataset' default_hooks = dict( checkpoint=dict(interval=1, type='CheckpointHook'), logger=dict(interval=200, type='LoggerHook'), param_scheduler=dict(type='ParamSchedulerHook'), sampler_seed=dict(type='DistSamplerSeedHook'), timer=dict(type='IterTimerHook'), visualization=dict(type='mmdet.DetVisualizationHook')) default_scope = 'mmrotate' env_cfg = dict( cudnn_benchmark=False, dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) launcher = 'none' load_from = None log_level = 'INFO' log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) model = dict( backbone=dict( depth=50, frozen_stages=1, init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained'), norm_cfg=dict(requires_grad=True, type='BN'), norm_eval=True, num_stages=4, out_indices=( 0, 1, 2, 3, ), style='pytorch', type='mmdet.ResNet', zero_init_residual=False), bbox_head_init=dict( anchor_generator=dict( angle_version='le135', ratios=[ 1.0, ], scales=[ 4, ], strides=[ 8, 16, 32, 64, 128, ], type='FakeRotatedAnchorGenerator'), bbox_coder=dict( angle_version='le135', edge_swap=False, norm_factor=1, proj_xy=True, target_means=( 0.0, 0.0, 0.0, 0.0, 0.0, ), target_stds=( 1.0, 1.0, 1.0, 1.0, 1.0, ), type='DeltaXYWHTRBBoxCoder', use_box_type=False), feat_channels=256, in_channels=256, loss_bbox=dict(beta=0.11, loss_weight=1.0, type='mmdet.SmoothL1Loss'), loss_cls=dict( alpha=0.25, gamma=2.0, loss_weight=1.0, type='mmdet.FocalLoss', use_sigmoid=True), num_classes=15, stacked_convs=2, type='S2AHead'), bbox_head_refine=[ dict( anchor_generator=dict( strides=[ 8, 16, 32, 64, 128, ], type='PseudoRotatedAnchorGenerator'), bbox_coder=dict( angle_version='le135', edge_swap=False, norm_factor=1, proj_xy=True, target_means=( 0.0, 0.0, 0.0, 0.0, 0.0, ), target_stds=( 1.0, 1.0, 1.0, 1.0, 1.0, ), type='DeltaXYWHTRBBoxCoder'), feat_channels=256, frm_cfg=dict( 
feat_channels=256, kernel_size=3, strides=[ 8, 16, 32, 64, 128, ], type='AlignConv'), in_channels=256, loss_bbox=dict( beta=0.11, loss_weight=1.0, type='mmdet.SmoothL1Loss'), loss_cls=dict( alpha=0.25, gamma=2.0, loss_weight=1.0, type='mmdet.FocalLoss', use_sigmoid=True), num_classes=15, stacked_convs=2, type='S2ARefineHead'), ], data_preprocessor=dict( bgr_to_rgb=True, boxtype2tensor=False, mean=[ 123.675, 116.28, 103.53, ], pad_size_divisor=32, std=[ 58.395, 57.12, 57.375, ], type='mmdet.DetDataPreprocessor'), neck=dict( add_extra_convs='on_input', in_channels=[ 256, 512, 1024, 2048, ], num_outs=5, out_channels=256, start_level=1, type='mmdet.FPN'), test_cfg=dict( max_per_img=2000, min_bbox_size=0, nms=dict(iou_threshold=0.1, type='nms_rotated'), nms_pre=2000, score_thr=0.05), train_cfg=dict( init=dict( allowed_border=-1, assigner=dict( ignore_iof_thr=-1, iou_calculator=dict(type='RBboxOverlaps2D'), min_pos_iou=0, neg_iou_thr=0.4, pos_iou_thr=0.5, type='mmdet.MaxIoUAssigner'), debug=False, pos_weight=-1), refine=[ dict( allowed_border=-1, assigner=dict( ignore_iof_thr=-1, iou_calculator=dict(type='RBboxOverlaps2D'), min_pos_iou=0, neg_iou_thr=0.4, pos_iou_thr=0.5, type='mmdet.MaxIoUAssigner'), debug=False, pos_weight=-1), ], stage_loss_weights=[ 1.0, ]), type='RefineSingleStageDetector') optim_wrapper = dict( clip_grad=dict(max_norm=35, norm_type=2), optimizer=dict(lr=0.005, momentum=0.9, type='SGD', weight_decay=0.0001), type='OptimWrapper') param_scheduler = [ dict( begin=0, by_epoch=False, end=500, start_factor=0.3333333333333333, type='LinearLR'), dict( begin=0, by_epoch=True, end=12, gamma=0.1, milestones=[ 8, 11, ], type='MultiStepLR'), ] resume = False test_cfg = dict(type='TestLoop') test_dataloader = dict( batch_size=1, dataset=dict( ann_file='/mnt/share/ss_test/images/', data_prefix=dict(img_path='/mnt/share/ss_test/images/'), data_root='data/split_ss_dota/', pipeline=[ dict(backend_args=None, type='mmdet.LoadImageFromFile'), dict(keep_ratio=True, 
scale=( 1024, 1024, ), type='mmdet.Resize'), dict( box_type='qbox', type='mmdet.LoadAnnotations', with_bbox=True), dict( box_type_mapping=dict(gt_bboxes='rbox'), type='ConvertBoxType'), dict( meta_keys=( 'img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', ), type='mmdet.PackDetInputs'), ], test_mode=True, type='DOTADataset'), drop_last=False, num_workers=2, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict(metric='mAP', type='DOTAMetric') test_pipeline = [ dict(backend_args=None, type='mmdet.LoadImageFromFile'), dict(keep_ratio=True, scale=( 1024, 1024, ), type='mmdet.Resize'), dict( meta_keys=( 'img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', ), type='mmdet.PackDetInputs'), ] train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=6) train_dataloader = dict( batch_sampler=None, batch_size=2, dataset=dict( ann_file='/mnt/share/ss_trainval/annfiles/', data_prefix=dict( img_path='/mnt/share/ss_trainval/images/'), data_root='data/split_ss_dota/', filter_cfg=dict(filter_empty_gt=True), pipeline=[ dict(backend_args=None, type='mmdet.LoadImageFromFile'), dict( box_type='qbox', type='mmdet.LoadAnnotations', with_bbox=True), dict( box_type_mapping=dict(gt_bboxes='rbox'), type='ConvertBoxType'), dict(keep_ratio=True, scale=( 1024, 1024, ), type='mmdet.Resize'), dict( direction=[ 'horizontal', 'vertical', 'diagonal', ], prob=0.75, type='mmdet.RandomFlip'), dict(type='mmdet.PackDetInputs'), ], type='DOTADataset'), num_workers=2, persistent_workers=True, sampler=dict(shuffle=True, type='DefaultSampler')) train_pipeline = [ dict(backend_args=None, type='mmdet.LoadImageFromFile'), dict(box_type='qbox', type='mmdet.LoadAnnotations', with_bbox=True), dict(box_type_mapping=dict(gt_bboxes='rbox'), type='ConvertBoxType'), dict(keep_ratio=True, scale=( 1024, 1024, ), type='mmdet.Resize'), dict( direction=[ 'horizontal', 'vertical', 'diagonal', ], prob=0.75, type='mmdet.RandomFlip'), 
dict(type='mmdet.PackDetInputs'), ] val_cfg = dict(type='ValLoop') val_dataloader = dict( batch_size=1, dataset=dict( ann_file= '/mnt/share/dota/split_ss_dota/val/annfiles/', data_prefix=dict( img_path= '/mnt/share/dota/split_ss_dota/val/images/'), data_root='data/split_ss_dota/', pipeline=[ dict(backend_args=None, type='mmdet.LoadImageFromFile'), dict(keep_ratio=True, scale=( 1024, 1024, ), type='mmdet.Resize'), dict( box_type='qbox', type='mmdet.LoadAnnotations', with_bbox=True), dict( box_type_mapping=dict(gt_bboxes='rbox'), type='ConvertBoxType'), dict( meta_keys=( 'img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', ), type='mmdet.PackDetInputs'), ], test_mode=True, type='DOTADataset'), drop_last=False, num_workers=2, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) val_evaluator = dict(metric='mAP', type='DOTAMetric') val_pipeline = [ dict(backend_args=None, type='mmdet.LoadImageFromFile'), dict(keep_ratio=True, scale=( 1024, 1024, ), type='mmdet.Resize'), dict(box_type='qbox', type='mmdet.LoadAnnotations', with_bbox=True), dict(box_type_mapping=dict(gt_bboxes='rbox'), type='ConvertBoxType'), dict( meta_keys=( 'img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', ), type='mmdet.PackDetInputs'), ] vis_backends = [ dict(type='LocalVisBackend'), ] visualizer = dict( name='visualizer', type='RotLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), ]) work_dir = './work_dirs/s2anet-le135_r50_fpn_1x_dota' '''

Reproduces the problem - command or script

The mmrotate version is 1.x. I ran s2anet-le135_r50_fpn_1x_dota.py without any changes. The training dataset is DOTA's trainval split, and the full config is shown above. When I evaluate on the val dataset, the model only reaches 0.769 mAP. However, I ran the same baseline in mmrotate 0.3.4 with the same config, and there the mAP reaches at least 0.800 on the val dataset and 0.7319 on the online test. What was changed in the new version? I also find that the accuracy of the new detector, cascade S2ANet, is lower too: only 0.7612 when trained on the trainval dataset and evaluated on the val dataset. Thank you for your reply!

Reproduces the problem - error message

With mmrotate 1.x, the s2anet_le135 model (12 epochs) evaluated on the val dataset gives the following result:

+--------------------+-------+-------+--------+-------+
| class              | gts   | dets  | recall | ap    |
+--------------------+-------+-------+--------+-------+
| plane              | 4449  | 15281 | 0.961  | 0.904 |
| baseball-diamond   | 358   | 9264  | 0.927  | 0.788 |
| bridge             | 785   | 22306 | 0.803  | 0.589 |
| ground-track-field | 212   | 9500  | 0.939  | 0.771 |
| small-vehicle      | 10579 | 77346 | 0.872  | 0.706 |
| large-vehicle      | 8819  | 56336 | 0.950  | 0.863 |
| ship               | 18537 | 53146 | 0.957  | 0.888 |
| tennis-court       | 1512  | 10243 | 0.964  | 0.909 |
| basketball-court   | 266   | 6952  | 0.977  | 0.848 |
| storage-tank       | 4740  | 30628 | 0.828  | 0.767 |
| soccer-ball-field  | 251   | 7835  | 0.845  | 0.696 |
| roundabout         | 275   | 10554 | 0.924  | 0.774 |
| harbor             | 4167  | 29900 | 0.867  | 0.773 |
| swimming-pool      | 732   | 11946 | 0.867  | 0.678 |
| helicopter         | 122   | 8793  | 0.869  | 0.587 |
+--------------------+-------+-------+--------+-------+
| mAP                |       |       |        | 0.769 |
+--------------------+-------+-------+--------+-------+

With mmrotate 1.x, the cascade_s2anet_le135 model (12 epochs) evaluated on the val dataset gives the following result:

+--------------------+-------+-------+--------+-------+
| class              | gts   | dets  | recall | ap    |
+--------------------+-------+-------+--------+-------+
| plane              | 4449  | 17367 | 0.959  | 0.905 |
| baseball-diamond   | 358   | 11878 | 0.922  | 0.780 |
| bridge             | 785   | 27009 | 0.810  | 0.585 |
| ground-track-field | 212   | 8910  | 0.929  | 0.816 |
| small-vehicle      | 10579 | 88917 | 0.836  | 0.679 |
| large-vehicle      | 8819  | 64417 | 0.951  | 0.863 |
| ship               | 18537 | 58490 | 0.933  | 0.885 |
| tennis-court       | 1512  | 13832 | 0.965  | 0.909 |
| basketball-court   | 266   | 8570  | 0.977  | 0.842 |
| storage-tank       | 4740  | 38967 | 0.826  | 0.761 |
| soccer-ball-field  | 251   | 9396  | 0.797  | 0.617 |
| roundabout         | 275   | 9964  | 0.887  | 0.730 |
| harbor             | 4167  | 32023 | 0.865  | 0.772 |
| swimming-pool      | 732   | 12032 | 0.865  | 0.685 |
| helicopter         | 122   | 12755 | 0.885  | 0.589 |
+--------------------+-------+-------+--------+-------+
| mAP                |       |       |        | 0.761 |
+--------------------+-------+-------+--------+-------+

Additional information

  1. I use the DOTA dataset with the single-scale split (ss_trainval); the image size is (1024, 1024).
  2. When I download the official model, it doesn't work in the new framework because the model component names have changed. If you could train it again and give me some advice, I would appreciate it very much.
promisekoloer commented 5 months ago

I got it! In the new version (mmrotate 1.x), the train_pipeline in dota.py is missing the step `dict(type='mmdet.FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2))`.