Closed ZhenboZhao77 closed 5 days ago
_base_ = [
    '../_base_/datasets/dotav1.py', '../_base_/schedules/schedule_1x.py',
    '../_base_/default_runtime.py'
]
data_root = '/data/PETDet/data/split_ss_dota/'
store_dir = '/data/PointOBB-v2/work_cpm'
angle_version = 'le90'
classes = ('plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', 'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RResize', img_scale=(1024, 1024)),
    dict(
        type='RRandomFlip',
        flip_ratio=[0.25, 0.25, 0.25],
        direction=['horizontal', 'vertical', 'diagonal'],
        version=angle_version),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
# train=dict(pipeline=train_pipeline, version=angle_version),
# val=dict(version=angle_version),
# test=dict(version=angle_version))
data = dict( train=dict( pipeline=train_pipeline, ann_file=data_root + 'trainval/annfiles/', img_prefix=data_root + 'trainval/images/', version=angle_version, classes=classes), val=dict( ann_file=data_root + 'trainval/annfiles/', img_prefix=data_root + 'trainval/images/', version=angle_version, classes=classes), test=dict( ann_file=data_root + 'test/images/', img_prefix=data_root + 'test/images/', version=angle_version, classes=classes, samples_per_gpu=4))
model = dict(
    type='RotatedFCOS',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        zero_init_residual=False,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=0,
        add_extra_convs='on_output',  # use P5
        num_outs=6,
        relu_before_extra_convs=True),
bbox_head=dict(
type='CPMHead',
num_classes=len(classes),
in_channels=256,
stacked_convs=4,
feat_channels=256,
regress_ranges=((-1, 32), (32, 64), (64, 128), (128, 256), (256, 512),
(512, 1e8)),
strides=[4, 8, 16, 32, 64, 128],
center_sampling=True,
center_sample_radius=1.5,
norm_on_bbox=True,
centerness_on_reg=True,
separate_angle=False,
scale_angle=True,
bbox_coder=dict(
type='DistanceAnglePointCoder', angle_version=angle_version),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='RotatedIoULoss', loss_weight=1.0),
loss_centerness=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
# training and testing settings
train_cfg=dict(
visualize=True,
store_dir=store_dir,
cls_weight=1.0,
thresh1=6,
alpha=1.5
),
test_cfg=dict(
store_dir=store_dir,
nms_pre=2000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(iou_thr=0.1),
max_per_img=2000))
find_unused_parameters = True
runner = dict(_delete_=True, type='EpochBasedRunner', max_epochs=6)
lr_config = dict(
    _delete_=True,
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[4])
evaluation = dict(interval=6, metric='mAP')
optimizer = dict(lr=0.05 / 2)
_base_ = [
    '../_base_/datasets/dotav1.py', '../_base_/schedules/schedule_1x.py',
    '../_base_/default_runtime.py'
]
data_root = '/data/PETDet/data/split_ss_dota/'
store_dir = '/data/PointOBB-v2/work_cpm'
angle_version = 'le90'
classes = ('plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', 'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RResize', img_scale=(1024, 1024)),
    dict(
        type='RRandomFlip',
        flip_ratio=[0.25, 0.25, 0.25],
        direction=['horizontal', 'vertical', 'diagonal'],
        version=angle_version),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
# train=dict(pipeline=train_pipeline, version=angle_version),
# val=dict(version=angle_version),
# test=dict(version=angle_version))
data = dict( train=dict( pipeline=train_pipeline, ann_file=data_root + 'trainval/annfiles/', img_prefix=data_root + 'trainval/images/', version=angle_version, classes=classes), val=dict( ann_file=data_root + 'trainval/annfiles/', img_prefix=data_root + 'trainval/images/', version=angle_version, classes=classes), test=dict( ann_file=data_root + 'test/images/', img_prefix=data_root + 'test/images/', version=angle_version, classes=classes, samples_per_gpu=4))
model = dict(
    type='RotatedFCOS',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        zero_init_residual=False,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=0,
        add_extra_convs='on_output',  # use P5
        num_outs=6,
        relu_before_extra_convs=True),
bbox_head=dict(
type='PseudoLabelHead',
num_classes=len(classes),
in_channels=256,
stacked_convs=4,
feat_channels=256,
regress_ranges=((-1, 32), (32, 64), (64, 128), (128, 256), (256, 512),
(512, 1e8)),
strides=[4, 8, 16, 32, 64, 128],
center_sampling=True,
center_sample_radius=1.5,
norm_on_bbox=True,
centerness_on_reg=True,
separate_angle=False,
scale_angle=True,
bbox_coder=dict(
type='DistanceAnglePointCoder', angle_version=angle_version),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='RotatedIoULoss', loss_weight=1.0),
loss_centerness=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
# training and testing settings
train_cfg=dict(
store_dir=store_dir,
cls_weight=1.0,
thresh3=[0.03, 0.04, 0.1, 0.01, 0.10, 0.06, 0.08, 0.02, 0.01, 0.03, 0.005, 0.02, 0.05, 0.1, 0.015],
pca_length=20,
store_ann_dir='/data/PointOBB-v2/work_dirs/generate_pseudo_label_dotav10/',
multiple_factor=1/4
),
test_cfg=dict(
store_dir=store_dir,
nms_pre=2000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(iou_thr=0.1),
max_per_img=2000))
find_unused_parameters = True
runner = dict(type='EpochBasedRunner', max_epochs=7)
lr_config = dict(
    _delete_=True,
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[4])
evaluation = dict(interval=3, metric='mAP')
optimizer = dict(lr=0.0)
Thank you for your attention. Could you provide more logs? You can upload them to a cloud drive and share the link so that I can look into the cause.
Furthermore, could you please tell me how many GPUs you are using for training?
sys.platform: linux Python: 3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0] CUDA available: True GPU 0: NVIDIA GeForce RTX 4060 Ti CUDA_HOME: /usr/local/cuda NVCC: Cuda compilation tools, release 11.8, V11.8.89 GCC: gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0 PyTorch: 1.13.1 PyTorch compiling details: PyTorch built with:
2024-10-19 19:03:44,382 - mmrotate - INFO - Distributed training: False 2024-10-19 19:03:44,640 - mmrotate - INFO - Config: dataset_type = 'DOTADataset' data_root = '/data/PETDet/data/split_ss_dota/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(type='RResize', img_scale=(1024, 1024)), dict( type='RRandomFlip', flip_ratio=[0.25, 0.25, 0.25], direction=['horizontal', 'vertical', 'diagonal'], version='le90'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ] test_pipeline = [ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1024, 1024), flip=False, transforms=[ dict(type='RResize'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img']) ]) ] data = dict( samples_per_gpu=2, workers_per_gpu=2, train=dict( type='DOTADataset', ann_file='/data/PointOBB-v2/work_dirs/generate_pseudo_label_dotav10/', img_prefix='/data/PETDet/data/split_ss_dota/trainval/images/', pipeline=[ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(type='RResize', img_scale=(1024, 1024)), dict( type='RRandomFlip', flip_ratio=[0.25, 0.25, 0.25], direction=['horizontal', 'vertical', 'diagonal'], version='le90'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ], version='le90', classes=('plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle', 'large-vehicle', 'ship', 
'tennis-court', 'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter')), val=dict( type='DOTADataset', ann_file='/data/PETDet/data/split_ss_dota/trainval/annfiles/', img_prefix='/data/PETDet/data/split_ss_dota/trainval/images/', pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1024, 1024), flip=False, transforms=[ dict(type='RResize'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img']) ]) ], version='le90', classes=('plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', 'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter')), test=dict( type='DOTADataset', ann_file='/data/PETDet/data/split_ss_dota/test/images/', img_prefix='/data/PETDet/data/split_ss_dota/test/images/', pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1024, 1024), flip=False, transforms=[ dict(type='RResize'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img']) ]) ], version='le90', classes=('plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', 'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter'), samples_per_gpu=4)) evaluation = dict(interval=12, metric='mAP') optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) lr_config = dict( policy='step', warmup='linear', warmup_iters=500, warmup_ratio=0.3333333333333333, step=[8, 11]) runner = 
dict(type='EpochBasedRunner', max_epochs=12) checkpoint_config = dict(interval=1) log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')]) dist_params = dict(backend='nccl') log_level = 'INFO' load_from = None resume_from = None workflow = [('train', 1)] opencv_num_threads = 0 mp_start_method = 'fork' angle_version = 'le90' classes = ('plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', 'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter') model = dict( type='ReDet', backbone=dict( type='ReResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=1, style='pytorch', pretrained='pretrain_model/re_resnet50_c8_batch256-25b16846.pth'), neck=dict( type='ReFPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), rpn_head=dict( type='RotatedRPNHead', in_channels=256, feat_channels=256, version='le90', anchor_generator=dict( type='AnchorGenerator', scales=[8], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0]), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict( type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)), roi_head=dict( type='RoITransRoIHead', version='le90', num_stages=2, stage_loss_weights=[1, 1], bbox_roi_extractor=[ dict( type='SingleRoIExtractor', roi_layer=dict( type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), dict( type='RotatedSingleRoIExtractor', roi_layer=dict( type='RiRoIAlignRotated', out_size=7, num_samples=2, num_orientations=8, clockwise=True), out_channels=256, featmap_strides=[4, 8, 16, 32]) ], bbox_head=[ dict( type='RotatedShared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=15, bbox_coder=dict( type='DeltaXYWHAHBBoxCoder', 
angle_range='le90', norm_factor=2, edge_swap=True, target_means=[0.0, 0.0, 0.0, 0.0, 0.0], target_stds=[0.1, 0.1, 0.2, 0.2, 1]), reg_class_agnostic=True, loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='RotatedShared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=15, bbox_coder=dict( type='DeltaXYWHAOBBoxCoder', angle_range='le90', norm_factor=None, edge_swap=True, proj_xy=True, target_means=[0.0, 0.0, 0.0, 0.0, 0.0], target_stds=[0.05, 0.05, 0.1, 0.1, 0.5]), reg_class_agnostic=False, loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ]), train_cfg=dict( rpn=dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.7, neg_iou_thr=0.3, min_pos_iou=0.3, match_low_quality=True, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=256, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False), allowed_border=0, pos_weight=-1, debug=False), rpn_proposal=dict( nms_pre=2000, max_per_img=2000, nms=dict(type='nms', iou_threshold=0.7), min_bbox_size=0), rcnn=[ dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.5, match_low_quality=False, ignore_iof_thr=-1, iou_calculator=dict(type='BboxOverlaps2D')), sampler=dict( type='RandomSampler', num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), pos_weight=-1, debug=False), dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.5, match_low_quality=False, ignore_iof_thr=-1, iou_calculator=dict(type='RBboxOverlaps2D')), sampler=dict( type='RRandomSampler', num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), pos_weight=-1, debug=False) ]), test_cfg=dict( rpn=dict( nms_pre=2000, max_per_img=2000, nms=dict(type='nms', iou_threshold=0.7), min_bbox_size=0), rcnn=dict( nms_pre=2000, 
min_bbox_size=0, score_thr=0.05, nms=dict(iou_thr=0.1), max_per_img=2000))) work_dir = 'work_dirs/cpm_dotav10' auto_resume = False gpu_ids = [0]
2024-10-19 19:03:44,641 - mmrotate - INFO - Set random seed to 1062049044, deterministic: False
2024-10-19 19:04:05,073 - mmrotate - INFO - Start running, host: root@DESKTOP-AN3C39M, work_dir: /data/PointOBB-v2/work_dirs/cpm_dotav10
2024-10-19 19:04:05,073 - mmrotate - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH ) StepLrUpdaterHook
(NORMAL ) CheckpointHook
(LOW ) EvalHook
(VERY_LOW ) TextLoggerHook
before_train_epoch:
(VERY_HIGH ) StepLrUpdaterHook
(LOW ) IterTimerHook
(LOW ) EvalHook
(VERY_LOW ) TextLoggerHook
before_train_iter:
(VERY_HIGH ) StepLrUpdaterHook
(LOW ) IterTimerHook
(LOW ) EvalHook
after_train_iter:
(ABOVE_NORMAL) OptimizerHook
(NORMAL ) CheckpointHook
(LOW ) IterTimerHook
(LOW ) EvalHook
(VERY_LOW ) TextLoggerHook
after_train_epoch:
(NORMAL ) CheckpointHook
(LOW ) EvalHook
(VERY_LOW ) TextLoggerHook
before_val_epoch:
(LOW ) IterTimerHook
(VERY_LOW ) TextLoggerHook
before_val_iter: (LOW ) IterTimerHook
after_val_iter: (LOW ) IterTimerHook
after_val_epoch: (VERY_LOW ) TextLoggerHook
after_run: (VERY_LOW ) TextLoggerHook
2024-10-19 19:04:05,073 - mmrotate - INFO - workflow: [('train', 1)], max: 12 epochs 2024-10-19 19:04:05,074 - mmrotate - INFO - Checkpoints will be saved to /data/PointOBB-v2/work_dirs/cpm_dotav10 by HardDiskBackend.
How can I adjust the parameters on my device to achieve the best results?
Thank you for providing more logs. Because you are training on a single GPU, you should halve the learning rate (lr) in all configs.
Thank you for the author's suggestion. I will give it a try.
Hello, author! Your semi-supervised method is very innovative, but when I tested it on my machine, the ReDet detector I trained on the DOTA-v1.0 dataset reached only the following accuracy:
This is your evaluation result for task 1 (VOC metrics):
mAP: 0.3938646569332034 ap of each class: plane: 0.6625126693116218, baseball-diamond: 0.4621011560275138, bridge: 0.08590652668698265, ground-track-field: 0.17795102540557087, small-vehicle: 0.395300604938276, large-vehicle: 0.3630655287749475, ship: 0.47992185431651807, tennis-court: 0.8400136579876889, basketball-court: 0.2624536540622941, storage-tank: 0.5564583448677194, soccer-ball-field: 0.22268349854556752, roundabout: 0.5212743281892218, harbor: 0.11509804923758411, swimming-pool: 0.44827813383103693, helicopter: 0.31495082181550704
This is far below the 44.85 reported in the paper.
Could you please advise on what I can do to improve the training results?
redet_dotav10.py:
_base_ = [
    '../_base_/datasets/dotav1.py', '../_base_/schedules/schedule_1x.py',
    '../_base_/default_runtime.py'
]
angle_version = 'le90'
data_root = '/data/PETDet/data/split_ss_dota/'
classes = ('plane', 'baseball-diamond', 'bridge', 'ground-track-field', 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', 'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RResize', img_scale=(1024, 1024)),
    dict(
        type='RRandomFlip',
        flip_ratio=[0.25, 0.25, 0.25],
        direction=['horizontal', 'vertical', 'diagonal'],
        version=angle_version),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
data = dict( train=dict( pipeline=train_pipeline, ann_file='/data/PointOBB-v2/work_dirs/generate_pseudo_label_dotav10/', img_prefix=data_root + 'trainval/images/', version=angle_version, classes=classes), val=dict( ann_file=data_root + 'trainval/annfiles/', img_prefix=data_root + 'trainval/images/', version=angle_version, classes=classes), test=dict( ann_file=data_root + 'test/images/', img_prefix=data_root + 'test/images/', version=angle_version, classes=classes, samples_per_gpu=4))
angle_version = 'le90' model = dict( type='ReDet', backbone=dict( type='ReResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=1, style='pytorch', pretrained='pretrain_model/re_resnet50_c8_batch256-25b16846.pth'), neck=dict( type='ReFPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), rpn_head=dict( type='RotatedRPNHead', in_channels=256, feat_channels=256, version=angle_version, anchor_generator=dict( type='AnchorGenerator', scales=[8], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0]), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), roi_head=dict( type='RoITransRoIHead', version=angle_version, num_stages=2, stage_loss_weights=[1, 1], bbox_roi_extractor=[ dict( type='SingleRoIExtractor', roi_layer=dict( type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), dict( type='RotatedSingleRoIExtractor', roi_layer=dict( type='RiRoIAlignRotated', out_size=7, num_samples=2, num_orientations=8, clockwise=True), out_channels=256, featmap_strides=[4, 8, 16, 32]), ], bbox_head=[ dict( type='RotatedShared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=15, bbox_coder=dict( type='DeltaXYWHAHBBoxCoder', angle_range=angle_version, norm_factor=2, edge_swap=True, target_means=[0., 0., 0., 0., 0.], target_stds=[0.1, 0.1, 0.2, 0.2, 1]), reg_class_agnostic=True, loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='RotatedShared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=15, bbox_coder=dict( type='DeltaXYWHAOBBoxCoder', angle_range=angle_version, norm_factor=None, edge_swap=True, proj_xy=True, target_means=[0., 0., 0., 0., 
0.], target_stds=[0.05, 0.05, 0.1, 0.1, 0.5]), reg_class_agnostic=False, loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ]), train_cfg=dict( rpn=dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.7, neg_iou_thr=0.3, min_pos_iou=0.3, match_low_quality=True, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=256, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False), allowed_border=0, pos_weight=-1, debug=False), rpn_proposal=dict( nms_pre=2000, max_per_img=2000, nms=dict(type='nms', iou_threshold=0.7), min_bbox_size=0), rcnn=[ dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.5, match_low_quality=False, ignore_iof_thr=-1, iou_calculator=dict(type='BboxOverlaps2D')), sampler=dict( type='RandomSampler', num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), pos_weight=-1, debug=False), dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.5, match_low_quality=False, ignore_iof_thr=-1, iou_calculator=dict(type='RBboxOverlaps2D')), sampler=dict( type='RRandomSampler', num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), pos_weight=-1, debug=False) ]), test_cfg=dict( rpn=dict( nms_pre=2000, max_per_img=2000, nms=dict(type='nms', iou_threshold=0.7), min_bbox_size=0), rcnn=dict( nms_pre=2000, min_bbox_size=0, score_thr=0.05, nms=dict(iou_thr=0.1), max_per_img=2000)))
optimizer = dict(lr=0.01)
# evaluation
evaluation = dict(interval=12, metric='mAP')