Closed tianyuluan closed 3 years ago
https://github.com/open-mmlab/mmdetection/blob/38dfa875c048207fd46b8cd2b7ccafd5239b4a4e/mmdet/models/backbones/resnet.py#L186-L195
The assertion said you can't make self.conv2
a DCN or other special convs the same time. But you can remove that assertion and combine yourself.
my commmand is python tools/train.py ./configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py
the info is
Traceback (most recent call last): File "tools/train.py", line 153, in
main()
File "tools/train.py", line 126, in main
cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
File "/home/lty/mmdetection/mmdet/models/builder.py", line 67, in build_detector
return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
File "/home/lty/mmdetection/mmdet/models/builder.py", line 32, in build
return build_from_cfg(cfg, registry, default_args)
File "/home/lty/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/utils/registry.py", line 167, in build_from_cfg
return obj_cls(args)
File "/home/lty/mmdetection/mmdet/models/detectors/cascade_rcnn.py", line 25, in init
pretrained=pretrained)
File "/home/lty/mmdetection/mmdet/models/detectors/two_stage.py", line 26, in init
self.backbone = build_backbone(backbone)
File "/home/lty/mmdetection/mmdet/models/builder.py", line 37, in build_backbone
return build(cfg, BACKBONES)
File "/home/lty/mmdetection/mmdet/models/builder.py", line 32, in build
return build_from_cfg(cfg, registry, default_args)
File "/home/lty/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/utils/registry.py", line 167, in build_from_cfg
return obj_cls(args)
File "/home/lty/mmdetection/mmdet/models/backbones/detectors_resnet.py", line 240, in init
super(DetectoRS_ResNet, self).init(kwargs)
File "/home/lty/mmdetection/mmdet/models/backbones/resnet.py", line 441, in init
plugins=stage_plugins)
File "/home/lty/mmdetection/mmdet/models/backbones/detectors_resnet.py", line 279, in make_res_layer
return ResLayer(kwargs)
File "/home/lty/mmdetection/mmdet/models/backbones/detectors_resnet.py", line 189, in init
kwargs))
File "/home/lty/mmdetection/mmdet/models/backbones/detectors_resnet.py", line 34, in init
super(Bottleneck, self).init(inplanes, planes, kwargs)
File "/home/lty/mmdetection/mmdet/models/backbones/resnet.py", line 185, in init
assert self.conv_cfg is None, 'conv_cfg must be None for DCN'
AssertionError: conv_cfg must be None for DCN
my cfg is model = dict( type='CascadeRCNN', pretrained='torchvision://resnet152', backbone=dict( type='DetectoRS_ResNet', depth=152, num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), norm_eval=True, style='pytorch', conv_cfg=dict(type='ConvAWS'), sac=dict(type='SAC', use_deform=True), stage_with_sac=(False, True, True, True), output_img=True, dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True)), neck=dict( type='RFP', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5, rfp_steps=2, aspp_out_channels=64, aspp_dilations=(1, 3, 6, 1), rfp_backbone=dict( rfp_inplanes=256, type='DetectoRS_ResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), norm_eval=True, conv_cfg=dict(type='ConvAWS'), sac=dict(type='SAC', use_deform=True), stage_with_sac=(False, True, True, True), pretrained='torchvision://resnet50', style='pytorch')), rpn_head=dict( type='RPNHead', in_channels=256, feat_channels=256, anchor_generator=dict( type='AnchorGenerator', scales=[8], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0]), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict( type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)), roi_head=dict( type='CascadeRoIHead', num_stages=3, stage_loss_weights=[1, 0.5, 0.25], bbox_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=[ dict( type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=10, bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.1, 0.1, 0.2, 0.2]), reg_class_agnostic=True, loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=10, bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.05, 0.05, 0.1, 0.1]), reg_class_agnostic=True, loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), dict( type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=10, bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.033, 0.033, 0.067, 0.067]), reg_class_agnostic=True, loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) ])) train_cfg = dict( rpn=dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.7, neg_iou_thr=0.3, min_pos_iou=0.3, match_low_quality=True, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=256, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False), allowed_border=0, pos_weight=-1, debug=False), rpn_proposal=dict( nms_across_levels=False, nms_pre=2000, nms_post=2000, max_num=2000, nms_thr=0.7, min_bbox_size=0), rcnn=[ dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.3, neg_iou_thr=0.3, min_pos_iou=0.3, match_low_quality=False, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), pos_weight=-1, debug=False), dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.4, neg_iou_thr=0.4, min_pos_iou=0.4, match_low_quality=False, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), pos_weight=-1, debug=False), dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.5, match_low_quality=False, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), pos_weight=-1, debug=False) ]) test_cfg = dict( rpn=dict( nms_across_levels=False, nms_pre=1000, nms_post=1000, max_num=1000, nms_thr=0.7, min_bbox_size=0), rcnn=dict( score_thr=0.05, nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.001), max_per_img=100)) dataset_type = 'VOCDataset' data_root = '/home/lty/mmdetection/data/VOCdevkit/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) albu_train_transforms = [ dict( type='ShiftScaleRotate', shift_limit=0.0625, scale_limit=0.0, rotate_limit=180, interpolation=1, p=0.5) ] train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', img_scale=[(1000, 600), (1000, 800)], keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.7), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict( type='Albu', transforms=[ dict( type='ShiftScaleRotate', shift_limit=0.0625, scale_limit=0.0, rotate_limit=180, interpolation=1, p=0.5) ], bbox_params=dict( type='BboxParams', format='pascal_voc', label_fields=['gt_labels'], min_visibility=0.0, filter_lost_elements=True), keymap=dict(img='image', gt_bboxes='bboxes'), update_pad_shape=False, skip_img_without_anno=True), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ] test_pipeline = [ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1000, 600), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ] data = dict( samples_per_gpu=1, workers_per_gpu=2, train=dict( type='RepeatDataset', times=3, dataset=dict( type='VOCDataset', ann_file=[ '/home/lty/mmdetection/data/VOCdevkit/VOC2007/ImageSets/Main/train.txt' ], img_prefix=['/home/lty/mmdetection/data/VOCdevkit/VOC2007/'], pipeline=[ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict( type='Resize', img_scale=[(1000, 600), (1000, 800)], keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.7), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict( type='Albu', transforms=[ dict( type='ShiftScaleRotate', shift_limit=0.0625, scale_limit=0.0, rotate_limit=180, interpolation=1, p=0.5) ], bbox_params=dict( type='BboxParams', format='pascal_voc', label_fields=['gt_labels'], min_visibility=0.0, filter_lost_elements=True), keymap=dict(img='image', gt_bboxes='bboxes'), update_pad_shape=False, skip_img_without_anno=True), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ])), val=dict( type='VOCDataset', ann_file= '/home/lty/mmdetection/data/VOCdevkit/VOC2007/ImageSets/Main/val.txt', img_prefix='/home/lty/mmdetection/data/VOCdevkit/VOC2007/', pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1000, 600), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ]), test=dict( type='VOCDataset', ann_file= '/home/lty/mmdetection/data/VOCdevkit/VOC2007/ImageSets/Main/val.txt', img_prefix='/home/lty/mmdetection/data/VOCdevkit/VOC2007/', pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1000, 600), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ])) evaluation = dict(interval=1, metric='mAP') optimizer = dict(type='SGD', lr=0.0025, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) lr_config = dict( policy='step', warmup='linear', warmup_iters=500, warmup_ratio=0.001, step=[8, 11]) total_epochs = 12 checkpoint_config = dict(interval=1) log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')]) dist_params = dict(backend='nccl') log_level = 'INFO' load_from = None resume_from = None workflow = [('train', 1)] work_dir = './work_dirs/detectors_cascade_rcnn_r50_1x_coco' gpu_ids = range(0, 1)
can you give me some advices??Thanks!!!!