Closed muzishen closed 4 years ago
Unfortunately, I have no experience with Cascade RCNN. Were you able to reproduce results for Faster RCNN? Btw mMR metric in your case looks fine.
I can reproduce the Faster RCNN results, but I get a warning when I run Cascade RCNN.
WARNING [brambox.stat._matchboxes] Annotation and detection dataframes do not have the same image categories
This warning means that, in the evaluate function, `predicted_df` and `true_df` contain a different number of unique images, i.e. there are 0 predictions for some images. I think this is not an error.
Can you share your config for this experiment?
Thank you, my config is as follows:
# Detector definition: a CascadeRCNN with a Res2Net-101 backbone, an FPN neck,
# an RPN head and a three-stage cascade RoI head.
model = dict(
    type='CascadeRCNN',
    pretrained=None,
    backbone=dict(
        type='Res2Net',
        depth=101,
        scales=4,
        base_width=26,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
    ),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5,
        norm_cfg=dict(type='BN'),
    ),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[1.0, 1.5, 2.0, 2.5, 3.0],  # anchor aspect ratios
            strides=[4, 8, 16, 32, 64],
        ),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
        ),
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
        ),
        loss_bbox=dict(
            type='SmoothL1Loss',
            beta=1.0 / 9.0,
            loss_weight=1.0,
        ),
    ),
    roi_head=dict(
        type='CascadeRoIHead',
        num_stages=3,
        stage_loss_weights=[1, 0.5, 0.25],
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
            add_context=False,
            out_channels=256,
            featmap_strides=[4, 8, 16, 32],
        ),
        # The three cascade stages share every setting except the bbox
        # coder's target_stds, so they are generated from that one list.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=stage_stds,
                ),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0,
                ),
                loss_bbox=dict(
                    type='SmoothL1Loss',
                    beta=1.0,
                    loss_weight=1.0,
                ),
            )
            for stage_stds in (
                [0.1, 0.1, 0.2, 0.2],
                [0.05, 0.05, 0.1, 0.1],
                [0.033, 0.033, 0.067, 0.067],
            )
        ],
    ),
)
# Training-time settings for the RPN, the RPN proposal stage and the three
# cascade R-CNN stages. The stages differ only in their IoU thresholds
# (0.5, 0.6, 0.7).
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            match_low_quality=True,
            ignore_iof_thr=0.5),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.7,
                min_pos_iou=0.7,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
    ])

# Inference-time settings for the RPN and the final R-CNN output.
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.01,
        nms=dict(type='nms', iou_thr=0.5),
        max_per_img=1000))
# Dataset settings: dataset class, root directory and image normalization.
dataset_type = 'CrowdHumanDataset'
data_root = '/cache/crowdhuman/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)

# Training pipeline: load, resize within the given scale range, random flip,
# normalize, pad, apply the 'Grid' transform, then format and collect.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(1000, 600), (1666, 1000)],
        keep_ratio=True,
        final_crop=False),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='Grid', use_w=True, use_h=True),
    dict(type='DefaultFormatBundle'),
    dict(
        type='Collect',
        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_ignore']),
]

# Test pipeline: single scale (1333, 800), flipping disabled.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ]),
]

# Data loading; note that val and test both read the validation annotations.
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotation_full_train.json',
        img_prefix=data_root + 'Images/',
        # img_prefix=r'/cache/train/new_images_withoutrect/',
        # ann_file=data_root + 'annotations/instances_train2017.json',
        # img_prefix=data_root + 'train2017/',
        # ann_file=[data_root + 'annotations/instances_train2017.json', data_root + 'annotations/instances_val2017.json'],
        # img_prefix=[data_root + 'train2017/', data_root + 'val2017/'],
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotation_full_val.json',
        img_prefix=data_root + 'Images/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotation_full_val.json',
        img_prefix=data_root + 'Images/',
        pipeline=test_pipeline))
# Optimizer and learning-rate schedule: Adam with step decay at epochs 16 and 22.
optimizer = dict(type='Adam', lr=.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='step', step=[16, 22])

# Checkpointing and logging.
checkpoint_config = dict(interval=1)
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
    ])

# Runtime settings; training starts from the checkpoint named in load_from.
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = r'/cache/log/'
load_from = r'/cache/cascade_rcnn_r2_101_fpn_20e_coco-f4b7b7db.pth'
resume_from = None
workflow = [('train', 1)]
Maybe I'm missing something, but how is this config connected with `IterDet`? It looks like an almost default `CascadeRCNN` config for `mmdetection`. For the `FasterRCNN` experiments we developed the `IterDetFasterRCNN` class, so for your `CascadeRCNN` experiment you need to implement an `IterDetCascadeRCNN` class. Just copy everything from `IterDetFasterRCNN` and rename it. Also be careful with the config file: first copy `crowd_human_full_faster_rcnn_r50_fpn_2x.py`, then replace the model with `IterDetCascadeRCNN` and adjust the other parameters. Your current config is missing the `n_iterations` parameter, the `AddHistory` transform, `frozen_stages=-1`, etc.
Thank you, you are right. This is a default config; I want to get the baseline score first and then compare it with the IterDet version of Cascade RCNN. But I find that the Cascade RCNN baseline does not seem to work, and I can't find the reason.
It looks like you are rather far from the original `CascadeRCNN` config: you are using `Adam` instead of `SGD`, `COCO` pretraining instead of `ImageNet`, `Res2Net` instead of `ResNet`, etc. Maybe it would be better to start fine-tuning from these basic options.
Yes, I will try again with the original Cascade RCNN and contact you again. Thank you.
Hello, when I try to train Cascade RCNN with IterDet on CrowdHuman, the performance is very poor. The mAP is about 80%, and I also get a warning; I don't know whether it causes a problem.
===========log==============
2020-06-15 18:32:47,161 - mmdet - INFO - Epoch [15][7500/7500] lr: 0.00010, eta: 9:32:02, time: 0.515, data_time: 0.007, memory: 17477, loss_rpn_cls: 0.0357, loss_rpn_bbox: 0.0520, s0.loss_cls: 0.1649, s0.acc: 92.7441, s0.loss_bbox: 0.1755, s1.loss_cls: 0.0771, s1.acc: 93.3403, s1.loss_bbox: 0.1801, s2.loss_cls: 0.0400, s2.acc: 92.7921, s2.loss_bbox: 0.1017, loss: 0.8270
WARNING [brambox.stat._matchboxes] Annotation and detection dataframes do not have the same image categories
2020-06-15 18:39:23,571 - mmdet - INFO - {'gts': 99481, 'dets': 184838, 'recall': 0.8096721987113117, 'mAP': 0.7833646002216192, 'mMR': 0.5025026311323174}
2020-06-15 18:39:23,575 - mmdet - INFO - Epoch [15][7500/7500] lr: 0.00010, gts: 99481, dets: 184838, recall: 0.8097, mAP: 0.7834, mMR: 0.5025
Hi, have you reproduced the results with Faster RCNN? I also got poor AP and recall: recall: 0.8713, mAP: 0.8383, mMR: 0.4946.
Hello, when I try to train Cascade RCNN with IterDet on CrowdHuman, the performance is very poor. The mAP is about 80%, and I also get a warning; I don't know whether it causes a problem.
===========log==============
2020-06-15 18:32:47,161 - mmdet - INFO - Epoch [15][7500/7500] lr: 0.00010, eta: 9:32:02, time: 0.515, data_time: 0.007, memory: 17477, loss_rpn_cls: 0.0357, loss_rpn_bbox: 0.0520, s0.loss_cls: 0.1649, s0.acc: 92.7441, s0.loss_bbox: 0.1755, s1.loss_cls: 0.0771, s1.acc: 93.3403, s1.loss_bbox: 0.1801, s2.loss_cls: 0.0400, s2.acc: 92.7921, s2.loss_bbox: 0.1017, loss: 0.8270
WARNING [brambox.stat._matchboxes] Annotation and detection dataframes do not have the same image categories
2020-06-15 18:39:23,571 - mmdet - INFO - {'gts': 99481, 'dets': 184838, 'recall': 0.8096721987113117, 'mAP': 0.7833646002216192, 'mMR': 0.5025026311323174}
2020-06-15 18:39:23,575 - mmdet - INFO - Epoch [15][7500/7500] lr: 0.00010, gts: 99481, dets: 184838, recall: 0.8097, mAP: 0.7834, mMR: 0.5025