open-mmlab / mmtracking

OpenMMLab Video Perception Toolbox. It supports Video Object Detection (VID), Multiple Object Tracking (MOT), Single Object Tracking (SOT), and Video Instance Segmentation (VIS) within a unified framework.
https://mmtracking.readthedocs.io/en/latest/
Apache License 2.0

TypeError: forward_train() got an unexpected keyword argument 'ref_img_metas' #529

Open 376498485 opened 2 years ago

376498485 commented 2 years ago

Hi, when I use VFNet (VarifocalNet) from mmdetection as the detector for ByteTrack, I get the error from the title, TypeError: forward_train() got an unexpected keyword argument 'ref_img_metas' (traceback screenshot omitted). My config is as follows:

model = dict(
    detector=dict(
        type='VFNet',
        backbone=dict(
            type='ResNet',
            depth=101,
            num_stages=4,
            out_indices=(0, 1, 2, 3),
            frozen_stages=1,
            norm_cfg=dict(type='BN', requires_grad=True),
            norm_eval=True,
            style='pytorch',
            init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
            stage_with_dcn=(False, True, True, True)),
        neck=dict(
            type='FPN',
            in_channels=[256, 512, 1024, 2048],
            out_channels=256,
            start_level=1,
            add_extra_convs='on_output',
            num_outs=5,
            relu_before_extra_convs=True),
        bbox_head=dict(
            type='VFNetHead',
            num_classes=1,
            in_channels=256,
            stacked_convs=3,
            feat_channels=256,
            strides=[8, 16, 32, 64, 128],
            center_sampling=False,
            dcn_on_last_conv=True,
            use_atss=True,
            use_vfl=True,
            loss_cls=dict(
                type='VarifocalLoss',
                use_sigmoid=True,
                alpha=0.75,
                gamma=2.0,
                iou_weighted=True,
                loss_weight=1.0),
            loss_bbox=dict(type='GIoULoss', loss_weight=1.5),
            loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0)),
        init_cfg=dict(type='Pretrained', checkpoint='https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-7729adb5.pth'),
        train_cfg=dict(
            assigner=dict(type='ATSSAssigner', topk=9),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        test_cfg=dict(
            nms_pre=1000,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.6),
            max_per_img=100)
        ),

    type='ByteTrack',
    motion=dict(type='KalmanFilter'),
    tracker=dict(
        type='ByteTracker',
        obj_score_thrs=dict(high=0.6, low=0.1),
        init_track_thr=0.7,
        weight_iou_with_det_scores=True,
        match_iou_thrs=dict(high=0.1, low=0.5, tentative=0.3),
        num_frames_retain=30))
dataset_type = 'MOTChallengeDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadMultiImagesFromFile', to_float32=True),
    dict(type='SeqLoadAnnotations', with_bbox=True, with_track=True),
    dict(
        type='SeqResize',
        img_scale=(1088, 1088),
        multiscale_mode='range',
        share_params=True,
        ratio_range=(0.8, 1.2),
        keep_ratio=True,
        bbox_clip_border=False),
    dict(type='SeqPhotoMetricDistortion', share_params=True),
    dict(
        type='SeqRandomCrop',
        share_params=False,
        crop_size=(1088, 1088),
        bbox_clip_border=False),
    dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5),
    dict(
        type='SeqNormalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='SeqPad', size_divisor=32),
    dict(type='MatchInstances', skip_nomatch=True),
    dict(
        type='VideoCollect',
        keys=[
            'img', 'gt_bboxes', 'gt_labels',
        ]),
    dict(type='SeqDefaultFormatBundle', ref_prefix='ref')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1088, 1088),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='VideoCollect', keys=['img'])
        ])
]
data_root = 'data/MOT17/'
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='MOTChallengeDataset',
        visibility_thr=-1,
        ann_file='data/MOT17/annotations/half-train_cocoformat.json',
        img_prefix='data/MOT17/train',
        ref_img_sampler=dict(
            num_ref_imgs=1,
            frame_range=10,
            filter_key_img=True,
            method='uniform'),
        pipeline=train_pipeline),
    val=dict(
        type='MOTChallengeDataset',
        ann_file='data/MOT17/annotations/half-val_cocoformat.json',
        img_prefix='data/MOT17/train',
        ref_img_sampler=None,
        pipeline=test_pipeline),
    test=dict(
        type='MOTChallengeDataset',
        ann_file='data/MOT17/annotations/half-val_cocoformat.json',
        img_prefix='data/MOT17/train',
        ref_img_sampler=None,
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'track'], interval=1)
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0001,
    paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0))
optimizer_config = dict(grad_clip=None)
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.1,
    step=[16, 22])
total_epochs = 50
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
search_metrics = ['MOTA', 'IDF1', 'FN', 'FP', 'IDs', 'MT', 'ML']
work_dir = './work_dirs/bytetrack_vfnet'
gpu_ids = [0]

I would really appreciate it if you could help me.

dyhBUPT commented 2 years ago

Hi, do you use the latest version of mmdet?
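
For reference, a quick way to check the installed versions from Python (both packages expose the standard __version__ attribute):

# Print the installed mmdet and mmtrack versions so they can be compared
# against the version compatibility table in the mmtracking docs.
import mmdet
import mmtrack
print('mmdet:', mmdet.__version__)
print('mmtrack:', mmtrack.__version__)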

376498485 commented 2 years ago

> Hi, do you use the latest version of mmdet?

My mmdet version is 2.24. Maybe not all detectors in mmdet can be used with BYTE in mmtrack.

dyhBUPT commented 2 years ago

> Hi, do you use the latest version of mmdet?
>
> My mmdet version is 2.24. Maybe not all detectors in mmdet can be used with BYTE in mmtrack.

Have you tried other detectors? And do they work?

376498485 commented 2 years ago

> Hi, do you use the latest version of mmdet?
>
> My mmdet version is 2.24. Maybe not all detectors in mmdet can be used with BYTE in mmtrack.
>
> Have you tried other detectors? And do they work?

I have tried Sparse R-CNN, and it works. But Deformable DETR does not work. I only changed the detector in the config file, so I think R-CNN-like detectors in mmdet can be used with ByteTrack.

dyhBUPT commented 2 years ago

Maybe you should set "ref_img_sampler=None" in the "data" item.
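
A minimal sketch of that change applied to the train split of the config above. Note that with ref_img_sampler=None the dataset yields single images, so the Seq*-style train pipeline would also need to be replaced by single-image transforms; see the full detection-style config in the next comment:

# Assumed fix following the suggestion above: disable reference-image
# sampling so no ref_img/ref_img_metas keys reach the detector's
# forward_train. All other keys stay as posted.
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='MOTChallengeDataset',
        visibility_thr=-1,
        ann_file='data/MOT17/annotations/half-train_cocoformat.json',
        img_prefix='data/MOT17/train',
        ref_img_sampler=None,  # was dict(num_ref_imgs=1, frame_range=10, ...)
        pipeline=train_pipeline))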

pixeli99 commented 2 years ago

Hi lijinrun, I think there is something wrong with your data configuration. Please use the following configuration instead, which is equivalent to mmtracking/configs/_base_/datasets/mot_challenge_det.py. When we train ByteTrack we don't need the ref_img, and sampling reference images is exactly what triggers the error you reported. With the configuration below, ByteTrack is compatible with any detector.

file_client_args = dict(backend='disk')  # required by the load transforms below
dataset_type = 'CocoDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True, file_client_args=file_client_args),
    dict(type='LoadAnnotations', with_bbox=True, file_client_args=file_client_args),
    dict(
        type='Resize',
        img_scale=(1088, 1088),
        ratio_range=(0.8, 1.2),
        keep_ratio=True,
        bbox_clip_border=False),
    dict(type='PhotoMetricDistortion'),
    dict(type='RandomCrop', crop_size=(1088, 1088), bbox_clip_border=False),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1088, 1088),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data_root = 'data/MOT17/'
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        file_client_args=file_client_args,
        ann_file=data_root + 'annotations/half-train_cocoformat.json',
        img_prefix=data_root + 'train',
        classes=('pedestrian', ),
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        file_client_args=file_client_args,
        ann_file=data_root + 'annotations/half-val_cocoformat.json',
        img_prefix=data_root + 'train',
        classes=('pedestrian', ),
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        file_client_args=file_client_args,
        ann_file=data_root + 'annotations/half-val_cocoformat.json',
        img_prefix=data_root + 'train',
        classes=('pedestrian', ),
        pipeline=test_pipeline))
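
With that saved as the dataset part of the config, a hypothetical way to wire it in is the usual _base_ inheritance, the same pattern the official bytetrack configs use (paths assumed relative to configs/mot/bytetrack/):

# Hypothetical top of the user's config file: inherit the detection-style
# dataset settings (no reference-image sampling) and the default runtime,
# then keep the model = dict(type='ByteTrack', detector=dict(type='VFNet',
# ...)) block from the original post.
_base_ = [
    '../../_base_/datasets/mot_challenge_det.py',
    '../../_base_/default_runtime.py',
]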

pixeli99 commented 2 years ago

> Hi, do you use the latest version of mmdet?
>
> My mmdet version is 2.24. Maybe not all detectors in mmdet can be used with BYTE in mmtrack.
>
> Have you tried other detectors? And do they work?
>
> I have tried Sparse R-CNN, and it works. But Deformable DETR does not work. I only changed the detector in the config file, so I think R-CNN-like detectors in mmdet can be used with ByteTrack.

You said that it can run with Sparse R-CNN; that is because mmdet's Sparse R-CNN implementation accepts arbitrary extra keyword arguments in forward_train (screenshot of the signature omitted).
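
A minimal, self-contained sketch (toy functions, not the actual mmdet code) of why the extra keyword breaks one detector but not the other: two-stage detectors such as Sparse R-CNN declare **kwargs in forward_train, so the ref_img_metas key added by the tracking dataset is silently absorbed, while a fixed single-stage signature like VFNet's rejects it:

# Toy stand-ins for the two forward_train signatures (simplified, assumed).
def vfnet_like_forward_train(img, img_metas, gt_bboxes, gt_labels):
    # Fixed signature, in the spirit of mmdet's SingleStageDetector.
    return 'loss'

def sparse_rcnn_like_forward_train(img, img_metas, gt_bboxes, gt_labels,
                                   **kwargs):
    # Extra keywords such as ref_img_metas land in **kwargs and are ignored.
    return 'loss'

# What the tracking dataset hands to the detector when ref images are sampled:
batch = dict(img=None, img_metas=[], gt_bboxes=[], gt_labels=[],
             ref_img_metas=[])

print(sparse_rcnn_like_forward_train(**batch))  # prints 'loss'
print(vfnet_like_forward_train(**batch))
# TypeError: vfnet_like_forward_train() got an unexpected keyword
# argument 'ref_img_metas'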