open-mmlab / mmdetection

OpenMMLab Detection Toolbox and Benchmark
https://mmdetection.readthedocs.io
Apache License 2.0

error when using ghm loss #2398

Closed · YangHai-1218 closed this issue 4 years ago

YangHai-1218 commented 4 years ago

Describe the bug

When using GHM loss, an error occurred.

Reproduction

  1. script
    fp16 = dict(loss_scale=512.)
    # model settings
    model = dict(
    type='CascadeRCNN',
    num_stages=3,
    pretrained=None,
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        loss_cls=dict(
            type='GHMC',
            bins=30,
            momentum=0.75,
            use_sigmoid=True,
            loss_weight=1.0),
        loss_bbox=dict(
            type='GHMR',
            mu=0.02,
            bins=10,
            momentum=0.7,
            loss_weight=10.0)),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=[
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=5,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True,
            loss_cls=dict(
                type='GHMC',
                bins=30,
                momentum=0.75,
                use_sigmoid=True,
                loss_weight=1.0),
            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=5,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True,
            loss_cls=dict(
                type='GHMC',
                bins=30,
                momentum=0.75,
                use_sigmoid=True,
                loss_weight=1.0),
            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=5,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.033, 0.033, 0.067, 0.067],
            reg_class_agnostic=True,
            loss_cls=dict(
                type='GHMC',
                bins=30,
                momentum=0.75,
                use_sigmoid=True,
                loss_weight=1.0),
            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
    ])
    # model training and testing settings
    train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.7,
                min_pos_iou=0.7,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)
    ],
    stage_loss_weights=[1, 0.5, 0.25])
    test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.0001,
        nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.0001),
        max_per_img=50))
    # dataset settings
    dataset_type = 'CocoDataset'
    data_root = 'data/'
    img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
    train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=[(4096, 600), (4096, 1000)],
         multiscale_mode='range', keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
    ]
    test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(4096, 600), (4096, 800), (4096, 1000)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
    ]
    data = dict(
    imgs_per_gpu=1,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train_annotations_coco_style.json',
        img_prefix=data_root + 'train/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val_annotations_coco_style.json',
        img_prefix=data_root + 'val/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'val_annotations_coco_style_crop.json',
        img_prefix=data_root + 'val_crop/',
        pipeline=test_pipeline))
    # optimizer
    optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001)
    optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
    # learning policy
    lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
    checkpoint_config = dict(interval=1)
    # yapf:disable
    log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
    # yapf:enable
    # runtime settings
    total_epochs = 12
    dist_params = dict(backend='nccl')
    log_level = 'INFO'
    work_dir = './work_dirs/cascade_rcnn_x101_64x4d_fpn_ghm'
    load_from = 'data/pretrained/cascade_rcnn_x101_64x4d_fpn_1x_20181218-e2dc376a.pth'
    resume_from = None
    workflow = [('train', 1)]

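Note: the config above enables mixed-precision training (fp16 = dict(loss_scale=512.)) while also using the GHMC/GHMR losses, and that combination is what produces the Half/Float mismatch in the traceback below. A quick way to confirm the interaction, assuming fp16 is not strictly required for this setup, is to comment out that line and re-run:

    # fp16 = dict(loss_scale=512.)  # disabled: with fp16 on, GHM loss hits a Half/Float dtype mismatch
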
Error traceback

Traceback (most recent call last):
  File "./tools/train.py", line 142, in <module>
    main()
  File "./tools/train.py", line 138, in main
    meta=meta)
  File "/home/mmdetection/mmdet/apis/train.py", line 102, in train_detector
    meta=meta)
  File "/home/mmdetection/mmdet/apis/train.py", line 181, in _dist_train
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
  File "/root/anaconda3/envs/python367/lib/python3.6/site-packages/mmcv-0.4.2-py3.6-linux-x86_64.egg/mmcv/runner/runner.py", line 359, in run
    epoch_runner(data_loaders[i], **kwargs)
  File "/root/anaconda3/envs/python367/lib/python3.6/site-packages/mmcv-0.4.2-py3.6-linux-x86_64.egg/mmcv/runner/runner.py", line 263, in train
    self.model, data_batch, train_mode=True, **kwargs)
  File "/home/mmdetection/mmdet/apis/train.py", line 75, in batch_processor
    losses = model(**data)
  File "/root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/nn/modules/module.py", line 547, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/nn/parallel/distributed.py", line 442, in forward
    output = self.module(*inputs[0], **kwargs[0])
  File "/root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/nn/modules/module.py", line 547, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/mmdetection/mmdet/core/fp16/decorators.py", line 75, in new_func
    output = old_func(*new_args, **new_kwargs)
  File "/home/mmdetection/mmdet/models/detectors/base.py", line 147, in forward
    return self.forward_train(img, img_metas, **kwargs)
  File "/home/mmdetection/mmdet/models/detectors/cascade_rcnn.py", line 197, in forward_train
    *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
  File "/home/mmdetection/mmdet/models/anchor_heads/rpn_head.py", line 51, in loss
    gt_bboxes_ignore=gt_bboxes_ignore)
  File "/home/mmdetection/mmdet/core/fp16/decorators.py", line 152, in new_func
    output = old_func(*new_args, **new_kwargs)
  File "/home/mmdetection/mmdet/models/anchor_heads/anchor_head.py", line 205, in loss
    cfg=cfg)
  File "/home/mmdetection/mmdet/core/utils/misc.py", line 24, in multi_apply
    return tuple(map(list, zip(*map_results)))
  File "/home/mmdetection/mmdet/models/anchor_heads/anchor_head.py", line 149, in loss_single
    cls_score, labels, label_weights, avg_factor=num_total_samples)
  File "/root/anaconda3/envs/python367/lib/python3.6/site-packages/torch/nn/modules/module.py", line 547, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/mmdetection/mmdet/models/losses/ghm_loss.py", line 78, in forward
    inds = (g >= edges[i]) & (g < edges[i + 1]) & valid
RuntimeError: Expected object of scalar type Float but got scalar type Half for argument #2 'other'
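
For reference, the failing line compares the gradient-norm tensor g against the bin edges. The error message suggests g arrives as Float (the force_fp32 decorator casts the RPN scores back to fp32) while the edges tensor was converted to Half by the fp16 model conversion. A standalone sketch of the same mismatch, using hypothetical tensors and assuming a PyTorch version without implicit type promotion for mixed-dtype comparisons (as in this traceback):

    import torch

    g = torch.rand(8)                                # float32, like g after force_fp32
    edges = (torch.arange(31).float() / 30).half()   # bin edges for bins=30, cast to half

    # On PyTorch versions without implicit type promotion this raises:
    # RuntimeError: Expected object of scalar type Float but got scalar type Half for argument #2 'other'
    inds = (g >= edges[0]) & (g < edges[1])
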
ZwwWayne commented 4 years ago

Hi @NaCl-Ocean, this seems to be because some inputs have different types when they are sent to GHM loss; you might need to find them and make their types the same.
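
One way to do that, sketched under the assumption that the mismatch is between g and the edges buffer in mmdet/models/losses/ghm_loss.py (the fp16 hook casts module buffers to Half while force_fp32 keeps the scores in fp32), is to align the dtypes just before the binning loop in GHMC.forward. This is a local workaround, not necessarily the upstream fix:

    # inside GHMC.forward, just before the per-bin loop
    # (sketch; assumes self.edges is a tensor attribute/buffer)
    g = torch.abs(pred.sigmoid().detach() - target)
    valid = label_weight > 0
    edges = self.edges.to(dtype=g.dtype)  # cast the (possibly Half) edges to match g
    # the existing loop then compares tensors of the same dtype:
    # inds = (g >= edges[i]) & (g < edges[i + 1]) & valid

Alternatively, disabling the fp16 line in the config (as noted above) avoids the mismatch at the cost of mixed-precision training.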