open-mmlab / mmdetection

OpenMMLab Detection Toolbox and Benchmark
https://mmdetection.readthedocs.io
Apache License 2.0

[Bug] Error about Seesaw Loss when using single-stage instance segmentation algorithm #9310

Closed zaneoo closed 1 year ago

zaneoo commented 2 years ago

Prerequisite

Task

I have modified the scripts/configs, or I'm working on my own tasks/models/datasets.

Branch

master branch https://github.com/open-mmlab/mmdetection

Environment

sys.platform: linux
Python: 3.8.10 (default, Jun 4 2021, 15:09:15) [GCC 7.5.0]
CUDA available: True
GPU 0,1,2,3: NVIDIA RTX A5000
CUDA_HOME: /usr/local/cuda
NVCC: Cuda compilation tools, release 11.3, V11.3.109
GCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
PyTorch: 1.10.0+cu113
PyTorch compiling details: PyTorch built with:
TorchVision: 0.11.1+cu113
OpenCV: 4.6.0
MMCV: 1.7.0
MMCV Compiler: GCC 9.3
MMCV CUDA Compiler: 11.3
MMDetection: 2.25.3+e71b499

Reproduces the problem - code sample

# Excerpt from mmdet/models/losses/seesaw_loss.py (seesaw_ce_loss is defined
# earlier in the same file and omitted here).
import torch
import torch.nn as nn
import torch.nn.functional as F

from ..builder import LOSSES
from .accuracy import accuracy


@LOSSES.register_module()
class SeesawLoss(nn.Module):
    """Seesaw Loss for Long-Tailed Instance Segmentation (CVPR 2021).

    arXiv: https://arxiv.org/abs/2008.10032

    Args:
        use_sigmoid (bool, optional): Whether the prediction uses sigmoid
            or softmax. Only False is supported.
        p (float, optional): The p in the mitigation factor.
            Defaults to 0.8.
        q (float, optional): The q in the compensation factor.
            Defaults to 2.0.
        num_classes (int, optional): The number of classes.
            Defaults to 1203 for the LVIS v1 dataset.
        eps (float, optional): The minimal value of the divisor to smooth
            the computation of the compensation factor.
        reduction (str, optional): The method that reduces the loss to a
            scalar. Options are "none", "mean" and "sum".
        loss_weight (float, optional): The weight of the loss.
            Defaults to 1.0.
        return_dict (bool, optional): Whether to return the losses as a
            dict. Defaults to True.
    """

    def __init__(self,
                 use_sigmoid=False,
                 p=0.8,
                 q=2.0,
                 num_classes=1203,
                 eps=1e-2,
                 reduction='mean',
                 loss_weight=1.0,
                 return_dict=True):
        super(SeesawLoss, self).__init__()
        assert not use_sigmoid
        self.use_sigmoid = False
        self.p = p
        self.q = q
        self.num_classes = num_classes
        self.eps = eps
        self.reduction = reduction
        self.loss_weight = loss_weight
        self.return_dict = return_dict

        # 0 for pos, 1 for neg
        self.cls_criterion = seesaw_ce_loss

        # cumulative samples for each category
        self.register_buffer(
            'cum_samples',
            torch.zeros(self.num_classes + 1, dtype=torch.float))

        # custom output channels of the classifier
        self.custom_cls_channels = True
        # custom activation of cls_score
        self.custom_activation = True
        # custom accuracy of the classifier
        self.custom_accuracy = True

    def _split_cls_score(self, cls_score):
        # split cls_score into cls_score_classes and cls_score_objectness
        assert cls_score.size(-1) == self.num_classes + 2
        cls_score_classes = cls_score[..., :-2]
        cls_score_objectness = cls_score[..., -2:]
        return cls_score_classes, cls_score_objectness

    def get_cls_channels(self, num_classes):
        """Get custom classification channels.

        Args:
            num_classes (int): The number of classes.

        Returns:
            int: The custom classification channels.
        """
        assert num_classes == self.num_classes
        return num_classes + 2

    def get_activation(self, cls_score):
        """Get custom activation of cls_score.

        Args:
            cls_score (torch.Tensor): The prediction with shape (N, C + 2).

        Returns:
            torch.Tensor: The custom activation of cls_score with shape
                (N, C + 1).
        """
        cls_score_classes, cls_score_objectness = self._split_cls_score(
            cls_score)
        score_classes = F.softmax(cls_score_classes, dim=-1)
        score_objectness = F.softmax(cls_score_objectness, dim=-1)
        score_pos = score_objectness[..., [0]]
        score_neg = score_objectness[..., [1]]
        score_classes = score_classes * score_pos
        scores = torch.cat([score_classes, score_neg], dim=-1)
        return scores

    def get_accuracy(self, cls_score, labels):
        """Get custom accuracy w.r.t. cls_score and labels.

        Args:
            cls_score (torch.Tensor): The prediction with shape (N, C + 2).
            labels (torch.Tensor): The learning label of the prediction.

        Returns:
            Dict[str, torch.Tensor]: The accuracy for objectness and
                classes, respectively.
        """
        pos_inds = labels < self.num_classes
        obj_labels = (labels == self.num_classes).long()
        cls_score_classes, cls_score_objectness = self._split_cls_score(
            cls_score)
        acc_objectness = accuracy(cls_score_objectness, obj_labels)
        acc_classes = accuracy(cls_score_classes[pos_inds], labels[pos_inds])
        acc = dict()
        acc['acc_objectness'] = acc_objectness
        acc['acc_classes'] = acc_classes
        return acc

    def forward(self,
                cls_score,
                labels,
                label_weights=None,
                avg_factor=None,
                reduction_override=None):
        """Forward function.

        Args:
            cls_score (torch.Tensor): The prediction with shape (N, C + 2).
            labels (torch.Tensor): The learning label of the prediction.
            label_weights (torch.Tensor, optional): Sample-wise loss weight.
            avg_factor (int, optional): Average factor that is used to
                average the loss. Defaults to None.
            reduction_override (str, optional): The method used to reduce
                the loss. Options are "none", "mean" and "sum".

        Returns:
            torch.Tensor | Dict[str, torch.Tensor]:
                if return_dict == False: The calculated loss |
                if return_dict == True: The dict of calculated losses
                for objectness and classes, respectively.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        assert cls_score.size(-1) == self.num_classes + 2
        # ... (rest of forward() omitted; the assert above is the line that
        # fails in this issue)

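To make the expected shapes concrete, here is a minimal standalone sketch of the channel contract that the failing assert enforces (assuming SeesawLoss is importable from mmdet.models.losses as in MMDetection 2.x; the batch size 8 is arbitrary):

import torch
from mmdet.models.losses import SeesawLoss

loss = SeesawLoss(num_classes=22)

# SeesawLoss advertises a custom classifier width of num_classes + 2:
# 22 class logits plus 2 objectness (pos/neg) logits.
assert loss.get_cls_channels(22) == 24

# A correctly shaped prediction therefore has 24 channels per sample,
# and get_activation() folds it back to num_classes + 1 = 23 scores.
cls_score = torch.randn(8, 24)
scores = loss.get_activation(cls_score)
assert scores.shape == (8, 23)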
Reproduces the problem - command or script

The official training script (tools/train.py).

Reproduces the problem - error message

Traceback (most recent call last):
  File "tools/train.py", line 244, in <module>
    main()
  File "tools/train.py", line 233, in main
    train_detector(
  File "/root/autodl-tmp/mmdetection/mmdet/apis/train.py", line 244, in train_detector
    runner.run(data_loaders, cfg.workflow)
  File "/root/miniconda3/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 136, in run
    epoch_runner(data_loaders[i], **kwargs)
  File "/root/miniconda3/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 53, in train
    self.run_iter(data_batch, train_mode=True, **kwargs)
  File "/root/miniconda3/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 31, in run_iter
    outputs = self.model.train_step(data_batch, self.optimizer,
  File "/root/miniconda3/lib/python3.8/site-packages/mmcv/parallel/data_parallel.py", line 77, in train_step
    return self.module.train_step(*inputs[0], **kwargs[0])
  File "/root/autodl-tmp/mmdetection/mmdet/models/detectors/base.py", line 248, in train_step
    losses = self(**data)
  File "/root/miniconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/miniconda3/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 119, in new_func
    return old_func(*args, **kwargs)
  File "/root/autodl-tmp/mmdetection/mmdet/models/detectors/base.py", line 172, in forward
    return self.forward_train(img, img_metas, **kwargs)
  File "/root/autodl-tmp/mmdetection/mmdet/models/detectors/yolact.py", line 76, in forward_train
    losses, sampling_results = self.bbox_head.loss(
  File "/root/miniconda3/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 208, in new_func
    return old_func(*args, **kwargs)
  File "/root/autodl-tmp/mmdetection/mmdet/models/dense_heads/yolact_head.py", line 224, in loss
    losses_cls, losses_bbox = multi_apply(
  File "/root/autodl-tmp/mmdetection/mmdet/core/utils/misc.py", line 30, in multi_apply
    return tuple(map(list, zip(*map_results)))
  File "/root/autodl-tmp/mmdetection/mmdet/models/dense_heads/yolact_head.py", line 265, in loss_single_OHEM
    loss_cls_all = self.loss_cls(cls_score, labels, label_weights)
  File "/root/miniconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/autodl-tmp/mmdetection/mmdet/models/losses/seesaw_loss.py", line 224, in forward
    assert cls_score.size(-1) == self.num_classes + 2

Additional information

Our dataset has 22 classes and follows the COCO format. Seesaw Loss is used as the loss_cls in the bbox_head, and YOLACT is the instance segmentation algorithm. This bug has also been reported by other developers (issue #6424), but no solution has been provided so far. Could you give some help? I printed cls_score.size(-1) and the result is 23.
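If I read the head code correctly (this is just my reading, not a verified fix), the printed 23 comes from the anchor-based head, while the loss expects 24:

num_classes = 22

# YOLACTHead inherits from AnchorHead, which sizes its classification branch
# as num_classes + 1 per anchor for a softmax-style loss and never consults
# the loss's custom get_cls_channels().
head_channels = num_classes + 1    # -> 23, matching the printed cls_score.size(-1)

# SeesawLoss.forward() instead asserts the custom layout it advertises:
# num_classes + 2 (class logits plus pos/neg objectness logits).
seesaw_channels = num_classes + 2  # -> 24

print(head_channels, seesaw_channels)  # 23 24, so the shapes can never match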

Our config information:

_base_ = '../_base_/default_runtime.py'
img_size = 550
model = dict(
    type='YOLACT',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,  # update the statistics of bn
        zero_init_residual=False,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_input',
        num_outs=5,
        upsample_cfg=dict(mode='bilinear')),
    bbox_head=dict(
        type='YOLACTHead',
        num_classes=22,
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=3,
            scales_per_octave=1,
            base_sizes=[8, 16, 32, 64, 128],
            ratios=[0.5, 1.0, 2.0],
            strides=[550.0 / x for x in [69, 35, 18, 9, 5]],
            centers=[(550 * 0.5 / x, 550 * 0.5 / x)
                     for x in [69, 35, 18, 9, 5]]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='SeesawLoss',
            p=0.8,
            q=2.0,
            num_classes=22,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.5),
        num_head_convs=1,
        num_protos=32,
        use_ohem=True),
    mask_head=dict(
        type='YOLACTProtonet',
        in_channels=256,
        num_protos=32,
        num_classes=22,
        max_masks_to_train=100,
        loss_mask_weight=6.125),
    segm_head=dict(
        type='YOLACTSegmHead',
        num_classes=22,
        in_channels=256,
        loss_segm=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    train_cfg=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.4,
            min_pos_iou=0.,
            ignore_iof_thr=-1,
            gt_max_assign_all=False),
        smoothl1_beta=1.,
        allowed_border=-1,
        pos_weight=-1,
        neg_pos_ratio=3,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        iou_thr=0.5,
        top_k=200,
        max_per_img=100))

dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.68, 116.78, 103.94], std=[58.40, 57.12, 57.38], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(4.0, 4.0)),
    dict(
        type='Expand',
        mean=img_norm_cfg['mean'],
        to_rgb=img_norm_cfg['to_rgb'],
        ratio_range=(1, 4)),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
        min_crop_size=0.3),
    dict(type='Resize', img_scale=(img_size, img_size), keep_ratio=False),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(img_size, img_size),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=False),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))
optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)
optimizer_config = dict()
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.1,
    step=[20, 42, 49, 52])
runner = dict(type='EpochBasedRunner', max_epochs=55)
cudnn_benchmark = True
evaluation = dict(metric=['bbox', 'segm'])
auto_scale_lr = dict(enable=True, base_batch_size=32)

ZwwWayne commented 2 years ago

Seesaw Loss does not support single-stage detectors.
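Roughly speaking, the ConvFC bbox heads used by two-stage detectors query the loss for its custom channel count before building the classifier, while anchor-based single-stage heads such as YOLACTHead do not. A simplified sketch of that branch (paraphrased from the mmdet 2.x heads, not the verbatim source; build_classifier is a made-up helper name):

import torch.nn as nn

def build_classifier(loss_cls, num_classes, in_features):
    # Two-stage bbox heads (e.g. ConvFCBBoxHead) check whether the loss
    # declares custom_cls_channels and, if so, let it choose the width.
    if getattr(loss_cls, 'custom_cls_channels', False):
        cls_channels = loss_cls.get_cls_channels(num_classes)  # 22 -> 24
    else:
        cls_channels = num_classes + 1                         # 22 -> 23
    return nn.Linear(in_features, cls_channels)

# YOLACTHead builds its conv classification branch along the second path,
# so the two extra objectness channels SeesawLoss expects are never allocated.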

TechnicalNovice commented 1 year ago

Seesaw Loss does not support single-stage detectors.

May I ask how Seesaw Loss is used with Faster R-CNN?
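For what it's worth, the configs under configs/seesaw_loss/ in this repo seem to plug it straight into the two-stage roi_head, something like the fragment below (my reading of those configs, not verified; num_classes=80 is only a placeholder, and the other fields of the base Faster/Mask R-CNN config are left untouched):

model = dict(
    roi_head=dict(
        bbox_head=dict(
            num_classes=80,
            loss_cls=dict(
                type='SeesawLoss',
                p=0.8,
                q=2.0,
                num_classes=80,
                loss_weight=1.0))))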