Reproduces the problem - code sample

import torch
import torch.nn as nn
import torch.nn.functional as F

from ..builder import LOSSES
from .accuracy import accuracy


@LOSSES.register_module()
class SeesawLoss(nn.Module):
    """
    Seesaw Loss for Long-Tailed Instance Segmentation (CVPR 2021)
    arXiv: https://arxiv.org/abs/2008.10032

    Args:
        use_sigmoid (bool, optional): Whether the prediction uses sigmoid
            or softmax. Only False is supported.
        p (float, optional): The ``p`` in the mitigation factor.
            Defaults to 0.8.
        q (float, optional): The ``q`` in the compensation factor.
            Defaults to 2.0.
        num_classes (int, optional): The number of classes.
            Defaults to 1203 for the LVIS v1 dataset.
        eps (float, optional): The minimal value of the divisor to smooth
            the computation of the compensation factor.
        reduction (str, optional): The method that reduces the loss to a
            scalar. Options are "none", "mean" and "sum".
        loss_weight (float, optional): The weight of the loss. Defaults to 1.0.
        return_dict (bool, optional): Whether to return the losses as a dict.
            Defaults to True.
    """

    def __init__(self,
                 use_sigmoid=False,
                 p=0.8,
                 q=2.0,
                 num_classes=1203,
                 eps=1e-2,
                 reduction='mean',
                 loss_weight=1.0,
                 return_dict=True):
        super(SeesawLoss, self).__init__()
        assert not use_sigmoid
        self.use_sigmoid = False
        self.p = p
        self.q = q
        self.num_classes = num_classes
        self.eps = eps
        self.reduction = reduction
        self.loss_weight = loss_weight
        self.return_dict = return_dict

        # 0 for pos, 1 for neg
        # seesaw_ce_loss is defined earlier in mmdet/models/losses/seesaw_loss.py
        self.cls_criterion = seesaw_ce_loss

        # cumulative samples for each category
        self.register_buffer(
            'cum_samples',
            torch.zeros(self.num_classes + 1, dtype=torch.float))

        # custom output channels of the classifier
        self.custom_cls_channels = True
        # custom activation of cls_score
        self.custom_activation = True
        # custom accuracy of the classifier
        self.custom_accuracy = True

    def _split_cls_score(self, cls_score):
        # split cls_score to cls_score_classes and cls_score_objectness
        assert cls_score.size(-1) == self.num_classes + 2
        cls_score_classes = cls_score[..., :-2]
        cls_score_objectness = cls_score[..., -2:]
        return cls_score_classes, cls_score_objectness

    def get_cls_channels(self, num_classes):
        """Get custom classification channels.

        Args:
            num_classes (int): The number of classes.

        Returns:
            int: The custom classification channels.
        """
        assert num_classes == self.num_classes
        return num_classes + 2

    def get_activation(self, cls_score):
        """Get custom activation of cls_score.

        Args:
            cls_score (torch.Tensor): The prediction with shape (N, C + 2).

        Returns:
            torch.Tensor: The custom activation of cls_score with shape
                (N, C + 1).
        """
        cls_score_classes, cls_score_objectness = self._split_cls_score(
            cls_score)
        score_classes = F.softmax(cls_score_classes, dim=-1)
        score_objectness = F.softmax(cls_score_objectness, dim=-1)
        score_pos = score_objectness[..., [0]]
        score_neg = score_objectness[..., [1]]
        # final class scores are p(class | foreground) * p(foreground),
        # with p(background) appended as the last channel
        score_classes = score_classes * score_pos
        scores = torch.cat([score_classes, score_neg], dim=-1)
        return scores

    def get_accuracy(self, cls_score, labels):
        """Get custom accuracy w.r.t. cls_score and labels.

        Args:
            cls_score (torch.Tensor): The prediction with shape (N, C + 2).
            labels (torch.Tensor): The learning label of the prediction.

        Returns:
            Dict[str, torch.Tensor]: The accuracy for objectness and classes,
                respectively.
        """
        pos_inds = labels < self.num_classes
        obj_labels = (labels == self.num_classes).long()
        cls_score_classes, cls_score_objectness = self._split_cls_score(
            cls_score)
        acc_objectness = accuracy(cls_score_objectness, obj_labels)
        acc_classes = accuracy(cls_score_classes[pos_inds], labels[pos_inds])
        acc = dict()
        acc['acc_objectness'] = acc_objectness
        acc['acc_classes'] = acc_classes
        return acc

    def forward(self,
                cls_score,
                labels,
                label_weights=None,
                avg_factor=None,
                reduction_override=None):
        """Forward function.

        Args:
            cls_score (torch.Tensor): The prediction with shape (N, C + 2).
            labels (torch.Tensor): The learning label of the prediction.
            label_weights (torch.Tensor, optional): Sample-wise loss weight.
            avg_factor (int, optional): Average factor that is used to average
                the loss. Defaults to None.
            reduction_override (str, optional): The method used to override
                the original reduction of the loss. Options are "none",
                "mean" and "sum".

        Returns:
            torch.Tensor | Dict[str, torch.Tensor]:
                if return_dict == False: The calculated loss |
                if return_dict == True: The dict of calculated losses
                for objectness and classes, respectively.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        # this is the assertion (seesaw_loss.py line 224) that fails in the
        # traceback below
        assert cls_score.size(-1) == self.num_classes + 2
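
For reference, here is a minimal sketch of the channel contract this loss enforces, using the 22-class setting from this issue. It assumes the full mmdet/models/losses/seesaw_loss.py is importable (that file also defines seesaw_ce_loss, which __init__ references); the batch size of 8 is made up for illustration:

import torch
from mmdet.models.losses import SeesawLoss

num_classes = 22
loss_cls = SeesawLoss(num_classes=num_classes)

# The head is expected to size its classifier from the loss itself.
cls_channels = loss_cls.get_cls_channels(num_classes)
print(cls_channels)  # 24 == num_classes + 2: class logits plus 2 objectness logits

cls_score = torch.randn(8, cls_channels)     # dummy predictions of the right width
scores = loss_cls.get_activation(cls_score)  # folds objectness back into the classes
print(scores.shape)                          # torch.Size([8, 23]): classes + background

# A head that emits num_classes + 1 == 23 channels, as printed below, trips
# `assert cls_score.size(-1) == self.num_classes + 2`.
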
Reproduces the problem - command or script
official train script
Reproduces the problem - error message
Traceback (most recent call last):
  File "tools/train.py", line 244, in <module>
    main()
  File "tools/train.py", line 233, in main
    train_detector(
  File "/root/autodl-tmp/mmdetection/mmdet/apis/train.py", line 244, in train_detector
    runner.run(data_loaders, cfg.workflow)
  File "/root/miniconda3/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 136, in run
    epoch_runner(data_loaders[i], **kwargs)
  File "/root/miniconda3/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 53, in train
    self.run_iter(data_batch, train_mode=True, **kwargs)
  File "/root/miniconda3/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 31, in run_iter
    outputs = self.model.train_step(data_batch, self.optimizer,
  File "/root/miniconda3/lib/python3.8/site-packages/mmcv/parallel/data_parallel.py", line 77, in train_step
    return self.module.train_step(*inputs[0], **kwargs[0])
  File "/root/autodl-tmp/mmdetection/mmdet/models/detectors/base.py", line 248, in train_step
    losses = self(**data)
  File "/root/miniconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/miniconda3/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 119, in new_func
    return old_func(*args, **kwargs)
  File "/root/autodl-tmp/mmdetection/mmdet/models/detectors/base.py", line 172, in forward
    return self.forward_train(img, img_metas, **kwargs)
  File "/root/autodl-tmp/mmdetection/mmdet/models/detectors/yolact.py", line 76, in forward_train
    losses, sampling_results = self.bbox_head.loss(
  File "/root/miniconda3/lib/python3.8/site-packages/mmcv/runner/fp16_utils.py", line 208, in new_func
    return old_func(*args, **kwargs)
  File "/root/autodl-tmp/mmdetection/mmdet/models/dense_heads/yolact_head.py", line 224, in loss
    losses_cls, losses_bbox = multi_apply(
  File "/root/autodl-tmp/mmdetection/mmdet/core/utils/misc.py", line 30, in multi_apply
    return tuple(map(list, zip(*map_results)))
  File "/root/autodl-tmp/mmdetection/mmdet/models/dense_heads/yolact_head.py", line 265, in loss_single_OHEM
    loss_cls_all = self.loss_cls(cls_score, labels, label_weights)
  File "/root/miniconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/autodl-tmp/mmdetection/mmdet/models/losses/seesaw_loss.py", line 224, in forward
    assert cls_score.size(-1) == self.num_classes + 2
AssertionError
Additional information

Our dataset has 22 classes, following the COCO format. Seesaw Loss was used as the loss_cls in the bbox_head. We used YOLACT as the instance segmentation algorithm. This bug has already been raised by other developers (issue #6424), but no one has provided a solution yet. Can you give some help?

I tried to print cls_score.size(-1) and the result is 23, i.e. num_classes + 1, while the failing assert expects num_classes + 2 = 24.
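
To make the mismatch concrete, here is a hedged sketch of the branch a SeesawLoss-aware head would take when sizing its classifier (the attribute and method names come from the code excerpt above; whether YOLACTHead performs this check is exactly what seems to be missing):

# `loss_cls` stands for the built SeesawLoss instance from the config.
num_classes = 22
if getattr(loss_cls, 'custom_cls_channels', False):
    cls_out_channels = loss_cls.get_cls_channels(num_classes)  # 24, what the assert wants
else:
    cls_out_channels = num_classes + 1  # 23, matching the value printed above
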
Prerequisite
Task
I have modified the scripts/configs, or I'm working on my own tasks/models/datasets.
Branch
master branch https://github.com/open-mmlab/mmdetection
Environment

sys.platform: linux
Python: 3.8.10 (default, Jun 4 2021, 15:09:15) [GCC 7.5.0]
CUDA available: True
GPU 0,1,2,3: NVIDIA RTX A5000
CUDA_HOME: /usr/local/cuda
NVCC: Cuda compilation tools, release 11.3, V11.3.109
GCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
PyTorch: 1.10.0+cu113
PyTorch compiling details: PyTorch built with:
TorchVision: 0.11.1+cu113
OpenCV: 4.6.0
MMCV: 1.7.0
MMCV Compiler: GCC 9.3
MMCV CUDA Compiler: 11.3
MMDetection: 2.25.3+e71b499
Our config information:

_base_ = '../_base_/default_runtime.py'
img_size = 550
model = dict(
    type='YOLACT',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,  # update the statistics of bn
        zero_init_residual=False,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_input',
        num_outs=5,
        upsample_cfg=dict(mode='bilinear')),
    bbox_head=dict(
        type='YOLACTHead',
        num_classes=22,
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=3,
            scales_per_octave=1,
            base_sizes=[8, 16, 32, 64, 128],
            ratios=[0.5, 1.0, 2.0],
            strides=[550.0 / x for x in [69, 35, 18, 9, 5]],
            centers=[(550 * 0.5 / x, 550 * 0.5 / x)
                     for x in [69, 35, 18, 9, 5]]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='SeesawLoss',
            p=0.8,
            q=2.0,
            num_classes=22,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.5),
        num_head_convs=1,
        num_protos=32,
        use_ohem=True),
    mask_head=dict(
        type='YOLACTProtonet',
        in_channels=256,
        num_protos=32,
        num_classes=22,
        max_masks_to_train=100,
        loss_mask_weight=6.125),
    segm_head=dict(
        type='YOLACTSegmHead',
        num_classes=22,
        in_channels=256,
        loss_segm=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    train_cfg=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.4,
            min_pos_iou=0.,
            ignore_iof_thr=-1,
            gt_max_assign_all=False),
        smoothl1_beta=1.))
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.68, 116.78, 103.94], std=[58.40, 57.12, 57.38], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(4.0, 4.0)),
    dict(
        type='Expand',
        mean=img_norm_cfg['mean'],
        to_rgb=img_norm_cfg['to_rgb'],
        ratio_range=(1, 4)),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
        min_crop_size=0.3),
    dict(type='Resize', img_scale=(img_size, img_size), keep_ratio=False),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(img_size, img_size),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=False),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))
optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)
optimizer_config = dict()
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.1,
    step=[20, 42, 49, 52])
runner = dict(type='EpochBasedRunner', max_epochs=55)
cudnn_benchmark = True
evaluation = dict(metric=['bbox', 'segm'])
auto_scale_lr = dict(enable=True, base_batch_size=32)
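
Given this config, the mismatch can be reproduced in isolation with a short sketch; it only assumes mmdet's standard build_loss helper and the loss_cls dict above:

from mmdet.models import build_loss

loss_cls = build_loss(
    dict(type='SeesawLoss', p=0.8, q=2.0, num_classes=22, loss_weight=1.0))

# SeesawLoss asks for num_classes + 2 = 24 classifier channels, while the
# YOLACT bbox_head in this config emits num_classes + 1 = 23, so the assert
# in SeesawLoss.forward fails.
print(loss_cls.get_cls_channels(22))  # 24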