open-mmlab / mmdetection

OpenMMLab Detection Toolbox and Benchmark
https://mmdetection.readthedocs.io
Apache License 2.0

I want to add a mask branch to SSD #829

Closed: andeyeluguo closed this issue 5 years ago

andeyeluguo commented 5 years ago

The config file is like this:

# model settings
input_size = 512
model = dict(
    type='SingleStageMaskDetector',
    pretrained='open-mmlab://vgg16_caffe',
    backbone=dict(
        type='SSDVGG',
        input_size=input_size,
        depth=16,
        with_last_pool=False,
        ceil_mode=True,
        out_indices=(3, 4),
        out_feature_indices=(22, 34),
        l2_norm_scale=20),
    neck=None,
    bbox_head=dict(
        type='SSDHead',
        input_size=input_size,
        in_channels=(512, 1024, 512, 256, 256, 256, 256),
        num_classes=81,
        anchor_strides=(8, 16, 32, 64, 128, 256, 512),
        basesize_ratio_range=(0.15, 0.9),
        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]),
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2)),
    mask_head=dict(
        type='SSDMaskHead',
        num_convs=4,
        in_channels=(512, 1024, 512, 256, 256, 256),
        conv_out_channels=(2, 2, 2, 2, 2, 2),
        num_classes=81,
        loss_mask=dict(
            type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)))
cudnn_benchmark = True
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        ignore_iof_thr=-1,
        gt_max_assign_all=False),
    smoothl1_beta=1.,
    allowed_border=-1,
    pos_weight=-1,
    neg_pos_ratio=3,
    mask_size=28,
    debug=False)
test_cfg = dict(
    nms=dict(type='nms', iou_thr=0.45),
    min_bbox_size=0,
    score_thr=0.02,
    max_per_img=200)

# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=1,
    workers_per_gpu=1,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(input_size, input_size),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0.5,
        with_mask=True,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(input_size, input_size),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=True,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(input_size, input_size),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))

# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))

# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)

# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable

# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/tsm_mask_ssd'
load_from = None
resume_from = None
workflow = [('train', 1)]
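
For reference, one quick way to sanity-check this config before launching distributed training is to build the detector and push a dummy 512 x 512 batch through extract_feat. The snippet below is only a sketch: it assumes the mmdetection v1.x-style APIs this config targets (mmcv.Config.fromfile, mmdet.models.build_detector), that the config is saved as configs/mask_ssd.py (the path in the launch command further down), and that the SingleStageMaskDetector and SSDMaskHead files pasted in the following comments are already registered.

import torch
import mmcv
from mmdet.models import build_detector

cfg = mmcv.Config.fromfile('configs/mask_ssd.py')
cfg.model.pretrained = None  # skip downloading VGG weights for a pure shape check
model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

# One feature map per SSD level; for a genuine 512 x 512 input the spatial
# sizes implied by anchor_strides are 64, 32, 16, 8, 4, 2 and 1.
feats = model.extract_feat(torch.randn(1, 3, 512, 512))
print([tuple(f.shape[-2:]) for f in feats])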

andeyeluguo commented 5 years ago

The single_stage_mask.py file is like this:

import torch.nn as nn

from .single_stage import BaseDetector
from .. import builder
from ..registry import DETECTORS
from mmdet.core import bbox2result
import pdb

@DETECTORS.register_module
class SingleStageMaskDetector(BaseDetector):

def __init__(self,
             backbone,
             neck=None,
             bbox_head=None,
             mask_head=None,
             train_cfg=None,
             test_cfg=None,
             pretrained=None):
    super(SingleStageMaskDetector, self).__init__()
    self.backbone = builder.build_backbone(backbone)
    if neck is not None:
        self.neck = builder.build_neck(neck)
    self.bbox_head = builder.build_head(bbox_head)
    self.mask_head = builder.build_head(mask_head)
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.init_weights(pretrained=pretrained)

def init_weights(self, pretrained=None):
    super(SingleStageMaskDetector, self).init_weights(pretrained)
    self.backbone.init_weights(pretrained=pretrained)
    if self.with_neck:
        if isinstance(self.neck, nn.Sequential):
            for m in self.neck:
                m.init_weights()
        else:
            self.neck.init_weights()
    self.bbox_head.init_weights()
    self.mask_head.init_weights()

def extract_feat(self, img):
    x = self.backbone(img)
    if self.with_neck:
        x = self.neck(x)
    return x

def forward_train(self,
                  img,
                  img_metas,
                  gt_bboxes,
                  gt_labels,
                  gt_bboxes_ignore=None,
                  gt_masks=None):
    x = self.extract_feat(img)
    outs = self.bbox_head(x)
    #mask_preds = self.mask_head(x)
    #pdb.set_trace()
    #loss_mask = self.mask_head.loss(mask_pred, gt_masks, pos_labels)
    loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)
    losses = self.bbox_head.loss(
        *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
    return losses

def simple_test(self, img, img_meta, rescale=False):
    x = self.extract_feat(img)
    outs = self.bbox_head(x)
    bbox_inputs = outs + (img_meta, self.test_cfg, rescale)
    bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
    bbox_results = [
        bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
        for det_bboxes, det_labels in bbox_list
    ]
    return bbox_results[0]

def aug_test(self, imgs, img_metas, rescale=False):
    raise NotImplementedError
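
The commented-out lines in forward_train are the crux: SSDMaskHead.get_target (in the next comment) expects sampling results with pos_bboxes and pos_assigned_gt_inds, which SSDHead.loss never exposes, so the mask branch is not actually trained here. The sketch below is not mmdetection code; it is a self-contained toy in plain PyTorch, with dense stand-in targets instead of a real assigner/sampler, only to illustrate where the mask loss would plug into the loss dict the runner sums up.

import torch
import torch.nn as nn
import torch.nn.functional as F

class ToyBackbone(nn.Module):
    """Stand-in for SSDVGG: a single feature level instead of seven."""
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, 3, stride=8, padding=1)
    def forward(self, img):
        return [self.conv(img)]

class ToyBBoxHead(nn.Module):
    """Stand-in for SSDHead: dense per-location classification only."""
    def __init__(self, num_classes=4):
        super().__init__()
        self.cls = nn.Conv2d(8, num_classes, 1)
    def forward(self, feats):
        return [self.cls(f) for f in feats]
    def loss(self, preds, gt_label_map):
        return {'loss_cls': F.cross_entropy(preds[0], gt_label_map)}

class ToyMaskHead(nn.Module):
    """Stand-in for SSDMaskHead: one binary mask logit per location."""
    def __init__(self):
        super().__init__()
        self.mask = nn.Conv2d(8, 1, 1)
    def forward(self, feats):
        return [self.mask(f) for f in feats]
    def loss(self, preds, gt_mask_map):
        return {'loss_mask': F.binary_cross_entropy_with_logits(preds[0], gt_mask_map)}

class ToySingleStageMask(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = ToyBackbone()
        self.bbox_head = ToyBBoxHead()
        self.mask_head = ToyMaskHead()
    def forward_train(self, img, gt_label_map, gt_mask_map):
        feats = self.backbone(img)
        losses = self.bbox_head.loss(self.bbox_head(feats), gt_label_map)
        # the step that is commented out in the pasted forward_train:
        # run the mask head and merge its loss into the same dict
        losses.update(self.mask_head.loss(self.mask_head(feats), gt_mask_map))
        return losses

det = ToySingleStageMask()
img = torch.randn(2, 3, 64, 64)                    # 64 / 8 = 8 x 8 feature map
gt_label_map = torch.randint(0, 4, (2, 8, 8))
gt_mask_map = (torch.rand(2, 1, 8, 8) > 0.5).float()
print(det.forward_train(img, gt_label_map, gt_mask_map))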
andeyeluguo commented 5 years ago

The ssd_head.py is like this:

import mmcv
import numpy as np
import pycocotools.mask as mask_util
import torch
import torch.nn as nn

from ..builder import build_loss
from ..registry import HEADS
from ..utils import ConvModule
from mmdet.core import mask_target
import pdb

@HEADS.register_module
class SSDMaskHead(nn.Module):

def __init__(self,
             num_convs=4,
             roi_feat_size=14,
             in_channels=256,
             conv_kernel_size=3,
             conv_out_channels=2,
             upsample_method='deconv',
             upsample_ratio=2,
             num_classes=81,
             class_agnostic=False,
             conv_cfg=None,
             norm_cfg=None,
             loss_mask=dict(
                 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)):
    super(SSDMaskHead, self).__init__()
    if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']:
        raise ValueError(
            'Invalid upsample method {}, accepted methods '
            'are "deconv", "nearest", "bilinear"'.format(upsample_method))
    self.num_convs = num_convs
    self.roi_feat_size = roi_feat_size  # WARN: not used and reserved
    self.in_channels = in_channels
    self.conv_kernel_size = conv_kernel_size
    self.conv_out_channels = conv_out_channels
    self.upsample_method = upsample_method
    #self.upsample_ratio = upsample_ratio
    self.upsample_ratios = [8,16,32,64]
    self.num_classes = num_classes
    self.class_agnostic = class_agnostic
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.loss_mask = build_loss(loss_mask)

    self.convs_group = []
    self.upsample_group = []
    self.conv_logits_group = []
    for j in range(4):#len(self.in_channels)
        convs = nn.ModuleList()
        for i in range(self.num_convs):
            in_channels = (
                self.in_channels[j] if i == 0 else self.conv_out_channels[j])
            padding = (self.conv_kernel_size - 1) // 2
            convs.append(
                ConvModule(
                    in_channels,
                    self.conv_out_channels[j],
                    self.conv_kernel_size,
                    padding=padding,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg))
        self.convs_group.append(convs)

        upsample_in_channels = (
            self.conv_out_channels[j] if self.num_convs > 0 else in_channels)
        upsample = None
        if self.upsample_method is None:
            upsample = None
        elif self.upsample_method == 'deconv':
            upsample = nn.ConvTranspose2d(
                upsample_in_channels,
                self.conv_out_channels[j],
                self.upsample_ratios[j],
                stride=self.upsample_ratios[j])
        else:
            upsample = nn.Upsample(
                scale_factor=self.upsample_ratios[j],
                mode=self.upsample_method)
        self.upsample_group.append(upsample)

        out_channels = 1 if self.class_agnostic else 2#self.num_classes
        logits_in_channel = (
            self.conv_out_channels[j]
            if self.upsample_method == 'deconv' else upsample_in_channels)
        conv_logits = nn.Conv2d(logits_in_channel, out_channels, 1)
        self.conv_logits_group.append(conv_logits)
    self.relu = nn.ReLU(inplace=True)
    self.debug_imgs = None

def init_weights(self):
    for j in range(4):
        for m in [self.upsample_group[j], self.conv_logits_group[j]]:
            if m is None:
                continue
            nn.init.kaiming_normal_(
                m.weight, mode='fan_out', nonlinearity='relu')
            nn.init.constant_(m.bias, 0)

def forward(self, x):
    mask_preds = []
    for j in range(4):
        convs = self.convs_group[j]
        xx = x[j]
        for conv in convs:
            conv = conv.cuda()
            xx = conv(xx)
        if self.upsample_group is not None:
            upsample = self.upsample_group[j].cuda()
            xx = upsample(xx)
            if self.upsample_method == 'deconv':
                xx = self.relu(xx)
        conv_logits = self.conv_logits_group[j].cuda()
        mask_pred = conv_logits(xx)
        mask_preds.append(mask_pred)
    return mask_preds

def get_target(self, sampling_results, gt_masks, rcnn_train_cfg):
    pos_proposals = [res.pos_bboxes for res in sampling_results]
    pos_assigned_gt_inds = [
        res.pos_assigned_gt_inds for res in sampling_results
    ]
    mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds,
                               gt_masks, rcnn_train_cfg)
    return mask_targets

def loss(self, mask_pred, mask_targets, labels):
    loss = dict()
    if self.class_agnostic:
        loss_mask = self.loss_mask(mask_pred, mask_targets,
                                   torch.zeros_like(labels))
    else:
        loss_mask = self.loss_mask(mask_pred, mask_targets, labels)
    loss['loss_mask'] = loss_mask
    return loss

def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
                  ori_shape, scale_factor, rescale):
    """Get segmentation masks from mask_pred and bboxes.

    Args:
        mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
            For single-scale testing, mask_pred is the direct output of
            model, whose type is Tensor, while for multi-scale testing,
            it will be converted to numpy array outside of this method.
        det_bboxes (Tensor): shape (n, 4/5)
        det_labels (Tensor): shape (n, )
        img_shape (Tensor): shape (3, )
        rcnn_test_cfg (dict): rcnn testing config
        ori_shape: original image size

    Returns:
        list[list]: encoded masks
    """
    print('waiting for rewrite.....................................')
    if isinstance(mask_pred, torch.Tensor):
        mask_pred = mask_pred.sigmoid().cpu().numpy()
    assert isinstance(mask_pred, np.ndarray)

    cls_segms = [[] for _ in range(self.num_classes - 1)]
    bboxes = det_bboxes.cpu().numpy()[:, :4]
    labels = det_labels.cpu().numpy() + 1

    if rescale:
        img_h, img_w = ori_shape[:2]
    else:
        img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
        img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
        scale_factor = 1.0

    for i in range(bboxes.shape[0]):
        bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
        label = labels[i]
        w = max(bbox[2] - bbox[0] + 1, 1)
        h = max(bbox[3] - bbox[1] + 1, 1)

        if not self.class_agnostic:
            mask_pred_ = mask_pred[i, label, :, :]
        else:
            mask_pred_ = mask_pred[i, 0, :, :]
        im_mask = np.zeros((img_h, img_w), dtype=np.uint8)

        bbox_mask = mmcv.imresize(mask_pred_, (w, h))
        bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
            np.uint8)
        im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
        rle = mask_util.encode(
            np.array(im_mask[:, :, np.newaxis], order='F'))[0]
        cls_segms[label - 1].append(rle)

    return cls_segms
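
One structural note on the head as pasted: convs_group, upsample_group and conv_logits_group are plain Python lists, so none of those layers are registered as submodules. That is why forward() has to call .cuda() on them every pass, and it also means their weights will not appear in model.parameters() or in checkpoints. A minimal, self-contained illustration in plain PyTorch (not mmdet code):

import torch.nn as nn

class PlainList(nn.Module):
    def __init__(self):
        super().__init__()
        # stored in a Python list: NOT registered as submodules
        self.convs = [nn.Conv2d(4, 4, 3, padding=1) for _ in range(2)]

class Registered(nn.Module):
    def __init__(self):
        super().__init__()
        # nn.ModuleList registers each layer with the parent module
        self.convs = nn.ModuleList(nn.Conv2d(4, 4, 3, padding=1) for _ in range(2))

print(len(list(PlainList().parameters())))   # 0: .cuda(), optimizer and state_dict miss them
print(len(list(Registered().parameters())))  # 4: weights and biases are tracked

Wrapping the three groups in nn.ModuleList inside __init__ would make the per-forward .cuda() calls unnecessary and keep the mask-head weights with the rest of the model.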
andeyeluguo commented 5 years ago

But it produces the following error:

  File "./tools/train.py", line 98, in <module>
    main()
  File "./tools/train.py", line 94, in main
    logger=logger)
  File "/home/zhaodz/algorithm/mmdetection/mmdet/apis/train.py", line 59, in train_detector
    _dist_train(model, dataset, cfg, validate=validate)
  File "/home/zhaodz/algorithm/mmdetection/mmdet/apis/train.py", line 171, in _dist_train
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
  File "/home/zhaodz/program/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv-0.2.8-py3.7.egg/mmcv/runner/runner.py", line 356, in run
    epoch_runner(data_loaders[i], **kwargs)
  File "/home/zhaodz/program/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv-0.2.8-py3.7.egg/mmcv/runner/runner.py", line 262, in train
    self.model, data_batch, train_mode=True, **kwargs)
  File "/home/zhaodz/algorithm/mmdetection/mmdet/apis/train.py", line 39, in batch_processor
    losses = model(**data)
  File "/home/zhaodz/program/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/zhaodz/program/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv-0.2.8-py3.7.egg/mmcv/parallel/distributed.py", line 50, in forward
    return self.module(*inputs[0], **kwargs[0])
  File "/home/zhaodz/program/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/zhaodz/algorithm/mmdetection/mmdet/models/detectors/base.py", line 84, in forward
    return self.forward_train(img, img_meta, **kwargs)
  File "/home/zhaodz/algorithm/mmdetection/mmdet/models/detectors/single_stage_mask.py", line 55, in forward_train
    x = self.extract_feat(img)
  File "/home/zhaodz/algorithm/mmdetection/mmdet/models/detectors/single_stage_mask.py", line 43, in extract_feat
    x = self.backbone(img)
  File "/home/zhaodz/program/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/zhaodz/algorithm/mmdetection/mmdet/models/backbones/ssd_vgg.py", line 83, in forward
    x = F.relu(layer(x), inplace=True)
  File "/home/zhaodz/program/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/zhaodz/program/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 338, in forward
    self.padding, self.dilation, self.groups)
RuntimeError: Calculated padded input size per channel: (3 x 4). Kernel size: (4 x 4). Kernel size can't be greater than actual input size
Traceback (most recent call last):
  File "/home/zhaodz/program/anaconda3/envs/open-mmlab/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/zhaodz/program/anaconda3/envs/open-mmlab/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/zhaodz/program/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/distributed/launch.py", line 235, in <module>
    main()
  File "/home/zhaodz/program/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/distributed/launch.py", line 231, in main
    cmd=process.args)
subprocess.CalledProcessError: Command '['/home/zhaodz/program/anaconda3/envs/open-mmlab/bin/python', '-u', './tools/train.py', '--local_rank=0', './configs/mask_ssd.py', '--launcher', 'pytorch', '--validate']' returned non-zero exit status 1.
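
One possible reading of the trace (not confirmed anywhere in this thread): the failing layer is the last extra conv of SSD-VGG-512, whose 4 x 4 kernel only fits when the resized input is exactly 512 x 512, and a padded map of 3 x 4 means the image fed to the backbone was much flatter than square. That can happen when resizing keeps the aspect ratio (the stock SSD configs pin the input shape, e.g. via resize_keep_ratio=False in the old-style configs, which the config above does not set). The arithmetic below is only a sketch assuming mmdetection's SSDVGG extra-layer layout (fc7 at stride 16, four 3 x 3 stride-2 convs with padding 1, then the final 4 x 4 conv with padding 1); final_map is a hypothetical helper, not a real API.

import math

def final_map(h):
    """Spatial size (one dimension) reaching the last 4 x 4 extra conv, padding included."""
    size = math.ceil(h / 16)      # conv7 / fc7 level of SSD-VGG
    for _ in range(4):            # four stride-2 (3 x 3, pad 1) extra convs
        size = math.ceil(size / 2)
    return size + 2               # + padding of 1 on both sides

print(final_map(512))  # 4 -> the 4 x 4 kernel just fits (1 x 1 output)
print(final_map(256))  # 3 -> matches the 3 in the reported "(3 x 4)"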

andeyeluguo commented 5 years ago

I have no idea about that.....

Epiphqny commented 5 years ago

I have no idea about that.....

Hello, have you solved this problem? I am running into a similar issue and would like to ask you about it.