Closed andeyeluguo closed 5 years ago
The single_stage_mask.py file is like this:
import torch.nn as nn
from .single_stage import BaseDetector
from .. import builder
from ..registry import DETECTORS
from mmdet.core import bbox2result
import pdb
@DETECTORS.register_module
class SingleStageMaskDetector(BaseDetector):
    """Single-stage detector with an optional, additional mask head.

    The detector runs ``backbone`` (and ``neck`` when configured) to extract
    features and feeds them to ``bbox_head``. A ``mask_head`` is built and
    initialised when one is configured, but it is not yet used by
    ``forward_train`` or ``simple_test``.

    Args:
        backbone (dict): config passed to ``builder.build_backbone``.
        neck (dict, optional): config passed to ``builder.build_neck``.
        bbox_head (dict): config passed to ``builder.build_head``. It is
            used unconditionally by training and testing.
        mask_head (dict, optional): config passed to ``builder.build_head``.
            When ``None`` no mask head is created.
        train_cfg: training config forwarded to ``bbox_head.loss``.
        test_cfg: testing config forwarded to ``bbox_head.get_bboxes``.
        pretrained (str, optional): forwarded to ``init_weights``.
    """

    def __init__(self,
                 backbone,
                 neck=None,
                 bbox_head=None,
                 mask_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(SingleStageMaskDetector, self).__init__()
        self.backbone = builder.build_backbone(backbone)
        if neck is not None:
            self.neck = builder.build_neck(neck)
        self.bbox_head = builder.build_head(bbox_head)
        # ``mask_head`` defaults to None, so only build it when configured
        # (same pattern as ``neck`` above).
        if mask_head is not None:
            self.mask_head = builder.build_head(mask_head)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.init_weights(pretrained=pretrained)

    def init_weights(self, pretrained=None):
        """Initialise the backbone, neck and heads.

        Args:
            pretrained (str, optional): forwarded to the parent class and to
                ``backbone.init_weights``.
        """
        super(SingleStageMaskDetector, self).init_weights(pretrained)
        self.backbone.init_weights(pretrained=pretrained)
        if self.with_neck:
            if isinstance(self.neck, nn.Sequential):
                for m in self.neck:
                    m.init_weights()
            else:
                self.neck.init_weights()
        self.bbox_head.init_weights()
        # The mask head attribute only exists when one was configured.
        if getattr(self, 'mask_head', None) is not None:
            self.mask_head.init_weights()

    def extract_feat(self, img):
        """Run the backbone (and the neck, if present) on ``img``."""
        x = self.backbone(img)
        if self.with_neck:
            x = self.neck(x)
        return x

    def forward_train(self,
                      img,
                      img_metas,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None):
        """Compute the training losses of the bbox head.

        Args:
            img: input passed to ``extract_feat``.
            img_metas: image meta information forwarded to ``bbox_head.loss``.
            gt_bboxes: ground-truth boxes forwarded to ``bbox_head.loss``.
            gt_labels: ground-truth labels forwarded to ``bbox_head.loss``.
            gt_bboxes_ignore: forwarded to ``bbox_head.loss`` as a keyword.
            gt_masks: currently unused (see TODO below).

        Returns:
            The value returned by ``bbox_head.loss``.
        """
        x = self.extract_feat(img)
        outs = self.bbox_head(x)
        # TODO: the mask branch is not wired in yet. The mask head still has
        # to be run on ``x`` and its loss (which needs mask targets derived
        # from ``gt_masks``) merged into ``losses``.
        loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)
        losses = self.bbox_head.loss(
            *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
        return losses

    def simple_test(self, img, img_meta, rescale=False):
        """Test without augmentation; returns the bbox results of the first
        image only."""
        x = self.extract_feat(img)
        outs = self.bbox_head(x)
        bbox_inputs = outs + (img_meta, self.test_cfg, rescale)
        bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
        bbox_results = [
            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
            for det_bboxes, det_labels in bbox_list
        ]
        return bbox_results[0]

    def aug_test(self, imgs, img_metas, rescale=False):
        """Test-time augmentation is not supported."""
        raise NotImplementedError
The ssd_head.py file is like this:
import mmcv
import numpy as np
import pycocotools.mask as mask_util
import torch
import torch.nn as nn
from ..builder import build_loss
from ..registry import HEADS
from ..utils import ConvModule
from mmdet.core import mask_target
import pdb
@HEADS.register_module
class SSDMaskHead(nn.Module):
    """Multi-level mask head: one conv stack + upsampler + 1x1 logits conv
    per input feature level.

    All sub-modules are stored in ``nn.ModuleList`` containers so that their
    parameters are registered with the module (and therefore follow
    ``.cuda()`` / ``.to()``, appear in ``state_dict`` and are seen by the
    optimizer).

    Args:
        num_convs (int): number of ``ConvModule`` layers per level.
        roi_feat_size (int): stored but not used.
        in_channels (int | sequence[int]): input channels per level. An int
            is broadcast to every level.
        conv_kernel_size (int): kernel size of the conv stack.
        conv_out_channels (int | sequence[int]): output channels of the conv
            stack per level. An int is broadcast to every level.
        upsample_method (str | None): ``None``, ``'deconv'``, ``'nearest'``
            or ``'bilinear'``.
        upsample_ratio (int): unused; kept so existing configs/callers that
            pass it keep working.
        num_classes (int): number of classes (used by ``get_seg_masks``).
        class_agnostic (bool): predict a single mask channel if True.
        conv_cfg: forwarded to ``ConvModule``.
        norm_cfg: forwarded to ``ConvModule``.
        loss_mask (dict): config passed to ``build_loss``.
        upsample_ratios (sequence[int]): upsampling factor per level. Its
            length defines how many feature levels the head processes.

    Raises:
        ValueError: on an unknown ``upsample_method`` or when
            ``in_channels`` / ``conv_out_channels`` have fewer entries than
            ``upsample_ratios``.
    """

    def __init__(self,
                 num_convs=4,
                 roi_feat_size=14,
                 in_channels=256,
                 conv_kernel_size=3,
                 conv_out_channels=2,
                 upsample_method='deconv',
                 upsample_ratio=2,
                 num_classes=81,
                 class_agnostic=False,
                 conv_cfg=None,
                 norm_cfg=None,
                 loss_mask=dict(
                     type='CrossEntropyLoss', use_mask=True, loss_weight=1.0),
                 upsample_ratios=(8, 16, 32, 64)):
        super(SSDMaskHead, self).__init__()
        if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']:
            raise ValueError(
                'Invalid upsample method {}, accepted methods '
                'are "deconv", "nearest", "bilinear"'.format(upsample_method))
        self.num_convs = num_convs
        self.roi_feat_size = roi_feat_size  # WARN: not used and reserved
        self.conv_kernel_size = conv_kernel_size
        self.upsample_method = upsample_method
        self.upsample_ratios = list(upsample_ratios)
        self.num_levels = len(self.upsample_ratios)
        self.in_channels = self._per_level(in_channels, self.num_levels,
                                           'in_channels')
        self.conv_out_channels = self._per_level(
            conv_out_channels, self.num_levels, 'conv_out_channels')
        self.num_classes = num_classes
        self.class_agnostic = class_agnostic
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.loss_mask = build_loss(loss_mask)

        padding = (self.conv_kernel_size - 1) // 2
        # NOTE(review): the non-agnostic branch predicts 2 channels rather
        # than ``num_classes``; kept as in the original - confirm intent.
        out_channels = 1 if self.class_agnostic else 2

        self.convs_group = nn.ModuleList()
        # ``None`` (instead of an empty list) signals "no upsampling" so that
        # ``forward`` can skip the step.
        self.upsample_group = (None if self.upsample_method is None else
                               nn.ModuleList())
        self.conv_logits_group = nn.ModuleList()

        for j in range(self.num_levels):
            level_in = self.in_channels[j]
            level_out = self.conv_out_channels[j]
            ratio = self.upsample_ratios[j]

            convs = nn.ModuleList()
            for i in range(self.num_convs):
                convs.append(
                    ConvModule(
                        level_in if i == 0 else level_out,
                        level_out,
                        self.conv_kernel_size,
                        padding=padding,
                        conv_cfg=conv_cfg,
                        norm_cfg=norm_cfg))
            self.convs_group.append(convs)

            # Without a conv stack the upsampler sees the raw level input.
            upsample_in_channels = (
                level_out if self.num_convs > 0 else level_in)
            if self.upsample_method == 'deconv':
                self.upsample_group.append(
                    nn.ConvTranspose2d(
                        upsample_in_channels,
                        level_out,
                        ratio,
                        stride=ratio))
            elif self.upsample_method is not None:
                self.upsample_group.append(
                    nn.Upsample(
                        scale_factor=ratio, mode=self.upsample_method))

            # Only the deconv changes the channel count.
            logits_in_channel = (
                level_out
                if self.upsample_method == 'deconv' else upsample_in_channels)
            self.conv_logits_group.append(
                nn.Conv2d(logits_in_channel, out_channels, 1))

        self.relu = nn.ReLU(inplace=True)
        self.debug_imgs = None

    @staticmethod
    def _per_level(value, num_levels, name):
        """Return ``value`` as a tuple with at least ``num_levels`` entries.

        An int is repeated for every level; a sequence is kept as given but
        must provide one entry for each level.
        """
        if isinstance(value, int):
            return (value, ) * num_levels
        value = tuple(value)
        if len(value) < num_levels:
            raise ValueError(
                '{} has {} entries but {} levels are required'.format(
                    name, len(value), num_levels))
        return value

    def init_weights(self):
        """Kaiming-initialise the deconv and logits convolutions.

        Parameter-free upsamplers (``nn.Upsample``) are skipped.
        """
        modules = list(self.conv_logits_group)
        if self.upsample_group is not None:
            modules.extend(self.upsample_group)
        for m in modules:
            if not isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                continue
            nn.init.kaiming_normal_(
                m.weight, mode='fan_out', nonlinearity='relu')
            nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Predict one mask logits map per feature level.

        Args:
            x (sequence): feature maps; ``x[j]`` is fed to level ``j``.

        Returns:
            list: mask logits, one entry per level.
        """
        mask_preds = []
        for j in range(self.num_levels):
            xx = x[j]
            for conv in self.convs_group[j]:
                xx = conv(xx)
            if self.upsample_group is not None:
                xx = self.upsample_group[j](xx)
                if self.upsample_method == 'deconv':
                    xx = self.relu(xx)
            mask_preds.append(self.conv_logits_group[j](xx))
        return mask_preds

    def get_target(self, sampling_results, gt_masks, rcnn_train_cfg):
        """Build mask targets from sampling results via ``mask_target``."""
        pos_proposals = [res.pos_bboxes for res in sampling_results]
        pos_assigned_gt_inds = [
            res.pos_assigned_gt_inds for res in sampling_results
        ]
        mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds,
                                   gt_masks, rcnn_train_cfg)
        return mask_targets

    def loss(self, mask_pred, mask_targets, labels):
        """Return ``{'loss_mask': ...}`` computed by the configured loss.

        In class-agnostic mode all labels are replaced by zeros.
        """
        loss = dict()
        if self.class_agnostic:
            loss_mask = self.loss_mask(mask_pred, mask_targets,
                                       torch.zeros_like(labels))
        else:
            loss_mask = self.loss_mask(mask_pred, mask_targets, labels)
        loss['loss_mask'] = loss_mask
        return loss

    def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
                      ori_shape, scale_factor, rescale):
        """Get segmentation masks from mask_pred and bboxes.

        NOTE: this method was marked by its author as still waiting for a
        rewrite; it indexes ``mask_pred`` per detection (and per class label
        when not class-agnostic).

        Args:
            mask_pred (Tensor or ndarray): indexed as
                ``mask_pred[i, channel, :, :]``. A Tensor is passed through
                sigmoid and converted to a numpy array.
            det_bboxes (Tensor): shape (n, 4/5)
            det_labels (Tensor): shape (n, )
            rcnn_test_cfg (dict): rcnn testing config; ``mask_thr_binary``
                is read from it.
            ori_shape: original image size
            scale_factor: scale between the original and the input image.
            rescale (bool): if True, masks are produced at ``ori_shape``.

        Returns:
            list[list]: encoded masks
        """
        if isinstance(mask_pred, torch.Tensor):
            mask_pred = mask_pred.sigmoid().cpu().numpy()
        assert isinstance(mask_pred, np.ndarray)

        cls_segms = [[] for _ in range(self.num_classes - 1)]
        bboxes = det_bboxes.cpu().numpy()[:, :4]
        labels = det_labels.cpu().numpy() + 1

        if rescale:
            img_h, img_w = ori_shape[:2]
        else:
            img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
            img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
            scale_factor = 1.0

        for i in range(bboxes.shape[0]):
            bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
            label = labels[i]
            w = max(bbox[2] - bbox[0] + 1, 1)
            h = max(bbox[3] - bbox[1] + 1, 1)

            if not self.class_agnostic:
                mask_pred_ = mask_pred[i, label, :, :]
            else:
                mask_pred_ = mask_pred[i, 0, :, :]
            im_mask = np.zeros((img_h, img_w), dtype=np.uint8)

            bbox_mask = mmcv.imresize(mask_pred_, (w, h))
            bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
                np.uint8)
            im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
            rle = mask_util.encode(
                np.array(im_mask[:, :, np.newaxis], order='F'))[0]
            cls_segms[label - 1].append(rle)

        return cls_segms
But it raises the following error:
File "./tools/train.py", line 98, in
I have no idea about that.....
I have no idea about that.....
您好,请问你这个问题解决了么,我也有类似的问题想请教一下
the config file is like this:
# model settings
input_size = 512
model = dict(
    type='SingleStageMaskDetector',
    pretrained='open-mmlab://vgg16_caffe',
    backbone=dict(
        type='SSDVGG',
        input_size=input_size,
        depth=16,
        with_last_pool=False,
        ceil_mode=True,
        out_indices=(3, 4),
        out_feature_indices=(22, 34),
        l2_norm_scale=20),
    neck=None,
    bbox_head=dict(
        type='SSDHead',
        input_size=input_size,
        in_channels=(512, 1024, 512, 256, 256, 256, 256),
        num_classes=81,
        anchor_strides=(8, 16, 32, 64, 128, 256, 512),
        basesize_ratio_range=(0.15, 0.9),
        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]),
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2)),
    mask_head=dict(
        type='SSDMaskHead',
        num_convs=4,
        in_channels=(512, 1024, 512, 256, 256, 256),
        conv_out_channels=(2, 2, 2, 2, 2, 2),
        num_classes=81,
        loss_mask=dict(
            type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)))
cudnn_benchmark = True
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        ignore_iof_thr=-1,
        gt_max_assign_all=False),
    smoothl1_beta=1.,
    allowed_border=-1,
    pos_weight=-1,
    neg_pos_ratio=3,
    mask_size=28,
    debug=False)
test_cfg = dict(
    nms=dict(type='nms', iou_thr=0.45),
    min_bbox_size=0,
    score_thr=0.02,
    max_per_img=200)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=1,
    workers_per_gpu=1,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(input_size, input_size),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0.5,
        with_mask=True,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(input_size, input_size),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=True,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(input_size, input_size),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # NOTE(review): this hook appears to have been commented out in the
        # original config (its '#' was lost in the paste) - confirm.
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/tsm_mask_ssd'
load_from = None
resume_from = None
workflow = [('train', 1)]