open-mmlab / mmdetection

OpenMMLab Detection Toolbox and Benchmark
https://mmdetection.readthedocs.io
Apache License 2.0
29.33k stars 9.42k forks source link

How to add vgg16 to the backbone of Faster R-cnn? #4849

Closed Cloudyunn closed 3 years ago

Cloudyunn commented 3 years ago

To compare results in my graduation thesis, I need to use VGG16 as the backbone of the Faster R-CNN model, but mmdetection only provides ResNet and SSDVGG by default. How can I add VGG so that training works?

fengyouliang commented 3 years ago

Add vgg.py to mmdet/models/backbones/vgg.py.

# vgg.py
# Copyright (c) Open-MMLab. All rights reserved.

import torch.nn as nn
from mmcv.cnn.utils import constant_init, kaiming_init, normal_init
from mmcv.utils.parrots_wrapper import _BatchNorm
from mmdet.models.builder import BACKBONES

def conv3x3(in_planes, out_planes, dilation=1):
    """Build a 3x3 ``nn.Conv2d`` whose padding equals its dilation.

    With the default stride of 1 this keeps the spatial size of the input.
    """
    conv_kwargs = dict(kernel_size=3, padding=dilation, dilation=dilation)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)

def make_vgg_layer(inplanes,
                   planes,
                   num_blocks,
                   dilation=1,
                   with_bn=False,
                   ceil_mode=False):
    """Return the flat list of modules that make up one VGG stage.

    The stage is ``num_blocks`` repetitions of conv3x3 -> (BatchNorm2d when
    ``with_bn``) -> ReLU, followed by a single 2x2, stride-2 max-pool.
    """
    stage = []
    in_channels = inplanes
    for _ in range(num_blocks):
        block = [conv3x3(in_channels, planes, dilation)]
        if with_bn:
            block.append(nn.BatchNorm2d(planes))
        block.append(nn.ReLU(inplace=True))
        stage += block
        # every conv after the first one maps planes -> planes
        in_channels = planes

    pool = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)
    stage.append(pool)
    return stage

@BACKBONES.register_module()
class VGG(nn.Module):
    """VGG backbone that returns per-stage feature maps.

    All layers live in one ``nn.Sequential`` registered as ``features``.
    Stage ``i`` consists of conv3x3 (+ BatchNorm2d) + ReLU blocks followed by
    a 2x2 max-pool and has 64, 128, 256, 512, 512 output channels for
    ``i = 0..4``. ``forward`` returns the output of every stage listed in
    ``out_indices``.

    Args:
        depth (int): Depth of vgg, from {11, 13, 16, 19}.
        with_bn (bool): Use BatchNorm or not.
        num_stages (int): VGG stages, normally 5. Must be in [1, 5].
        dilations (Sequence[int]): Dilation of each stage; its length must
            equal ``num_stages``.
        out_indices (Sequence[int]): 0-based indices of the stages whose
            outputs are returned. Each must be smaller than ``num_stages``.
        frozen_stages (int): Number of leading stages to freeze (eval mode
            and ``requires_grad=False``). -1 means not freezing any
            parameters.
        bn_eval (bool): Stored on the instance but not read by any method of
            this class; use ``norm_eval`` to keep BN layers in eval mode.
        norm_eval (bool): Whether to put BN layers in eval mode while the
            model is in training mode, namely, freeze running stats (mean
            and var).
        bn_frozen (bool): Stored on the instance but not read by any method
            of this class.
        ceil_mode (bool): ``ceil_mode`` passed to every max-pool layer.
        with_last_pool (bool): Whether to keep the max-pool that ends the
            last stage.
    """

    # depth -> number of conv blocks in each of the five stages
    arch_settings = {
        11: (1, 1, 2, 2, 2),
        13: (2, 2, 2, 2, 2),
        16: (2, 2, 3, 3, 3),
        19: (2, 2, 4, 4, 4)
    }

    def __init__(self,
                 depth,
                 with_bn=False,
                 num_stages=5,
                 dilations=(1, 1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 frozen_stages=-1,
                 bn_eval=True,
                 norm_eval=False,
                 bn_frozen=False,
                 ceil_mode=False,
                 with_last_pool=True):
        super(VGG, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for vgg')
        assert 1 <= num_stages <= 5
        stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        assert len(dilations) == num_stages
        # Stage indices are 0-based and there is no classifier stage, so an
        # index equal to ``num_stages`` would never be emitted by forward().
        assert max(out_indices) < num_stages, (
            f'out_indices {tuple(out_indices)} must all be smaller than '
            f'num_stages={num_stages}')
        # Fail here rather than with an IndexError inside train().
        assert frozen_stages <= num_stages, (
            f'frozen_stages={frozen_stages} exceeds num_stages={num_stages}')

        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.norm_eval = norm_eval

        self.inplanes = 3
        start_idx = 0
        vgg_layers = []
        # [start, end) positions of each stage inside the flat Sequential
        self.range_sub_modules = []
        for i, num_blocks in enumerate(self.stage_blocks):
            # per block: conv + ReLU (+ BN); plus one max-pool per stage
            num_modules = num_blocks * (2 + with_bn) + 1
            end_idx = start_idx + num_modules
            dilation = dilations[i]
            planes = 64 * 2 ** i if i < 4 else 512
            vgg_layer = make_vgg_layer(
                self.inplanes,
                planes,
                num_blocks,
                dilation=dilation,
                with_bn=with_bn,
                ceil_mode=ceil_mode)
            vgg_layers.extend(vgg_layer)
            self.inplanes = planes
            self.range_sub_modules.append([start_idx, end_idx])
            start_idx = end_idx
        if not with_last_pool:
            # drop the trailing max-pool and shrink the last stage's range
            vgg_layers.pop(-1)
            self.range_sub_modules[-1][1] -= 1
        self.module_name = 'features'
        self.add_module(self.module_name, nn.Sequential(*vgg_layers))

    def init_weights(self, pretrained=None):
        """Initialise the layers in ``features``.

        Args:
            pretrained (str, optional): Loading pretrained weights is not
                implemented. A non-None value is ignored with a warning and
                the layers keep their existing initialisation.
        """
        if pretrained is not None:
            import warnings
            warnings.warn(
                'VGG.init_weights cannot load pretrained weights yet; '
                f'ignoring pretrained={pretrained!r}.')
            return
        for m in self.features:
            if isinstance(m, nn.Conv2d):
                kaiming_init(m)
            elif isinstance(m, _BatchNorm):
                constant_init(m, 1)
            elif isinstance(m, nn.Linear):
                # __init__ creates no Linear layers, so this is inert today
                normal_init(m, std=0.01)

    def forward(self, x):
        """Run ``x`` through all stages and return the selected outputs.

        Returns:
            tuple: Outputs of the stages listed in ``out_indices``, in stage
            order.
        """
        outs = []
        vgg_layers = getattr(self, self.module_name)
        for i, (start, end) in enumerate(self.range_sub_modules):
            for j in range(start, end):
                x = vgg_layers[j](x)
            if i in self.out_indices:
                outs.append(x)

        return tuple(outs)

    def _freeze_stages(self):
        """Put the first ``frozen_stages`` stages in eval mode and stop
        gradient computation for their parameters."""
        vgg_layers = getattr(self, self.module_name)
        # range() of a negative number is empty, so -1 freezes nothing
        for i in range(self.frozen_stages):
            for j in range(*self.range_sub_modules[i]):
                m = vgg_layers[j]
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def train(self, mode=True):
        """Set the training mode while keeping frozen parts frozen."""
        super(VGG, self).train(mode)
        # super().train() flips every submodule, so re-apply the freeze
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # only BatchNorm layers are switched back to eval mode
                if isinstance(m, _BatchNorm):
                    m.eval()

Change mmdet/models/backbones/__init__.py: add `from .vgg import VGG` and add "VGG" to `__all__`.

Change the config you use (the original Faster R-CNN config):

# Partial override of a Faster R-CNN config: only the entries that differ from
# the original config are listed here.
model = dict(
    type='FasterRCNN',
    # no pretrained weights are loaded for this backbone
    pretrained=None,
    backbone=dict(
        type='VGG',
        depth=19,
        with_bn=False,
        num_stages=5,
        dilations=(1, 1, 1, 1, 1),
        # return the outputs of stages 1-4; stage 0 is not passed on
        out_indices=(1, 2, 3, 4),
        frozen_stages=-1,
        bn_eval=False,
        bn_frozen=False,
        ceil_mode=False,
        # drop the max-pool that ends the last stage
        with_last_pool=False,
    ),
    neck=dict(
        # channel counts of VGG stages 1-4, matching out_indices above
        in_channels=[128, 256, 512, 512],),
)

Only the modified entries are shown. PS: pretrained models are not supported yet.

Cloudyunn commented 3 years ago

Add vgg.py to mmdet/models/backbones/vgg.py.

# vgg.py
# Copyright (c) Open-MMLab. All rights reserved.

import torch.nn as nn
from mmcv.cnn.utils import constant_init, kaiming_init, normal_init
from mmcv.utils.parrots_wrapper import _BatchNorm
from mmdet.models.builder import BACKBONES

def conv3x3(in_planes, out_planes, dilation=1):
    """Build a 3x3 ``nn.Conv2d`` whose padding equals its dilation.

    With the default stride of 1 this keeps the spatial size of the input.
    """
    conv_kwargs = dict(kernel_size=3, padding=dilation, dilation=dilation)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)

def make_vgg_layer(inplanes,
                   planes,
                   num_blocks,
                   dilation=1,
                   with_bn=False,
                   ceil_mode=False):
    """Return the flat list of modules that make up one VGG stage.

    The stage is ``num_blocks`` repetitions of conv3x3 -> (BatchNorm2d when
    ``with_bn``) -> ReLU, followed by a single 2x2, stride-2 max-pool.
    """
    stage = []
    in_channels = inplanes
    for _ in range(num_blocks):
        block = [conv3x3(in_channels, planes, dilation)]
        if with_bn:
            block.append(nn.BatchNorm2d(planes))
        block.append(nn.ReLU(inplace=True))
        stage += block
        # every conv after the first one maps planes -> planes
        in_channels = planes

    pool = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)
    stage.append(pool)
    return stage

@BACKBONES.register_module()
class VGG(nn.Module):
    """VGG backbone that returns per-stage feature maps.

    All layers live in one ``nn.Sequential`` registered as ``features``.
    Stage ``i`` consists of conv3x3 (+ BatchNorm2d) + ReLU blocks followed by
    a 2x2 max-pool and has 64, 128, 256, 512, 512 output channels for
    ``i = 0..4``. ``forward`` returns the output of every stage listed in
    ``out_indices``.

    Args:
        depth (int): Depth of vgg, from {11, 13, 16, 19}.
        with_bn (bool): Use BatchNorm or not.
        num_stages (int): VGG stages, normally 5. Must be in [1, 5].
        dilations (Sequence[int]): Dilation of each stage; its length must
            equal ``num_stages``.
        out_indices (Sequence[int]): 0-based indices of the stages whose
            outputs are returned. Each must be smaller than ``num_stages``.
        frozen_stages (int): Number of leading stages to freeze (eval mode
            and ``requires_grad=False``). -1 means not freezing any
            parameters.
        bn_eval (bool): Stored on the instance but not read by any method of
            this class; use ``norm_eval`` to keep BN layers in eval mode.
        norm_eval (bool): Whether to put BN layers in eval mode while the
            model is in training mode, namely, freeze running stats (mean
            and var).
        bn_frozen (bool): Stored on the instance but not read by any method
            of this class.
        ceil_mode (bool): ``ceil_mode`` passed to every max-pool layer.
        with_last_pool (bool): Whether to keep the max-pool that ends the
            last stage.
    """

    # depth -> number of conv blocks in each of the five stages
    arch_settings = {
        11: (1, 1, 2, 2, 2),
        13: (2, 2, 2, 2, 2),
        16: (2, 2, 3, 3, 3),
        19: (2, 2, 4, 4, 4)
    }

    def __init__(self,
                 depth,
                 with_bn=False,
                 num_stages=5,
                 dilations=(1, 1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 frozen_stages=-1,
                 bn_eval=True,
                 norm_eval=False,
                 bn_frozen=False,
                 ceil_mode=False,
                 with_last_pool=True):
        super(VGG, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for vgg')
        assert 1 <= num_stages <= 5
        stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        assert len(dilations) == num_stages
        # Stage indices are 0-based and there is no classifier stage, so an
        # index equal to ``num_stages`` would never be emitted by forward().
        assert max(out_indices) < num_stages, (
            f'out_indices {tuple(out_indices)} must all be smaller than '
            f'num_stages={num_stages}')
        # Fail here rather than with an IndexError inside train().
        assert frozen_stages <= num_stages, (
            f'frozen_stages={frozen_stages} exceeds num_stages={num_stages}')

        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.norm_eval = norm_eval

        self.inplanes = 3
        start_idx = 0
        vgg_layers = []
        # [start, end) positions of each stage inside the flat Sequential
        self.range_sub_modules = []
        for i, num_blocks in enumerate(self.stage_blocks):
            # per block: conv + ReLU (+ BN); plus one max-pool per stage
            num_modules = num_blocks * (2 + with_bn) + 1
            end_idx = start_idx + num_modules
            dilation = dilations[i]
            planes = 64 * 2 ** i if i < 4 else 512
            vgg_layer = make_vgg_layer(
                self.inplanes,
                planes,
                num_blocks,
                dilation=dilation,
                with_bn=with_bn,
                ceil_mode=ceil_mode)
            vgg_layers.extend(vgg_layer)
            self.inplanes = planes
            self.range_sub_modules.append([start_idx, end_idx])
            start_idx = end_idx
        if not with_last_pool:
            # drop the trailing max-pool and shrink the last stage's range
            vgg_layers.pop(-1)
            self.range_sub_modules[-1][1] -= 1
        self.module_name = 'features'
        self.add_module(self.module_name, nn.Sequential(*vgg_layers))

    def init_weights(self, pretrained=None):
        """Initialise the layers in ``features``.

        Args:
            pretrained (str, optional): Loading pretrained weights is not
                implemented. A non-None value is ignored with a warning and
                the layers keep their existing initialisation.
        """
        if pretrained is not None:
            import warnings
            warnings.warn(
                'VGG.init_weights cannot load pretrained weights yet; '
                f'ignoring pretrained={pretrained!r}.')
            return
        for m in self.features:
            if isinstance(m, nn.Conv2d):
                kaiming_init(m)
            elif isinstance(m, _BatchNorm):
                constant_init(m, 1)
            elif isinstance(m, nn.Linear):
                # __init__ creates no Linear layers, so this is inert today
                normal_init(m, std=0.01)

    def forward(self, x):
        """Run ``x`` through all stages and return the selected outputs.

        Returns:
            tuple: Outputs of the stages listed in ``out_indices``, in stage
            order.
        """
        outs = []
        vgg_layers = getattr(self, self.module_name)
        for i, (start, end) in enumerate(self.range_sub_modules):
            for j in range(start, end):
                x = vgg_layers[j](x)
            if i in self.out_indices:
                outs.append(x)

        return tuple(outs)

    def _freeze_stages(self):
        """Put the first ``frozen_stages`` stages in eval mode and stop
        gradient computation for their parameters."""
        vgg_layers = getattr(self, self.module_name)
        # range() of a negative number is empty, so -1 freezes nothing
        for i in range(self.frozen_stages):
            for j in range(*self.range_sub_modules[i]):
                m = vgg_layers[j]
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def train(self, mode=True):
        """Set the training mode while keeping frozen parts frozen."""
        super(VGG, self).train(mode)
        # super().train() flips every submodule, so re-apply the freeze
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # only BatchNorm layers are switched back to eval mode
                if isinstance(m, _BatchNorm):
                    m.eval()

Change mmdet/models/backbones/__init__.py:

  • add from .vgg import VGG
  • add "VGG" into __all__

Change the config you use (the original Faster R-CNN config):

# Partial override of a Faster R-CNN config: only the entries that differ from
# the original config are listed here.
model = dict(
    type='FasterRCNN',
    # no pretrained weights are loaded for this backbone
    pretrained=None,
    backbone=dict(
        type='VGG',
        depth=19,
        with_bn=False,
        num_stages=5,
        dilations=(1, 1, 1, 1, 1),
        # return the outputs of stages 1-4; stage 0 is not passed on
        out_indices=(1, 2, 3, 4),
        frozen_stages=-1,
        bn_eval=False,
        bn_frozen=False,
        ceil_mode=False,
        # drop the max-pool that ends the last stage
        with_last_pool=False,
    ),
    neck=dict(
        # channel counts of VGG stages 1-4, matching out_indices above
        in_channels=[128, 256, 512, 512],),
)

Only the modified entries are shown. PS: pretrained models are not supported yet.

Thank you very much for your answer, but I still have some questions: 1. At first I used Colab to train with this backbone and followed your instructions, but it reported that mmcv-full 1.3.0 is not compatible and needs to be downgraded to 1.2.4, so I reinstalled mmcv. Running it again, it shows "VGG is not in the backbone registry". 2. I want to confirm once more: do your config modifications above directly replace the corresponding parts of the original config?

Traceback (most recent call last): File "tools/train.py", line 187, in main() File "tools/train.py", line 161, in main test_cfg=cfg.get('test_cfg')) File "/usr/local/lib/python3.7/dist-packages/mmdet/models/builder.py", line 77, in build_detector return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) File "/usr/local/lib/python3.7/dist-packages/mmdet/models/builder.py", line 34, in build return build_from_cfg(cfg, registry, default_args) File "/usr/local/lib/python3.7/dist-packages/mmcv/utils/registry.py", line 181, in build_from_cfg return obj_cls(**args) File "/usr/local/lib/python3.7/dist-packages/mmdet/models/detectors/faster_rcnn.py", line 24, in init pretrained=pretrained) File "/usr/local/lib/python3.7/dist-packages/mmdet/models/detectors/two_stage.py", line 26, in init self.backbone = build_backbone(backbone) File "/usr/local/lib/python3.7/dist-packages/mmdet/models/builder.py", line 39, in build_backbone return build(cfg, BACKBONES) File "/usr/local/lib/python3.7/dist-packages/mmdet/models/builder.py", line 34, in build return build_from_cfg(cfg, registry, default_args) File "/usr/local/lib/python3.7/dist-packages/mmcv/utils/registry.py", line 174, in build_from_cfg f'{obj_type} is not in the {registry.name} registry') KeyError: 'VGG is not in the backbone registry'

fengyouliang commented 3 years ago

1. VGG is not registered

  1. put vgg.py in mmdet/models/backbones/vgg.py
  2. add from .vgg import VGG
  3. add "VGG" into __all__

2. Replace the corresponding entry


You can refer to the documentation

inder9999 commented 3 years ago

@fengyouliang, @jshilong
Hi, I want to run Faster R-CNN with vgg16 without FPN network(i.e. neck). So what changes would be required in the configurations? Thanks for the help.

Cloudyunn commented 3 years ago

Sorry, I have not solved this problem yet. Actually, I haven't worked on this research for a long time; you can ask the maintainers for help.

------------------ 原始邮件 ------------------ 发件人: "open-mmlab/mmdetection" @.>; 发送时间: 2021年6月28日(星期一) 下午2:52 @.>; @.**@.>; 主题: Re: [open-mmlab/mmdetection] How to add vgg16 to the backbone of Faster R-cnn? (#4849)

Hi, I want to run Faster R-CNN with VGG16 without the FPN network (i.e. the neck). What changes would be required in the configuration? Thanks for the help.

— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub, or unsubscribe.

jshilong commented 3 years ago

@fengyouliang, @jshilong Hi, I want to run Faster R-CNN with vgg16 without FPN network(i.e. neck). So what changes would be required in the configurations? Thanks for the help.

Sorry for the late response. You can refer to https://github.com/open-mmlab/mmdetection/blob/5590f80edf6f45c7a69fab883b3200d68dc9b213/configs/_base_/models/faster_rcnn_r50_caffe_c4.py#L1 to modify your configuration.

jieruyao49 commented 3 years ago

Add vgg.py to mmdet/models/backbones/vgg.py.

# vgg.py
# Copyright (c) Open-MMLab. All rights reserved.

import torch.nn as nn
from mmcv.cnn.utils import constant_init, kaiming_init, normal_init
from mmcv.utils.parrots_wrapper import _BatchNorm
from mmdet.models.builder import BACKBONES

def conv3x3(in_planes, out_planes, dilation=1):
    """Build a 3x3 ``nn.Conv2d`` whose padding equals its dilation.

    With the default stride of 1 this keeps the spatial size of the input.
    """
    conv_kwargs = dict(kernel_size=3, padding=dilation, dilation=dilation)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)

def make_vgg_layer(inplanes,
                   planes,
                   num_blocks,
                   dilation=1,
                   with_bn=False,
                   ceil_mode=False):
    """Return the flat list of modules that make up one VGG stage.

    The stage is ``num_blocks`` repetitions of conv3x3 -> (BatchNorm2d when
    ``with_bn``) -> ReLU, followed by a single 2x2, stride-2 max-pool.
    """
    stage = []
    in_channels = inplanes
    for _ in range(num_blocks):
        block = [conv3x3(in_channels, planes, dilation)]
        if with_bn:
            block.append(nn.BatchNorm2d(planes))
        block.append(nn.ReLU(inplace=True))
        stage += block
        # every conv after the first one maps planes -> planes
        in_channels = planes

    pool = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)
    stage.append(pool)
    return stage

@BACKBONES.register_module()
class VGG(nn.Module):
    """VGG backbone that returns per-stage feature maps.

    All layers live in one ``nn.Sequential`` registered as ``features``.
    Stage ``i`` consists of conv3x3 (+ BatchNorm2d) + ReLU blocks followed by
    a 2x2 max-pool and has 64, 128, 256, 512, 512 output channels for
    ``i = 0..4``. ``forward`` returns the output of every stage listed in
    ``out_indices``.

    Args:
        depth (int): Depth of vgg, from {11, 13, 16, 19}.
        with_bn (bool): Use BatchNorm or not.
        num_stages (int): VGG stages, normally 5. Must be in [1, 5].
        dilations (Sequence[int]): Dilation of each stage; its length must
            equal ``num_stages``.
        out_indices (Sequence[int]): 0-based indices of the stages whose
            outputs are returned. Each must be smaller than ``num_stages``.
        frozen_stages (int): Number of leading stages to freeze (eval mode
            and ``requires_grad=False``). -1 means not freezing any
            parameters.
        bn_eval (bool): Stored on the instance but not read by any method of
            this class; use ``norm_eval`` to keep BN layers in eval mode.
        norm_eval (bool): Whether to put BN layers in eval mode while the
            model is in training mode, namely, freeze running stats (mean
            and var).
        bn_frozen (bool): Stored on the instance but not read by any method
            of this class.
        ceil_mode (bool): ``ceil_mode`` passed to every max-pool layer.
        with_last_pool (bool): Whether to keep the max-pool that ends the
            last stage.
    """

    # depth -> number of conv blocks in each of the five stages
    arch_settings = {
        11: (1, 1, 2, 2, 2),
        13: (2, 2, 2, 2, 2),
        16: (2, 2, 3, 3, 3),
        19: (2, 2, 4, 4, 4)
    }

    def __init__(self,
                 depth,
                 with_bn=False,
                 num_stages=5,
                 dilations=(1, 1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 frozen_stages=-1,
                 bn_eval=True,
                 norm_eval=False,
                 bn_frozen=False,
                 ceil_mode=False,
                 with_last_pool=True):
        super(VGG, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for vgg')
        assert 1 <= num_stages <= 5
        stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        assert len(dilations) == num_stages
        # Stage indices are 0-based and there is no classifier stage, so an
        # index equal to ``num_stages`` would never be emitted by forward().
        assert max(out_indices) < num_stages, (
            f'out_indices {tuple(out_indices)} must all be smaller than '
            f'num_stages={num_stages}')
        # Fail here rather than with an IndexError inside train().
        assert frozen_stages <= num_stages, (
            f'frozen_stages={frozen_stages} exceeds num_stages={num_stages}')

        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.norm_eval = norm_eval

        self.inplanes = 3
        start_idx = 0
        vgg_layers = []
        # [start, end) positions of each stage inside the flat Sequential
        self.range_sub_modules = []
        for i, num_blocks in enumerate(self.stage_blocks):
            # per block: conv + ReLU (+ BN); plus one max-pool per stage
            num_modules = num_blocks * (2 + with_bn) + 1
            end_idx = start_idx + num_modules
            dilation = dilations[i]
            planes = 64 * 2 ** i if i < 4 else 512
            vgg_layer = make_vgg_layer(
                self.inplanes,
                planes,
                num_blocks,
                dilation=dilation,
                with_bn=with_bn,
                ceil_mode=ceil_mode)
            vgg_layers.extend(vgg_layer)
            self.inplanes = planes
            self.range_sub_modules.append([start_idx, end_idx])
            start_idx = end_idx
        if not with_last_pool:
            # drop the trailing max-pool and shrink the last stage's range
            vgg_layers.pop(-1)
            self.range_sub_modules[-1][1] -= 1
        self.module_name = 'features'
        self.add_module(self.module_name, nn.Sequential(*vgg_layers))

    def init_weights(self, pretrained=None):
        """Initialise the layers in ``features``.

        Args:
            pretrained (str, optional): Loading pretrained weights is not
                implemented. A non-None value is ignored with a warning and
                the layers keep their existing initialisation.
        """
        if pretrained is not None:
            import warnings
            warnings.warn(
                'VGG.init_weights cannot load pretrained weights yet; '
                f'ignoring pretrained={pretrained!r}.')
            return
        for m in self.features:
            if isinstance(m, nn.Conv2d):
                kaiming_init(m)
            elif isinstance(m, _BatchNorm):
                constant_init(m, 1)
            elif isinstance(m, nn.Linear):
                # __init__ creates no Linear layers, so this is inert today
                normal_init(m, std=0.01)

    def forward(self, x):
        """Run ``x`` through all stages and return the selected outputs.

        Returns:
            tuple: Outputs of the stages listed in ``out_indices``, in stage
            order.
        """
        outs = []
        vgg_layers = getattr(self, self.module_name)
        for i, (start, end) in enumerate(self.range_sub_modules):
            for j in range(start, end):
                x = vgg_layers[j](x)
            if i in self.out_indices:
                outs.append(x)

        return tuple(outs)

    def _freeze_stages(self):
        """Put the first ``frozen_stages`` stages in eval mode and stop
        gradient computation for their parameters."""
        vgg_layers = getattr(self, self.module_name)
        # range() of a negative number is empty, so -1 freezes nothing
        for i in range(self.frozen_stages):
            for j in range(*self.range_sub_modules[i]):
                m = vgg_layers[j]
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def train(self, mode=True):
        """Set the training mode while keeping frozen parts frozen."""
        super(VGG, self).train(mode)
        # super().train() flips every submodule, so re-apply the freeze
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # only BatchNorm layers are switched back to eval mode
                if isinstance(m, _BatchNorm):
                    m.eval()

Change mmdet/models/backbones/__init__.py:

  • add from .vgg import VGG
  • add "VGG" into __all__

Change the config you use (the original Faster R-CNN config):

# Partial override of a Faster R-CNN config: only the entries that differ from
# the original config are listed here.
model = dict(
    type='FasterRCNN',
    # no pretrained weights are loaded for this backbone
    pretrained=None,
    backbone=dict(
        type='VGG',
        depth=19,
        with_bn=False,
        num_stages=5,
        dilations=(1, 1, 1, 1, 1),
        # return the outputs of stages 1-4; stage 0 is not passed on
        out_indices=(1, 2, 3, 4),
        frozen_stages=-1,
        bn_eval=False,
        bn_frozen=False,
        ceil_mode=False,
        # drop the max-pool that ends the last stage
        with_last_pool=False,
    ),
    neck=dict(
        # channel counts of VGG stages 1-4, matching out_indices above
        in_channels=[128, 256, 512, 512],),
)

Only the modified entries are shown. PS: pretrained models are not supported yet.

How to support vgg19 pre-trained on ImageNet?

zhanghao5201 commented 2 years ago

1. not registry

  1. put vgg.py in mmdet/models/backbones/vgg.py
  2. add from .vgg import VGG
  3. add "VGG" into __all__

2. Replace the corresponding entry

You can refer to the documentation

I also did this, but it still fails with KeyError: "SingleStageDetector: 'XXNet is not in the models registry'"

pinnintipraneethkumar commented 1 year ago

Thanks for the answer! it worked for me..