How to add vgg16 to the backbone of Faster R-cnn?

To compare results for my graduation thesis, I need to use VGG16 as the backbone of a Faster R-CNN model, but mmdetection only ships ResNet and SSDVGG backbones by default. How can I add VGG so that the model trains successfully?

Add `vgg.py` to `mmdet/models/backbones/vgg.py`.

# vgg.py
# Copyright (c) Open-MMLab. All rights reserved.

import torch.nn as nn
from mmcv.cnn.utils import constant_init, kaiming_init, normal_init
from mmcv.utils.parrots_wrapper import _BatchNorm
from mmdet.models.builder import BACKBONES

def conv3x3(in_planes, out_planes, dilation=1):
    """Build a 3x3 ``nn.Conv2d`` whose padding equals its dilation.

    Args:
        in_planes (int): Number of input channels.
        out_planes (int): Number of output channels.
        dilation (int): Dilation rate; also used as the padding.

    Returns:
        nn.Conv2d: The convolution layer.
    """
    # With the default stride of 1, padding == dilation keeps the spatial
    # size of the input unchanged for a 3x3 kernel.
    conv_kwargs = dict(kernel_size=3, padding=dilation, dilation=dilation)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)

def make_vgg_layer(inplanes,
                   planes,
                   num_blocks,
                   dilation=1,
                   with_bn=False,
                   ceil_mode=False):
    """Assemble the flat module list of one VGG stage.

    Args:
        inplanes (int): Input channels of the first convolution.
        planes (int): Output channels of every convolution in the stage.
        num_blocks (int): Number of conv(-BN)-ReLU blocks in the stage.
        dilation (int): Dilation passed to every convolution.
        with_bn (bool): Insert ``nn.BatchNorm2d`` after each convolution.
        ceil_mode (bool): ``ceil_mode`` of the closing max-pool.

    Returns:
        list[nn.Module]: ``num_blocks`` groups of conv, optional BN and
        ReLU, followed by one 2x2 stride-2 ``nn.MaxPool2d``.
    """
    stage = []
    in_channels = inplanes
    for _ in range(num_blocks):
        block = [conv3x3(in_channels, planes, dilation)]
        if with_bn:
            block.append(nn.BatchNorm2d(planes))
        block.append(nn.ReLU(inplace=True))
        stage.extend(block)
        # Only the first convolution of the stage changes the channel count.
        in_channels = planes

    pool = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)
    stage.append(pool)
    return stage

@BACKBONES.register_module()
class VGG(nn.Module):
    """VGG backbone.

    Args:
        depth (int): Depth of vgg, from {11, 13, 16, 19}.
        with_bn (bool): Use BatchNorm or not.
        num_stages (int): VGG stages, normally 5.
        dilations (Sequence[int]): Dilation of each stage.
        out_indices (Sequence[int]): Zero-based indices of the stages whose
            outputs are returned; each must be smaller than ``num_stages``.
        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
            not freezing any parameters.
        bn_eval (bool): Stored on the instance but not read by this class;
            ``norm_eval`` is the switch that ``train()`` consults.
        norm_eval (bool): Whether to set BN layers as eval mode while
            training, namely, freeze running stats (mean and var).
        bn_frozen (bool): Stored on the instance but not read by this class.
        ceil_mode (bool): ``ceil_mode`` of the max-pooling layer that ends
            each stage.
        with_last_pool (bool): Whether to keep the max-pooling layer at the
            end of the last stage.
    """

    # Number of conv blocks per stage for each supported depth.
    arch_settings = {
        11: (1, 1, 2, 2, 2),
        13: (2, 2, 2, 2, 2),
        16: (2, 2, 3, 3, 3),
        19: (2, 2, 4, 4, 4)
    }

    def __init__(self,
                 depth,
                 with_bn=False,
                 num_stages=5,
                 dilations=(1, 1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 frozen_stages=-1,
                 bn_eval=True,
                 norm_eval=False,
                 bn_frozen=False,
                 ceil_mode=False,
                 with_last_pool=True):
        super(VGG, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for vgg')
        assert 1 <= num_stages <= 5
        stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        assert len(dilations) == num_stages
        # Stage indices are zero-based, so ``num_stages`` itself is out of
        # range; forward() would silently drop such an index.
        assert max(out_indices) < num_stages
        # _freeze_stages() indexes range_sub_modules once per frozen stage.
        assert frozen_stages <= num_stages

        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.norm_eval = norm_eval

        self.inplanes = 3
        start_idx = 0
        vgg_layers = []
        # [start, end) positions of each stage inside the flat Sequential.
        self.range_sub_modules = []
        for i, num_blocks in enumerate(self.stage_blocks):
            # Each block is conv + ReLU (+ BN); one max-pool closes the stage.
            num_modules = num_blocks * (2 + with_bn) + 1
            end_idx = start_idx + num_modules
            dilation = dilations[i]
            # Channel widths: 64, 128, 256, 512, 512.
            planes = 64 * 2 ** i if i < 4 else 512
            vgg_layer = make_vgg_layer(
                self.inplanes,
                planes,
                num_blocks,
                dilation=dilation,
                with_bn=with_bn,
                ceil_mode=ceil_mode)
            vgg_layers.extend(vgg_layer)
            self.inplanes = planes
            self.range_sub_modules.append([start_idx, end_idx])
            start_idx = end_idx
        if not with_last_pool:
            # Drop the final max-pool and shrink the last stage's range.
            vgg_layers.pop(-1)
            self.range_sub_modules[-1][1] -= 1
        self.module_name = 'features'
        self.add_module(self.module_name, nn.Sequential(*vgg_layers))

    def init_weights(self, pretrained=None):
        """Initialize the backbone weights.

        Args:
            pretrained (str | None): Checkpoint path or URL to load
                (non-strictly) into this module. If None, the layers in
                ``features`` are initialized from scratch.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if isinstance(pretrained, str):
            # Local imports keep the module-level import block unchanged.
            import logging

            from mmcv.runner import load_checkpoint

            # strict=False tolerates checkpoint keys this module lacks.
            load_checkpoint(
                self, pretrained, strict=False, logger=logging.getLogger())
        elif pretrained is None:
            for m in self.features:
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, _BatchNorm):
                    constant_init(m, 1)
                elif isinstance(m, nn.Linear):
                    normal_init(m, std=0.01)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Run the stages in order and collect the requested outputs.

        Returns:
            tuple: Output of every stage whose index is in ``out_indices``,
            in stage order.
        """
        outs = []
        vgg_layers = getattr(self, self.module_name)
        for i, (start, end) in enumerate(self.range_sub_modules):
            for j in range(start, end):
                x = vgg_layers[j](x)
            if i in self.out_indices:
                outs.append(x)

        return tuple(outs)

    def _freeze_stages(self):
        """Put the first ``frozen_stages`` stages in eval mode and stop
        gradient computation for their parameters."""
        vgg_layers = getattr(self, self.module_name)
        # range() is empty for frozen_stages <= 0, so nothing is frozen.
        for i in range(self.frozen_stages):
            for j in range(*self.range_sub_modules[i]):
                m = vgg_layers[j]
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def train(self, mode=True):
        """Switch train/eval mode while keeping frozen stages frozen."""
        super(VGG, self).train(mode)
        # super().train() resets every submodule, so re-apply the freeze.
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # trick: eval have effect on BatchNorm only
                if isinstance(m, _BatchNorm):
                    m.eval()

Change `mmdet/models/backbones/__init__.py`:

add from .vgg import VGG
add "VGG" into __all__

Change the config you use (the original Faster R-CNN config):

# Partial Faster R-CNN config: only the entries that differ from the
# original config are listed.
model = {
    'type': 'FasterRCNN',
    'pretrained': None,
    'backbone': {
        'type': 'VGG',
        'depth': 19,
        'with_bn': False,
        'num_stages': 5,
        'dilations': (1, 1, 1, 1, 1),
        # Stages 1-4 are passed on; their channel widths are the neck's
        # ``in_channels`` below.
        'out_indices': (1, 2, 3, 4),
        'frozen_stages': -1,
        'bn_eval': False,
        'bn_frozen': False,
        'ceil_mode': False,
        'with_last_pool': False,
    },
    'neck': {
        'in_channels': [128, 256, 512, 512],
    },
}

Only modifications are displayed PS: Not support pretrain model yet

Add `vgg.py` to `mmdet/models/backbones/vgg.py`.

# vgg.py
# Copyright (c) Open-MMLab. All rights reserved.

import torch.nn as nn
from mmcv.cnn.utils import constant_init, kaiming_init, normal_init
from mmcv.utils.parrots_wrapper import _BatchNorm
from mmdet.models.builder import BACKBONES

def conv3x3(in_planes, out_planes, dilation=1):
    """Build a 3x3 ``nn.Conv2d`` whose padding equals its dilation.

    Args:
        in_planes (int): Number of input channels.
        out_planes (int): Number of output channels.
        dilation (int): Dilation rate; also used as the padding.

    Returns:
        nn.Conv2d: The convolution layer.
    """
    # With the default stride of 1, padding == dilation keeps the spatial
    # size of the input unchanged for a 3x3 kernel.
    conv_kwargs = dict(kernel_size=3, padding=dilation, dilation=dilation)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)

def make_vgg_layer(inplanes,
                   planes,
                   num_blocks,
                   dilation=1,
                   with_bn=False,
                   ceil_mode=False):
    """Assemble the flat module list of one VGG stage.

    Args:
        inplanes (int): Input channels of the first convolution.
        planes (int): Output channels of every convolution in the stage.
        num_blocks (int): Number of conv(-BN)-ReLU blocks in the stage.
        dilation (int): Dilation passed to every convolution.
        with_bn (bool): Insert ``nn.BatchNorm2d`` after each convolution.
        ceil_mode (bool): ``ceil_mode`` of the closing max-pool.

    Returns:
        list[nn.Module]: ``num_blocks`` groups of conv, optional BN and
        ReLU, followed by one 2x2 stride-2 ``nn.MaxPool2d``.
    """
    stage = []
    in_channels = inplanes
    for _ in range(num_blocks):
        block = [conv3x3(in_channels, planes, dilation)]
        if with_bn:
            block.append(nn.BatchNorm2d(planes))
        block.append(nn.ReLU(inplace=True))
        stage.extend(block)
        # Only the first convolution of the stage changes the channel count.
        in_channels = planes

    pool = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)
    stage.append(pool)
    return stage

@BACKBONES.register_module()
class VGG(nn.Module):
    """VGG backbone.

    Args:
        depth (int): Depth of vgg, from {11, 13, 16, 19}.
        with_bn (bool): Use BatchNorm or not.
        num_stages (int): VGG stages, normally 5.
        dilations (Sequence[int]): Dilation of each stage.
        out_indices (Sequence[int]): Zero-based indices of the stages whose
            outputs are returned; each must be smaller than ``num_stages``.
        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
            not freezing any parameters.
        bn_eval (bool): Stored on the instance but not read by this class;
            ``norm_eval`` is the switch that ``train()`` consults.
        norm_eval (bool): Whether to set BN layers as eval mode while
            training, namely, freeze running stats (mean and var).
        bn_frozen (bool): Stored on the instance but not read by this class.
        ceil_mode (bool): ``ceil_mode`` of the max-pooling layer that ends
            each stage.
        with_last_pool (bool): Whether to keep the max-pooling layer at the
            end of the last stage.
    """

    # Number of conv blocks per stage for each supported depth.
    arch_settings = {
        11: (1, 1, 2, 2, 2),
        13: (2, 2, 2, 2, 2),
        16: (2, 2, 3, 3, 3),
        19: (2, 2, 4, 4, 4)
    }

    def __init__(self,
                 depth,
                 with_bn=False,
                 num_stages=5,
                 dilations=(1, 1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 frozen_stages=-1,
                 bn_eval=True,
                 norm_eval=False,
                 bn_frozen=False,
                 ceil_mode=False,
                 with_last_pool=True):
        super(VGG, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for vgg')
        assert 1 <= num_stages <= 5
        stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        assert len(dilations) == num_stages
        # Stage indices are zero-based, so ``num_stages`` itself is out of
        # range; forward() would silently drop such an index.
        assert max(out_indices) < num_stages
        # _freeze_stages() indexes range_sub_modules once per frozen stage.
        assert frozen_stages <= num_stages

        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.norm_eval = norm_eval

        self.inplanes = 3
        start_idx = 0
        vgg_layers = []
        # [start, end) positions of each stage inside the flat Sequential.
        self.range_sub_modules = []
        for i, num_blocks in enumerate(self.stage_blocks):
            # Each block is conv + ReLU (+ BN); one max-pool closes the stage.
            num_modules = num_blocks * (2 + with_bn) + 1
            end_idx = start_idx + num_modules
            dilation = dilations[i]
            # Channel widths: 64, 128, 256, 512, 512.
            planes = 64 * 2 ** i if i < 4 else 512
            vgg_layer = make_vgg_layer(
                self.inplanes,
                planes,
                num_blocks,
                dilation=dilation,
                with_bn=with_bn,
                ceil_mode=ceil_mode)
            vgg_layers.extend(vgg_layer)
            self.inplanes = planes
            self.range_sub_modules.append([start_idx, end_idx])
            start_idx = end_idx
        if not with_last_pool:
            # Drop the final max-pool and shrink the last stage's range.
            vgg_layers.pop(-1)
            self.range_sub_modules[-1][1] -= 1
        self.module_name = 'features'
        self.add_module(self.module_name, nn.Sequential(*vgg_layers))

    def init_weights(self, pretrained=None):
        """Initialize the backbone weights.

        Args:
            pretrained (str | None): Checkpoint path or URL to load
                (non-strictly) into this module. If None, the layers in
                ``features`` are initialized from scratch.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if isinstance(pretrained, str):
            # Local imports keep the module-level import block unchanged.
            import logging

            from mmcv.runner import load_checkpoint

            # strict=False tolerates checkpoint keys this module lacks.
            load_checkpoint(
                self, pretrained, strict=False, logger=logging.getLogger())
        elif pretrained is None:
            for m in self.features:
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, _BatchNorm):
                    constant_init(m, 1)
                elif isinstance(m, nn.Linear):
                    normal_init(m, std=0.01)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Run the stages in order and collect the requested outputs.

        Returns:
            tuple: Output of every stage whose index is in ``out_indices``,
            in stage order.
        """
        outs = []
        vgg_layers = getattr(self, self.module_name)
        for i, (start, end) in enumerate(self.range_sub_modules):
            for j in range(start, end):
                x = vgg_layers[j](x)
            if i in self.out_indices:
                outs.append(x)

        return tuple(outs)

    def _freeze_stages(self):
        """Put the first ``frozen_stages`` stages in eval mode and stop
        gradient computation for their parameters."""
        vgg_layers = getattr(self, self.module_name)
        # range() is empty for frozen_stages <= 0, so nothing is frozen.
        for i in range(self.frozen_stages):
            for j in range(*self.range_sub_modules[i]):
                m = vgg_layers[j]
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def train(self, mode=True):
        """Switch train/eval mode while keeping frozen stages frozen."""
        super(VGG, self).train(mode)
        # super().train() resets every submodule, so re-apply the freeze.
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # trick: eval have effect on BatchNorm only
                if isinstance(m, _BatchNorm):
                    m.eval()

Change `mmdet/models/backbones/__init__.py`:

add from .vgg import VGG
add "VGG" into __all__

Change the config you use (the original Faster R-CNN config):

# Partial Faster R-CNN config: only the entries that differ from the
# original config are listed.
model = {
    'type': 'FasterRCNN',
    'pretrained': None,
    'backbone': {
        'type': 'VGG',
        'depth': 19,
        'with_bn': False,
        'num_stages': 5,
        'dilations': (1, 1, 1, 1, 1),
        # Stages 1-4 are passed on; their channel widths are the neck's
        # ``in_channels`` below.
        'out_indices': (1, 2, 3, 4),
        'frozen_stages': -1,
        'bn_eval': False,
        'bn_frozen': False,
        'ceil_mode': False,
        'with_last_pool': False,
    },
    'neck': {
        'in_channels': [128, 256, 512, 512],
    },
}

Only modifications are displayed PS: Not support pretrain model yet

Thank you very much for your answer, but I still have some questions: 1. At first I used Colab to train with this backbone and followed your instructions, but it reported that mmcv-full 1.3.0 is not compatible and must be downgraded to 1.2.4, so I reinstalled mmcv. When I ran it again, it showed "VGG is not in the backbone registry". 2. I want to confirm once more: does your modification to the config above directly replace the corresponding part of the original config?

Traceback (most recent call last):
  File "tools/train.py", line 187, in <module>
    main()
  File "tools/train.py", line 161, in main
    test_cfg=cfg.get('test_cfg'))
  File "/usr/local/lib/python3.7/dist-packages/mmdet/models/builder.py", line 77, in build_detector
    return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
  File "/usr/local/lib/python3.7/dist-packages/mmdet/models/builder.py", line 34, in build
    return build_from_cfg(cfg, registry, default_args)
  File "/usr/local/lib/python3.7/dist-packages/mmcv/utils/registry.py", line 181, in build_from_cfg
    return obj_cls(**args)
  File "/usr/local/lib/python3.7/dist-packages/mmdet/models/detectors/faster_rcnn.py", line 24, in __init__
    pretrained=pretrained)
  File "/usr/local/lib/python3.7/dist-packages/mmdet/models/detectors/two_stage.py", line 26, in __init__
    self.backbone = build_backbone(backbone)
  File "/usr/local/lib/python3.7/dist-packages/mmdet/models/builder.py", line 39, in build_backbone
    return build(cfg, BACKBONES)
  File "/usr/local/lib/python3.7/dist-packages/mmdet/models/builder.py", line 34, in build
    return build_from_cfg(cfg, registry, default_args)
  File "/usr/local/lib/python3.7/dist-packages/mmcv/utils/registry.py", line 174, in build_from_cfg
    f'{obj_type} is not in the {registry.name} registry')
KeyError: 'VGG is not in the backbone registry'

1. VGG is not registered

put vgg.py in mmdet/models/backbones/vgg.py
add from .vgg import VGG
add "VGG" into __all__

2. Replace the corresponding entry

You can refer to the documentation

@fengyouliang, @jshilong
Hi, I want to run Faster R-CNN with vgg16 without FPN network(i.e. neck). So what changes would be required in the configurations? Thanks for the help.

Sorry, I have not solved this problem yet. Actually, I haven't worked on this research for a long time; you can ask the maintainers for help.

------------------ 原始邮件 ------------------ 发件人: "open-mmlab/mmdetection" @.>; 发送时间: 2021年6月28日(星期一) 下午2:52 @.>; @.**@.>; 主题: Re: [open-mmlab/mmdetection] How to add vgg16 to the backbone of Faster R-cnn? (#4849)

Hi, I want to run Faster R-CNN with vgg16 without the FPN network (i.e. the neck). So what changes would be required in the configurations? Thanks for the help.

— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub, or unsubscribe.

@fengyouliang, @jshilong Hi, I want to run Faster R-CNN with vgg16 without FPN network(i.e. neck). So what changes would be required in the configurations? Thanks for the help.

Sorry for the late response. You can refer to https://github.com/open-mmlab/mmdetection/blob/5590f80edf6f45c7a69fab883b3200d68dc9b213/configs/_base_/models/faster_rcnn_r50_caffe_c4.py#L1 to modify your configuration.

Add `vgg.py` to `mmdet/models/backbones/vgg.py`.

# vgg.py
# Copyright (c) Open-MMLab. All rights reserved.

import torch.nn as nn
from mmcv.cnn.utils import constant_init, kaiming_init, normal_init
from mmcv.utils.parrots_wrapper import _BatchNorm
from mmdet.models.builder import BACKBONES

def conv3x3(in_planes, out_planes, dilation=1):
    """Build a 3x3 ``nn.Conv2d`` whose padding equals its dilation.

    Args:
        in_planes (int): Number of input channels.
        out_planes (int): Number of output channels.
        dilation (int): Dilation rate; also used as the padding.

    Returns:
        nn.Conv2d: The convolution layer.
    """
    # With the default stride of 1, padding == dilation keeps the spatial
    # size of the input unchanged for a 3x3 kernel.
    conv_kwargs = dict(kernel_size=3, padding=dilation, dilation=dilation)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)

def make_vgg_layer(inplanes,
                   planes,
                   num_blocks,
                   dilation=1,
                   with_bn=False,
                   ceil_mode=False):
    """Assemble the flat module list of one VGG stage.

    Args:
        inplanes (int): Input channels of the first convolution.
        planes (int): Output channels of every convolution in the stage.
        num_blocks (int): Number of conv(-BN)-ReLU blocks in the stage.
        dilation (int): Dilation passed to every convolution.
        with_bn (bool): Insert ``nn.BatchNorm2d`` after each convolution.
        ceil_mode (bool): ``ceil_mode`` of the closing max-pool.

    Returns:
        list[nn.Module]: ``num_blocks`` groups of conv, optional BN and
        ReLU, followed by one 2x2 stride-2 ``nn.MaxPool2d``.
    """
    stage = []
    in_channels = inplanes
    for _ in range(num_blocks):
        block = [conv3x3(in_channels, planes, dilation)]
        if with_bn:
            block.append(nn.BatchNorm2d(planes))
        block.append(nn.ReLU(inplace=True))
        stage.extend(block)
        # Only the first convolution of the stage changes the channel count.
        in_channels = planes

    pool = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)
    stage.append(pool)
    return stage

@BACKBONES.register_module()
class VGG(nn.Module):
    """VGG backbone.

    Args:
        depth (int): Depth of vgg, from {11, 13, 16, 19}.
        with_bn (bool): Use BatchNorm or not.
        num_stages (int): VGG stages, normally 5.
        dilations (Sequence[int]): Dilation of each stage.
        out_indices (Sequence[int]): Zero-based indices of the stages whose
            outputs are returned; each must be smaller than ``num_stages``.
        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
            not freezing any parameters.
        bn_eval (bool): Stored on the instance but not read by this class;
            ``norm_eval`` is the switch that ``train()`` consults.
        norm_eval (bool): Whether to set BN layers as eval mode while
            training, namely, freeze running stats (mean and var).
        bn_frozen (bool): Stored on the instance but not read by this class.
        ceil_mode (bool): ``ceil_mode`` of the max-pooling layer that ends
            each stage.
        with_last_pool (bool): Whether to keep the max-pooling layer at the
            end of the last stage.
    """

    # Number of conv blocks per stage for each supported depth.
    arch_settings = {
        11: (1, 1, 2, 2, 2),
        13: (2, 2, 2, 2, 2),
        16: (2, 2, 3, 3, 3),
        19: (2, 2, 4, 4, 4)
    }

    def __init__(self,
                 depth,
                 with_bn=False,
                 num_stages=5,
                 dilations=(1, 1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 frozen_stages=-1,
                 bn_eval=True,
                 norm_eval=False,
                 bn_frozen=False,
                 ceil_mode=False,
                 with_last_pool=True):
        super(VGG, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for vgg')
        assert 1 <= num_stages <= 5
        stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        assert len(dilations) == num_stages
        # Stage indices are zero-based, so ``num_stages`` itself is out of
        # range; forward() would silently drop such an index.
        assert max(out_indices) < num_stages
        # _freeze_stages() indexes range_sub_modules once per frozen stage.
        assert frozen_stages <= num_stages

        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.bn_eval = bn_eval
        self.bn_frozen = bn_frozen
        self.norm_eval = norm_eval

        self.inplanes = 3
        start_idx = 0
        vgg_layers = []
        # [start, end) positions of each stage inside the flat Sequential.
        self.range_sub_modules = []
        for i, num_blocks in enumerate(self.stage_blocks):
            # Each block is conv + ReLU (+ BN); one max-pool closes the stage.
            num_modules = num_blocks * (2 + with_bn) + 1
            end_idx = start_idx + num_modules
            dilation = dilations[i]
            # Channel widths: 64, 128, 256, 512, 512.
            planes = 64 * 2 ** i if i < 4 else 512
            vgg_layer = make_vgg_layer(
                self.inplanes,
                planes,
                num_blocks,
                dilation=dilation,
                with_bn=with_bn,
                ceil_mode=ceil_mode)
            vgg_layers.extend(vgg_layer)
            self.inplanes = planes
            self.range_sub_modules.append([start_idx, end_idx])
            start_idx = end_idx
        if not with_last_pool:
            # Drop the final max-pool and shrink the last stage's range.
            vgg_layers.pop(-1)
            self.range_sub_modules[-1][1] -= 1
        self.module_name = 'features'
        self.add_module(self.module_name, nn.Sequential(*vgg_layers))

    def init_weights(self, pretrained=None):
        """Initialize the backbone weights.

        Args:
            pretrained (str | None): Checkpoint path or URL to load
                (non-strictly) into this module. If None, the layers in
                ``features`` are initialized from scratch.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if isinstance(pretrained, str):
            # Local imports keep the module-level import block unchanged.
            import logging

            from mmcv.runner import load_checkpoint

            # strict=False tolerates checkpoint keys this module lacks.
            load_checkpoint(
                self, pretrained, strict=False, logger=logging.getLogger())
        elif pretrained is None:
            for m in self.features:
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, _BatchNorm):
                    constant_init(m, 1)
                elif isinstance(m, nn.Linear):
                    normal_init(m, std=0.01)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Run the stages in order and collect the requested outputs.

        Returns:
            tuple: Output of every stage whose index is in ``out_indices``,
            in stage order.
        """
        outs = []
        vgg_layers = getattr(self, self.module_name)
        for i, (start, end) in enumerate(self.range_sub_modules):
            for j in range(start, end):
                x = vgg_layers[j](x)
            if i in self.out_indices:
                outs.append(x)

        return tuple(outs)

    def _freeze_stages(self):
        """Put the first ``frozen_stages`` stages in eval mode and stop
        gradient computation for their parameters."""
        vgg_layers = getattr(self, self.module_name)
        # range() is empty for frozen_stages <= 0, so nothing is frozen.
        for i in range(self.frozen_stages):
            for j in range(*self.range_sub_modules[i]):
                m = vgg_layers[j]
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def train(self, mode=True):
        """Switch train/eval mode while keeping frozen stages frozen."""
        super(VGG, self).train(mode)
        # super().train() resets every submodule, so re-apply the freeze.
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # trick: eval have effect on BatchNorm only
                if isinstance(m, _BatchNorm):
                    m.eval()

Change `mmdet/models/backbones/__init__.py`:

add from .vgg import VGG
add "VGG" into __all__

Change the config you use (the original Faster R-CNN config):

# Partial Faster R-CNN config: only the entries that differ from the
# original config are listed.
model = {
    'type': 'FasterRCNN',
    'pretrained': None,
    'backbone': {
        'type': 'VGG',
        'depth': 19,
        'with_bn': False,
        'num_stages': 5,
        'dilations': (1, 1, 1, 1, 1),
        # Stages 1-4 are passed on; their channel widths are the neck's
        # ``in_channels`` below.
        'out_indices': (1, 2, 3, 4),
        'frozen_stages': -1,
        'bn_eval': False,
        'bn_frozen': False,
        'ceil_mode': False,
        'with_last_pool': False,
    },
    'neck': {
        'in_channels': [128, 256, 512, 512],
    },
}

Only modifications are displayed PS: Not support pretrain model yet

How to support vgg19 pre-trained on ImageNet?

1. VGG is not registered

put vgg.py in mmdet/models/backbones/vgg.py

add from .vgg import VGG

add "VGG" into __all__

2. Replace the corresponding entry

You can refer to the documentation

I also did this, but it still fails with KeyError: "SingleStageDetector: 'XXNet is not in the models registry'"

Thanks for the answer! it worked for me..

open-mmlab / mmdetection