Closed ZhuYun97 closed 4 years ago
I have tried setting the learning rate to 0.025, 0.0025, etc., with a batch size of 8, but the result is the same.
Here is the related code:
backbone.py
@registry.BACKBONES.register("ResNeSt-101-FPN")
def build_resnest_fpn_backbone(cfg):
    """Build the backbone registered under the name "ResNeSt-101-FPN".

    A ResNeSt trunk (radix 2, deep stem, average-pool downsampling, avd
    enabled) is chained with an FPN whose four inputs have
    ``RES2_OUT_CHANNELS`` times 1, 2, 4 and 8 channels.

    Args:
        cfg: config node; reads ``MODEL.RESNETS.RES2_OUT_CHANNELS``,
            ``MODEL.RESNETS.BACKBONE_OUT_CHANNELS``, ``MODEL.FPN.USE_GN`` and
            ``MODEL.FPN.USE_RELU`` here (the trunk reads further keys itself).

    Returns:
        An ``nn.Sequential`` with the sub-modules ``body`` and ``fpn`` and an
        extra ``out_channels`` attribute holding the FPN output width.
    """
    trunk = resnest.ResNeSt(
        resnest.ResnestBottleneck,
        cfg,
        radix=2,
        groups=1,
        bottleneck_width=64,
        deep_stem=True,
        stem_width=64,
        avg_down=True,
        avd=True,
        avd_first=False,
    )

    res2_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
    fpn_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS

    # Channel count doubles from one pyramid input to the next.
    pyramid_inputs = [res2_channels * factor for factor in (1, 2, 4, 8)]
    pyramid = fpn_module.FPN(
        in_channels_list=pyramid_inputs,
        out_channels=fpn_channels,
        conv_block=conv_with_kaiming_uniform(
            cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU
        ),
        top_blocks=fpn_module.LastLevelMaxPool(),
    )

    named_parts = OrderedDict([("body", trunk), ("fpn", pyramid)])
    backbone = nn.Sequential(named_parts)
    backbone.out_channels = fpn_channels
    return backbone
resnest.py
from detectron2.layers import (
Conv2d,
DeformConv,
FrozenBatchNorm2d,
ModulatedDeformConv,
NaiveSyncBatchNorm,
ShapeSpec,
get_norm,
)
from .splat import SplAtConv2d
from maskrcnn_benchmark.utils.registry import Registry
from collections import namedtuple
import torch
import torch.nn.functional as F
from torch import nn
import math
# Description of one residual stage of the backbone. Fields:
#   index           -- index of the stage, e.g. 1, 2, ..., 5
#   block_count     -- number of residual blocks in the stage
#   return_features -- True => return the last feature map from this stage
StageSpec = namedtuple(
    "StageSpec", ("index", "block_count", "return_features")
)

# ResNeSt-101-FPN (including all stages)
ResNeSt101FPNStagesTo5 = tuple(
    StageSpec(*stage_fields)
    for stage_fields in (
        (1, 3, True),
        (2, 4, True),
        (3, 23, True),
        (4, 3, True),
    )
)
class ResNeSt(nn.Module):
    """ResNeSt trunk that returns a list of per-stage feature maps.

    The stages to build are looked up in ``_STAGE_SPECS`` under
    ``cfg.MODEL.BACKBONE.CONV_BODY``. Each spec yields a ``layer<index>``
    sub-module; ``forward`` collects the output of every stage whose spec has
    ``return_features`` set.

    Args:
        block: residual block class; must expose ``expansion`` and accept the
            keyword arguments passed by ``_make_layer``.
        cfg: config node providing ``MODEL.BACKBONE.CONV_BODY``.
        radix, groups, bottleneck_width: forwarded to every block
            (``groups`` is passed as ``cardinality``).
        dilated, dilation: select stride/dilation of stages 3 and 4, see
            ``_late_stage_geometry``.
        deep_stem: use three 3x3 convolutions as the stem instead of one 7x7.
        stem_width: channel width of the deep stem.
        avg_down: downsample shortcuts with average pooling + 1x1 conv
            instead of a strided 1x1 conv.
        rectified_conv, rectify_avg: use ``rfconv.RFConv2d`` for the stem and
            forward both flags to the blocks.
        avd, avd_first: forwarded to every block.
        final_drop: accepted for signature compatibility; not used here.
        dropblock_prob: forwarded to the blocks of stages 3 and 4 only.
        last_gamma: forwarded to every block.
        norm_layer: normalisation class for ``bn1``. It is replaced by
            ``FrozenBatchNorm2d`` when ``deep_stem`` is set, and each stage
            picks its own class (frozen BN for stage 1, ``NaiveSyncBatchNorm``
            for stages 2-4) regardless of this argument.

    Raises:
        ValueError: if a stage spec has an index other than 1-4.
    """

    def __init__(self, block, cfg, radix=2, groups=1, bottleneck_width=64,
                 dilated=False, dilation=1,
                 deep_stem=True, stem_width=64, avg_down=True,
                 rectified_conv=False, rectify_avg=False,
                 avd=False, avd_first=False,
                 final_drop=0.0, dropblock_prob=0,
                 last_gamma=False, norm_layer=nn.BatchNorm2d):
        # Set up nn.Module bookkeeping before any attribute is assigned.
        super().__init__()
        stage_specs = _STAGE_SPECS[cfg.MODEL.BACKBONE.CONV_BODY]  # ResNeSt-101-FPN
        self.stage_specs_ = stage_specs
        self.cardinality = groups
        self.bottleneck_width = bottleneck_width
        # ResNet-D params
        self.inplanes = stem_width * 2 if deep_stem else 64
        self.avg_down = avg_down
        self.last_gamma = last_gamma
        # ResNeSt params
        self.radix = radix
        self.avd = avd
        self.avd_first = avd_first
        self.rectified_conv = rectified_conv
        self.rectify_avg = rectify_avg

        if rectified_conv:
            from rfconv import RFConv2d
            conv_layer = RFConv2d
        else:
            conv_layer = nn.Conv2d
        conv_kwargs = {'average_mode': rectify_avg} if rectified_conv else {}

        if deep_stem:
            norm_layer = FrozenBatchNorm2d
            # The last stem conv has no norm/ReLU of its own: bn1 and relu
            # below are applied to its output in forward().
            self.conv1 = nn.Sequential(
                conv_layer(3, stem_width, kernel_size=3, stride=2, padding=1,
                           bias=False, **conv_kwargs),
                norm_layer(stem_width),
                nn.ReLU(inplace=True),
                conv_layer(stem_width, stem_width, kernel_size=3, stride=1,
                           padding=1, bias=False, **conv_kwargs),
                norm_layer(stem_width),
                nn.ReLU(inplace=True),
                conv_layer(stem_width, stem_width * 2, kernel_size=3, stride=1,
                           padding=1, bias=False, **conv_kwargs),
            )
        else:
            self.conv1 = conv_layer(3, 64, kernel_size=7, stride=2, padding=3,
                                    bias=False, **conv_kwargs)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.return_features = {}
        self.stages = []
        for stage_spec in stage_specs:
            index = stage_spec.index
            name = "layer{}".format(index)
            if index == 1:
                norm_layer = FrozenBatchNorm2d
                layer = self._make_layer(block, 64, stage_spec.block_count,
                                         norm_layer=norm_layer, is_first=False)
            elif index == 2:
                norm_layer = NaiveSyncBatchNorm
                layer = self._make_layer(block, 128, stage_spec.block_count,
                                         stride=2, norm_layer=norm_layer)
            elif index in (3, 4):
                norm_layer = NaiveSyncBatchNorm
                stage_stride, stage_dilation = self._late_stage_geometry(
                    index, dilated, dilation)
                layer = self._make_layer(block, 256 if index == 3 else 512,
                                         stage_spec.block_count,
                                         stride=stage_stride,
                                         dilation=stage_dilation,
                                         norm_layer=norm_layer,
                                         dropblock_prob=dropblock_prob)
            else:
                # Fail loudly: silently continuing would register a stage name
                # with no module behind it and break forward() much later.
                raise ValueError(
                    "unsupported stage index {!r}; expected 1, 2, 3 or 4".format(index))
            self.add_module(name, layer)
            self.stages.append(name)
            self.return_features[name] = stage_spec.return_features

        # NOTE: norm_layer is whatever class was bound last above, so only
        # modules of that class are touched by the second branch.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, norm_layer):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    @staticmethod
    def _late_stage_geometry(index, dilated, dilation):
        """Return ``(stride, dilation)`` used to build stage 3 or stage 4."""
        if dilated or dilation == 4:
            return 1, (2 if index == 3 else 4)
        if dilation == 2:
            return (2, 1) if index == 3 else (1, 2)
        return 2, 1

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=None,
                    dropblock_prob=0.0, is_first=True):
        """Build one stage of ``blocks`` residual blocks as an ``nn.Sequential``.

        Only the first block receives ``stride``, the shortcut ``downsample``
        and ``is_first``. It uses dilation 1 for a stage dilation of 1 or 2
        and dilation 2 for a stage dilation of 4; the remaining blocks use the
        stage dilation as given. ``self.inplanes`` is updated to
        ``planes * block.expansion``.

        ``norm_layer`` must be a callable class: it is used for the shortcut
        and handed to every block.

        Raises:
            RuntimeError: if ``dilation`` is not 1, 2 or 4.
        """
        out_planes = planes * block.expansion

        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            down_layers = []
            if self.avg_down:
                # Pool spatially first, then project channels with an
                # unstrided 1x1 conv. For dilated stages the pool is 1x1
                # with stride 1.
                pool_size = stride if dilation == 1 else 1
                down_layers.append(nn.AvgPool2d(kernel_size=pool_size, stride=pool_size,
                                                ceil_mode=True, count_include_pad=False))
                down_layers.append(nn.Conv2d(self.inplanes, out_planes,
                                             kernel_size=1, stride=1, bias=False))
            else:
                down_layers.append(nn.Conv2d(self.inplanes, out_planes,
                                             kernel_size=1, stride=stride, bias=False))
            down_layers.append(norm_layer(out_planes))
            downsample = nn.Sequential(*down_layers)

        if dilation in (1, 2):
            first_dilation = 1
        elif dilation == 4:
            first_dilation = 2
        else:
            raise RuntimeError("=> unknown dilation size: {}".format(dilation))

        # Keyword arguments shared by every block of the stage.
        block_kwargs = dict(
            radix=self.radix, cardinality=self.cardinality,
            bottleneck_width=self.bottleneck_width,
            avd=self.avd, avd_first=self.avd_first,
            rectified_conv=self.rectified_conv,
            rectify_avg=self.rectify_avg,
            norm_layer=norm_layer, dropblock_prob=dropblock_prob,
            last_gamma=self.last_gamma,
        )

        layers = [block(self.inplanes, planes, stride, downsample=downsample,
                        dilation=first_dilation, is_first=is_first, **block_kwargs)]
        self.inplanes = out_planes
        layers.extend(
            block(self.inplanes, planes, dilation=dilation, **block_kwargs)
            for _ in range(1, blocks)
        )
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem and all stages; return the selected stage outputs.

        Returns:
            A list with the output of each stage whose ``return_features``
            entry is true, in stage order.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        outputs = []
        for name in self.stages:
            x = getattr(self, name)(x)
            if self.return_features[name]:
                outputs.append(x)
        return outputs
class ResnestBottleneck(nn.Module):
    """ResNeSt bottleneck: 1x1 conv -> 3x3 (split-attention) conv -> 1x1 conv.

    The output has ``planes * expansion`` channels and is added to the
    (optionally downsampled) input before the final ReLU.

    Args:
        inplanes: number of input channels.
        planes: base width; the block outputs ``planes * expansion`` channels.
        stride: stride of the block. When ``avd`` is active the stride is
            applied by an average pool and the 3x3 conv runs with stride 1.
        downsample: optional module applied to the input to form the residual.
        radix: ``>= 1`` uses ``SplAtConv2d`` for the 3x3 conv; ``0`` uses a
            plain (or rectified) conv followed by ``bn2`` and ReLU.
        cardinality: number of groups of the 3x3 conv.
        bottleneck_width: scales the internal width relative to 64.
        avd: enable the average-pool downsampling layer; it is only active
            when ``stride > 1`` or ``is_first`` is set.
        avd_first: apply the avd pool before (True) or after (False) the
            3x3 conv.
        dilation: dilation and padding of the 3x3 conv.
        is_first: see ``avd``.
        rectified_conv, rectify_avg: rectified-convolution options.
        norm_layer: normalisation class used for bn1/bn2/bn3.
        dropblock_prob: values ``> 0`` insert ``DropBlock2D`` layers.
        last_gamma: zero-initialise the weight of ``bn3``.
    """
    # pylint: disable=unused-argument
    expansion = 4  # multiplier for the number of output channels

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 radix=1, cardinality=1, bottleneck_width=64,
                 avd=False, avd_first=False, dilation=1, is_first=False,
                 rectified_conv=False, rectify_avg=False,
                 norm_layer=nn.BatchNorm2d, dropblock_prob=0.0, last_gamma=False):
        super().__init__()
        group_width = int(planes * (bottleneck_width / 64.)) * cardinality
        self.conv1 = nn.Conv2d(inplanes, group_width, kernel_size=1, bias=False)
        self.bn1 = norm_layer(group_width)
        self.dropblock_prob = dropblock_prob
        self.radix = radix
        self.avd = avd and (stride > 1 or is_first)
        self.avd_first = avd_first

        if self.avd:
            # The pool takes over the spatial stride, so conv2 runs unstrided.
            self.avd_layer = nn.AvgPool2d(3, stride, padding=1)
            stride = 1

        if dropblock_prob > 0.0:
            # NOTE(review): DropBlock2D is not imported in the visible part of
            # this file; it must be in scope for dropblock_prob > 0 to work.
            self.dropblock1 = DropBlock2D(dropblock_prob, 3)
            # forward() only applies dropblock2 on the radix == 0 path, so it
            # has to exist exactly in that case.
            if radix == 0:
                self.dropblock2 = DropBlock2D(dropblock_prob, 3)
            self.dropblock3 = DropBlock2D(dropblock_prob, 3)

        if radix >= 1:
            self.conv2 = SplAtConv2d(
                group_width, group_width, kernel_size=3,
                stride=stride, padding=dilation,
                dilation=dilation, groups=cardinality, bias=False,
                radix=radix, rectify=rectified_conv,
                rectify_avg=rectify_avg,
                norm_layer=norm_layer,
                dropblock_prob=dropblock_prob)
        elif rectified_conv:
            from rfconv import RFConv2d
            self.conv2 = RFConv2d(
                group_width, group_width, kernel_size=3, stride=stride,
                padding=dilation, dilation=dilation,
                groups=cardinality, bias=False,
                average_mode=rectify_avg)
            self.bn2 = norm_layer(group_width)
        else:
            self.conv2 = nn.Conv2d(
                group_width, group_width, kernel_size=3, stride=stride,
                padding=dilation, dilation=dilation,
                groups=cardinality, bias=False)
            self.bn2 = norm_layer(group_width)

        # Use the class attribute so the width stays consistent with callers
        # that size shortcuts from ``block.expansion``.
        out_planes = planes * self.expansion
        self.conv3 = nn.Conv2d(
            group_width, out_planes, kernel_size=1, bias=False)
        self.bn3 = norm_layer(out_planes)

        if last_gamma:
            from torch.nn.init import zeros_
            zeros_(self.bn3.weight)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.dilation = dilation
        self.stride = stride

    def forward(self, x):
        """Apply the bottleneck to ``x`` and return the activated sum with the residual."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        if self.dropblock_prob > 0.0:
            out = self.dropblock1(out)
        out = self.relu(out)

        if self.avd and self.avd_first:
            out = self.avd_layer(out)

        out = self.conv2(out)
        if self.radix == 0:
            # Plain-conv path: norm, optional dropblock and ReLU are applied
            # here; on the radix >= 1 path conv2 is a SplAtConv2d instead.
            out = self.bn2(out)
            if self.dropblock_prob > 0.0:
                out = self.dropblock2(out)
            out = self.relu(out)

        if self.avd and not self.avd_first:
            out = self.avd_layer(out)

        out = self.conv3(out)
        out = self.bn3(out)
        if self.dropblock_prob > 0.0:
            out = self.dropblock3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)
        return out
# StageSpec is already defined earlier in this module and is deliberately not
# declared a second time here: re-declaring it would rebind the name to a new
# class, so the entries of ResNeSt101FPNStagesTo5 (built from the first
# definition) would no longer be instances of ``StageSpec``.

# Maps a backbone name (looked up via cfg.MODEL.BACKBONE.CONV_BODY) to the
# tuple of stage specs that ResNeSt builds.
_STAGE_SPECS = Registry({
    "ResNeSt-101-FPN": ResNeSt101FPNStagesTo5,
})
I have no experience with the maskrcnn-benchmark framework. I found that the default std values for input images in maskrcnn-benchmark are [1, 1, 1]: https://github.com/facebookresearch/maskrcnn-benchmark/blob/f027259943d6caabb95311f2b292c31a766dafa5/maskrcnn_benchmark/config/defaults.py#L53. They should be replaced with the actual std values.
I also found that `_C.INPUT.TO_BGR255 = True` is set in this file. Do I need to set it to False?
I have replaced the mean and std, but it doesn't work.
Not sure about issues in maskrcnn-benchmark framework. Since it has been deprecated, it is recommended to use detectron2 or mmdetection.
Thanks, but the project depends on maskrcnn-benchmark framework. I will try to use detectron2.
I used gradient clipping to work around it. But it is weird that the gradient overflows even when I use the pre-trained model.
I used gradient clipping to work around it. But it is weird that the gradient overflows even when I use the pre-trained model.
Did you meet the gradient explosion? I had the nan loss while training.
I used gradient clipping to work around it. But it is weird that the gradient overflows even when I use the pre-trained model.
Did you meet the gradient explosion? I had the nan loss while training.
Yes, if you always get NaN, you can add `nn.utils.clip_grad_norm_` and train it again.
I am using the maskrcnn-benchmark framework and replaced the backbone with ResNeSt, but the loss is always NaN.
Here is the model structure; is there anything wrong with it?