Open zhongqiu1245 opened 2 years ago
@zhongqiu1245 You can try it:
init_cfg=[
dict(type='Pretrained', checkpoint='torchvision://resnet18'),
dict(type='Kaiming',override=[dict(name='custom_layer',layer='Conv2d')]),
dict(type='Constant', val=1, override=[dict(name='custom_layer',layer=['_BatchNorm', 'GroupNorm']])]
If you have any questions, feel free to let me know.
@hhaAndroid Thank you for your reply! I will try.
@hhaAndroid If the structure of the custom module is like this:
class Coustom_Module(BaseModule):
def __init__(self, ......):
super(Coustom_Block, self).__init__(xxx, xxx, ......)
self.conv1 = ConvModule(xxx, xxx, xxx, ......)
self.conv2 = ConvModule(xxx, xxx, xxx, ......)
def forword(self, x):
x=self.conv2(self.conv1(x))
and in main():
class Mixed_Backbone(BaseModule):
def __init__(self, ......):
......
for i, num_blocks in enumerate(self.stage_blocks):
# --------------------------resnet------------------
res_layer = self.make_res_layer(xxx, xxx, xxx, ……)
layer_name = f'layer{i + 1}'
self.add_module(layer_name, res_layer)
self.res_layers.append(layer_name)
# --------------------------custom------------------
custom_layer = Sequential (Custom_Module(xxx, xxx, …), Custom_Module(xxx, xxx, …))
custom_layer_name = f' custom_layer{i + 1}'
self.add_module(custom_layer_name, custom_layer)
self. custom_layers.append(custom_layer_name)
Should I fill in the `name` field of `override`
with name=['custom_layer1', 'custom_layer2', 'custom_layer3', 'custom_layer4']
or with name=['custom_layer1.conv1', 'custom_layer1.conv2', 'custom_layer2.conv1', ......]
?
@zhongqiu1245 You can directly pass the corresponding init_cfg to custom_layer1. For example:
custom_layer = Sequential (Custom_Module(xxx, xxx, …,init_cfg= dict(type='Kaiming',layer='Conv2d')), Custom_Module(xxx, xxx, …))
@hhaAndroid You mean like this?
class Mixed_Backbone(BaseModule):
def __init__(self, ......, init_resnet, init_custom):
super(Mixed_Backbone, self).__init__(init_resnet)
# --------------------------resnet------------------
......
# --------------------------custom------------------
custom_layer = Sequential (Custom_Module(xxx, ......, init_custom),
Custom_Module(xxx, ......, init_custom))
Does the inner init_custom have higher priority? Will it not be overridden by init_resnet?
@zhongqiu1245 Its traversal rule goes from the outer layer to the inner layer, so it doesn't matter.
@hhaAndroid I tried, but it didn't seem to initialize correctly. For example:
from mmcv.runner import BaseModule
from mmcv.cnn import ConvModule
import torch
import torch.nn as nn
class Custom(BaseModule):
def __init__(self,
init_cfg=None):
super(Custom, self).__init__(init_cfg)
self.conv_1 = ConvModule(3, 10, kernel_size=3, padding=1)
self.conv_2 = nn.Conv2d(10, 10, kernel_size=3, padding=1)
def forward(self, x):
return self.conv_2(self.conv_1(x))
class Mixed(BaseModule):
def __init__(self,
init_cfg_mixed=None,
init_cfg_custom=None):
super(Mixed, self).__init__(init_cfg_mixed)
self.body = nn.Conv2d(1, 3, kernel_size=3, padding=1)
self.custom = Custom(init_cfg_custom)
def forward(self, x):
return self.custom(self.body(x))
init_cfg_mixed = dict(type='Constant', layer='Conv2d', val=1., bias=2.)
init_cfg_custom = dict(type='Constant', layer='Conv2d', val=5., bias=10.)
model = Mixed(init_cfg_mixed, init_cfg_custom).cuda()
model.init_weights()
print(model.body.weight) # mixed body but custom Correct
print("-"*100)
print(model.custom.conv_1.conv.weight) # ConvModule in custom Wrong
print("-"*100)
print(model.custom.conv_2.weight) # custom but ConvModule Correct
The `ConvModule` can't be initialized by `init_cfg_custom`. And sometimes, some convs in `custom` will be initialized by `init_cfg_mixed` if `custom` is complex, which is weird.
I found that I need to override `init_weights` if I want to initialize `Mixed_Backbone` in this way.
Hi, dear authors! Thank you for your amazing work! I have a small question about init. I mixed ResNet-18 with my custom module, which looks as follows: I want to initialize all ResNet layers with the ResNet-18 pre-trained file, and initialize all custom layers with:
or it can be described like this:
How can I initialize this "mixed backbone" in this way? Thank you!