This is the network layer code:

```python
import math

import torch
import torch.nn as nn

from net.resnest import *
from net.l2norm import L2Norm


class CSPNet(nn.Module):
    def __init__(self):
        super(CSPNet, self).__init__()
        # resnest = resnest50(pretrained=True, receptive_keep=True)
        resnest = resnest50(pretrained=True)

        # ResNeSt-50 backbone
        self.conv1 = resnest.conv1
        self.bn1 = resnest.bn1
        self.relu = resnest.relu
        self.maxpool = resnest.maxpool
        self.layer1 = resnest.layer1
        self.layer2 = resnest.layer2
        self.layer3 = resnest.layer3
        self.layer4 = resnest.layer4

        # Upsample the three deepest stages to a common resolution
        self.p3 = nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1)
        self.p4 = nn.ConvTranspose2d(1024, 256, kernel_size=4, stride=4, padding=0)
        self.p5 = nn.ConvTranspose2d(2048, 256, kernel_size=8, stride=8, padding=0)

        nn.init.xavier_normal_(self.p3.weight)
        nn.init.xavier_normal_(self.p4.weight)
        nn.init.xavier_normal_(self.p5.weight)
        nn.init.constant_(self.p3.bias, 0)
        nn.init.constant_(self.p4.bias, 0)
        nn.init.constant_(self.p5.bias, 0)

        self.p3_l2 = L2Norm(256, 10)
        self.p4_l2 = L2Norm(256, 10)
        self.p5_l2 = L2Norm(256, 10)

        self.feat = nn.Conv2d(768, 256, kernel_size=3, stride=1, padding=1, bias=False)
        self.feat_bn = nn.BatchNorm2d(256, momentum=0.01)
        self.feat_act = nn.ReLU(inplace=True)

        # Heads: center classification, height regression, center offset
        self.pos_conv = nn.Conv2d(256, 1, kernel_size=1)
        self.reg_conv = nn.Conv2d(256, 1, kernel_size=1)
        self.off_conv = nn.Conv2d(256, 2, kernel_size=1)

        nn.init.xavier_normal_(self.feat.weight)
        nn.init.xavier_normal_(self.pos_conv.weight)
        nn.init.xavier_normal_(self.reg_conv.weight)
        nn.init.xavier_normal_(self.off_conv.weight)
        nn.init.constant_(self.pos_conv.bias, -math.log(0.99 / 0.01))
        nn.init.constant_(self.reg_conv.bias, 0)
        nn.init.constant_(self.off_conv.bias, 0)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        p3 = self.p3(x)
        p3 = self.p3_l2(p3)
        x = self.layer3(x)
        p4 = self.p4(x)
        p4 = self.p4_l2(p4)
        x = self.layer4(x)
        p5 = self.p5(x)
        p5 = self.p5_l2(p5)

        cat = torch.cat([p3, p4, p5], dim=1)
        feat = self.feat(cat)
        feat = self.feat_bn(feat)
        feat = self.feat_act(feat)

        x_cls = self.pos_conv(feat)
        x_cls = torch.sigmoid(x_cls)
        x_reg = self.reg_conv(feat)
        x_off = self.off_conv(feat)
        return x_cls, x_reg, x_off

    # def train(self, mode=True):
    #     # Override train so that the training mode is set as we want
    #     nn.Module.train(self, mode)
    #     if mode:
    #         # Set fixed blocks to be in eval mode
    #         self.conv1.eval()
    #         self.layer1.eval()
    #
    #         # bn is trainable in CONV2
    #         def set_bn_train(m):
    #             class_name = m.__class__.__name__
    #             if class_name.find('BatchNorm') != -1:
    #                 m.train()
    #             else:
    #                 m.eval()
    #         self.layer1.apply(set_bn_train)


class CSPNet_mod(nn.Module):
    # under construction !!!!!!!!!!!!!!!!!!!
    def __init__(self):
        super(CSPNet_mod, self).__init__()
        resnest = resnest50(pretrained=True, receptive_keep=True)

        self.conv1 = resnest.conv1
        self.bn1 = resnest.bn1
        self.relu = resnest.relu
        self.maxpool = resnest.maxpool
        self.layer1 = resnest.layer1
        self.layer2 = resnest.layer2
        self.layer3 = resnest.layer3
        self.layer4 = resnest.layer4

        self.p3 = nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1)
        self.p4 = nn.ConvTranspose2d(1024, 256, kernel_size=4, stride=4, padding=0)
        self.p5 = nn.ConvTranspose2d(2048, 256, kernel_size=4, stride=4, padding=0)

        nn.init.xavier_normal_(self.p3.weight)
        nn.init.xavier_normal_(self.p4.weight)
        nn.init.xavier_normal_(self.p5.weight)
        nn.init.constant_(self.p3.bias, 0)
        nn.init.constant_(self.p4.bias, 0)
        nn.init.constant_(self.p5.bias, 0)

        self.p3_l2 = L2Norm(256, 10)
        self.p4_l2 = L2Norm(256, 10)
        self.p5_l2 = L2Norm(256, 10)

        self.feat = nn.Conv2d(768, 256, kernel_size=3, stride=1, padding=1, bias=True)
        self.feat_act = nn.ReLU(inplace=True)

        self.pos_conv = nn.Conv2d(256, 1, kernel_size=1)
        self.reg_conv = nn.Conv2d(256, 1, kernel_size=1)
        self.off_conv = nn.Conv2d(256, 2, kernel_size=1)

        nn.init.xavier_normal_(self.feat.weight)
        nn.init.xavier_normal_(self.pos_conv.weight)
        nn.init.xavier_normal_(self.reg_conv.weight)
        nn.init.xavier_normal_(self.off_conv.weight)
        nn.init.constant_(self.feat.bias, 0)
        # NOTE: compared with CSPNet above, the -log(0.99/0.01) prior is on
        # reg_conv here rather than pos_conv; the class is marked "under
        # construction", so this may be unintentional.
        nn.init.constant_(self.reg_conv.bias, -math.log(0.99 / 0.01))
        nn.init.constant_(self.pos_conv.bias, 0)
        nn.init.constant_(self.off_conv.bias, 0)

        # Freeze the stem and layer1
        for p in self.conv1.parameters():
            p.requires_grad = False
        for p in self.bn1.parameters():
            p.requires_grad = False
        for p in self.layer1.parameters():
            p.requires_grad = False

        # Freeze all BatchNorm affine parameters in the remaining stages
        def set_bn_fix(m):
            classname = m.__class__.__name__
            if classname.find('BatchNorm') != -1:
                for p in m.parameters():
                    p.requires_grad = False

        self.layer2.apply(set_bn_fix)
        self.layer3.apply(set_bn_fix)
        self.layer4.apply(set_bn_fix)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        p3 = self.p3(x)
        p3 = self.p3_l2(p3)
        x = self.layer3(x)
        p4 = self.p4(x)
        p4 = self.p4_l2(p4)
        x = self.layer4(x)
        p5 = self.p5(x)
        p5 = self.p5_l2(p5)

        cat = torch.cat([p3, p4, p5], dim=1)
        feat = self.feat(cat)
        feat = self.feat_act(feat)

        x_cls = self.pos_conv(feat)
        x_cls = torch.sigmoid(x_cls)
        x_reg = self.reg_conv(feat)
        x_off = self.off_conv(feat)
        return x_cls, x_reg, x_off

    def train(self, mode=True):
        # Override train so that the training mode is set as we want
        nn.Module.train(self, mode)
        if mode:
            # Set fixed blocks to be in eval mode
            self.conv1.eval()
            self.bn1.eval()
            self.layer1.eval()

            def set_bn_eval(m):
                classname = m.__class__.__name__
                if classname.find('BatchNorm') != -1:
                    m.eval()

            self.layer2.apply(set_bn_eval)
            self.layer3.apply(set_bn_eval)
            self.layer4.apply(set_bn_eval)
```
The original framework uses a ResNet-50 backbone. I want to replace the ResNet backbone in this pedestrian detection framework with ResNeSt, but I hit this error and cannot resolve it: `ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 32, 1, 1])`
I answered this in the other issue just now. It is mainly a batch size problem: the batch size cannot be 1 during training, and for inference you should use evaluation mode via `model.eval()`.
https://github.com/zhanghang1989/ResNeSt/issues/70#issuecomment-635037833
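For reference, this error most likely comes from the split-attention block in ResNeSt, which applies BatchNorm to a globally pooled tensor of shape `[N, C, 1, 1]`; with `N = 1` in training mode there is only one value per channel, so batch statistics cannot be computed. A minimal standalone sketch (mine, not from this thread) that reproduces and avoids the error:

```python
import torch
import torch.nn as nn

bn = nn.BatchNorm2d(32)
x = torch.randn(1, 32, 1, 1)  # batch size 1, globally pooled to 1x1

bn.train()
try:
    bn(x)  # a single value per channel: batch statistics are undefined
except ValueError as e:
    print(e)  # "Expected more than 1 value per channel when training, ..."

bn.eval()
y = bn(x)  # OK: eval mode uses the stored running statistics

bn.train()
y = bn(torch.randn(2, 32, 1, 1))  # OK: more than one value per channel
```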
OK, thank you!
Hello, my batch size is not 1, it is 6. The training batch size is computed like this:

```python
config.gpu_ids = [0, 2, 3]
config.onegpu = 2

print('Dataset...')
traintransform = Compose(
    [ColorJitter(brightness=0.5), ToTensor(),
     Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
traindataset = CityPersons(path=config.train_path, type='train',
                           config=config, transform=traintransform)
trainloader = DataLoader(traindataset,
                         batch_size=config.onegpu * len(config.gpu_ids))  # 2 * 3 = 6
```
What I mean is that the per-device batch size also has to be greater than one, i.e. `config.onegpu` here. Does it still fail with that?
Thank you, it was indeed the batch size problem. During training the last batch had an odd number of samples, which caused a problem when it was distributed across the GPUs. I made the change you suggested and it is now solved. Thanks again!
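For anyone who hits the same problem: one simple guard (my suggestion, not something posted in this thread) is to pass `drop_last=True` to the training `DataLoader`, so the final incomplete batch is discarded and `nn.DataParallel` never scatters a 1-sample chunk to a device:

```python
from torch.utils.data import DataLoader

# Numbers from this thread: 3 GPUs, config.onegpu = 2 samples per GPU.
# nn.DataParallel chunks each batch across the devices, so a trailing batch
# of, say, 3 samples would leave one GPU with a single sample and break
# BatchNorm in training mode.
trainloader = DataLoader(
    traindataset,                                    # CityPersons dataset above
    batch_size=config.onegpu * len(config.gpu_ids),  # 2 * 3 = 6
    drop_last=True,                                  # drop the incomplete last batch
)
```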
This is the main training script:

```python
import os
import time
import json
from copy import deepcopy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor, Normalize, Compose, ColorJitter

from net.loss import *
from net.network import CSPNet, CSPNet_mod
from config import Config
from dataloader.loader import *
from util.functions import parse_det_offset
from eval_city.eval_script.eval_demo import validate

config = Config()
config.train_path = './data/citypersons'
config.test_path = './data/citypersons'
config.gpu_ids = [0, 2, 3]
config.onegpu = 2
config.size_train = (640, 1280)
config.size_test = (1024, 2048)
config.init_lr = 2e-4
config.num_epochs = 120
config.offset = True
config.val = True
config.val_frequency = 1

# dataset
print('Dataset...')
traintransform = Compose(
    [ColorJitter(brightness=0.5), ToTensor(),
     Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
traindataset = CityPersons(path=config.train_path, type='train',
                           config=config, transform=traintransform)
trainloader = DataLoader(traindataset, batch_size=config.onegpu * len(config.gpu_ids))

if config.val:
    testtransform = Compose(
        [ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
    testdataset = CityPersons(path=config.train_path, type='val',
                              config=config, transform=testtransform, preloaded=True)
    testloader = DataLoader(testdataset, batch_size=1)

# net
print('Net...')
net = CSPNet().cuda()

# To continue training
net.load_state_dict(torch.load('./ckpt/DataParallel-9.pth'))

# position
center = cls_pos().cuda()
height = reg_pos().cuda()
offset = offset_pos().cuda()

# optimizer
params = []
for n, p in net.named_parameters():
    if p.requires_grad:
        params.append({'params': p})
    else:
        print(n)

if config.teacher:
    print('I found this teacher model is useless, I disable this training option')
    exit(1)
    teacher_dict = net.state_dict()

if len(config.gpu_ids) > 1:
    net = nn.DataParallel(net, device_ids=config.gpu_ids)

optimizer = optim.Adam(params, lr=config.init_lr)

batchsize = config.onegpu * len(config.gpu_ids)
train_batches = len(trainloader)

config.print_conf()


def criterion(output, label):
    cls_loss = center(output[0], label[0])
    reg_loss = height(output[1], label[1])
    off_loss = offset(output[2], label[2])
    return cls_loss, reg_loss, off_loss


def train():
    ...  # training loop body not included in the issue


def val(log=None):
    net.eval()
    ...  # remainder of the validation body not included in the issue


if __name__ == '__main__':
    train()
    val()
```