Open daiguangzhao opened 4 years ago
感谢关注。报错是什么? 有log么?
感谢郑博回复,代码没有报错,只是准确率为1,损失基本为0,是因为我的数据集划分的问题吗,还是代码?下面是我训练数据集的结构,共有229085张图片 /home/ubuntu-guangzhaodai/Desktop/AICIty-reID-2020/data/2020AICITY/aicity_all/image_train
你是不是就分了一个文件夹。。导致只有一类?
你是不是就分了一个文件夹。。导致只有一类?
是的,我的home/ubuntu-guangzhaodai/Desktop/AICIty-reID-2020/data/2020AICITY/aicity_all中 只有image_train这个文件夹;这个文件夹包含了大赛的两个数据集中image_train的图片,共229085(36935+192150)
你需要跑一下 python prepare_2020.py 他的目的是 把每一类的图像放一个文件夹。
python prepare_2020.py以及python prepare_cam2020.py都跑了
您好!之前跑代码的条件是按照您 github 中推荐的设置条件:--name SE_imbalance_s1_384_p0.5_lr2_mt_d0_b24+v+aug --warm_epoch 5 --droprate 0 --stride 1 --erasing_p 0.5 --autoaug --inputsize 384 --lr 0.02 --use_SE --gpu_ids 0 --train_virtual --batchsize 8,损失基本为0,准确率接近1。但是,如果在 Edit Configurations 中不去设置条件,仅仅采用 train_2020 中的默认条件去跑代码,损失和准确率就正常了,不会过分高。下面是默认条件的 opt.yml:
CPB: false PCB: false adam: false angle: false arc: false autoaug: false balance: false batchsize: 32 color_jitter: false data_dir: /home/ubuntu-guangzhaodai/Desktop/AICIty-reID-2020/data/pytorch2020 droprate: 0.5 erasing_p: 0 fp16: false gpu_ids:
- 0 h: 299 init_name: imagenet inputsize: 299 lr: 0.05 name: ft_ResNet50 nclasses: 255 pool: avg resume: false stride: 2 train_all: false train_comp: false train_comp_veri: false train_milktea: false train_pku: false train_veri: false train_virtual: false use_DSE: false use_EF4: false use_EF5: false use_EF6: false use_IR: false use_NAS: false use_SE: false use_dense: false w: 299 warm_epoch: 0
郑博士以及各位大神好,代码出现点问题,我一个EPOCH都没跑完,准确率为1,损失基本为0: 条件是:--name SE_imbalance_s1_384_p0.5_lr2_mt_d0_b24+v+aug --warm_epoch 5 --droprate 0 --stride 1 --erasing_p 0.5 --autoaug --inputsize 384 --lr 0.02 --use_SE --gpu_ids 0 --train_virtual --batchsize 8
下面是代码
import argparse import torch import torch.nn as nn import torch.optim as optim from torch.optim import lr_scheduler from torch.autograd import Variable from torchvision import datasets, transforms import torch.backends.cudnn as cudnn import matplotlib
matplotlib.use('agg') import matplotlib.pyplot as plt
from PIL import Image
import time import os from losses import AngleLoss, ArcLoss from model import ft_net, ft_net_dense, ft_net_EF4, ft_net_EF5, ft_net_EF6, ft_net_IR, ft_net_NAS, ft_net_SE, \ ft_net_DSE, PCB, CPB, ft_net_angle, ft_net_arc from random_erasing import RandomErasing import yaml from AugFolder import AugFolder from shutil import copyfile import random from autoaugment import ImageNetPolicy from utils import get_model_list, load_network, save_network, make_weights_for_balanced_classes
# Installed PyTorch version string. `torch.version` is the submodule, not the
# string; the dunder attribute is the documented way to read the version.
version = torch.__version__
fp16
try: from apex.fp16_utils import * from apex import amp, optimizers except ImportError: # will be 3.x series print( 'This is not an error. If you want to use low precision, i.e., fp16, please install the apex with cuda support (https://github.com/NVIDIA/apex) and update pytorch to 1.0')
make the output
# Make sure the output root exists. makedirs(exist_ok=True) avoids the
# check-then-create race and also creates missing parent directories.
os.makedirs('/home/ubuntu-guangzhaodai/Desktop/AICIty-reID-2020/data/outputs', exist_ok=True)
######################################################################
Options
--------
# Command-line options for a training run. Flags are grouped as: run identity
# and data location, extra training sets, input/augmentation, backbone choice,
# optimisation, loss, and resume.
parser = argparse.ArgumentParser(description='Training')

# --- run identity / data location ---
parser.add_argument('--gpu_ids', default='0', type=str, help='gpu_ids: e.g. 0 0,1,2 0,2')
# Help text fixed: this flag switches the optimizer to Adam (it is read as opt.adam).
parser.add_argument('--adam', action='store_true', help='use Adam optimizer instead of SGD')
parser.add_argument('--name', default='ft_ResNet50', type=str, help='output model name')
parser.add_argument('--init_name', default='imagenet', type=str, help='initial with ImageNet')
parser.add_argument('--data_dir', default='/home/ubuntu-guangzhaodai/Desktop/AICIty-reID-2020/data/pytorch2020',
                    type=str, help='training dir path')

# --- which training split to use (selects the 'train<suffix>' folder) ---
parser.add_argument('--train_all', action='store_true', help='use all training data')
parser.add_argument('--train_veri', action='store_true', help='use part training data + veri')
parser.add_argument('--train_virtual', action='store_true', help='use part training data + virtual')
parser.add_argument('--train_comp', action='store_true', help='use part training data + comp')
parser.add_argument('--train_pku', action='store_true', help='use part training data + pku')
parser.add_argument('--train_comp_veri', action='store_true', help='use part training data + comp +veri')
parser.add_argument('--train_milktea', action='store_true', help='use part training data + com + veri+pku')

# --- input size / augmentation ---
parser.add_argument('--color_jitter', action='store_true', help='use color jitter in training')
parser.add_argument('--batchsize', default=32, type=int, help='batchsize')
# Help text fixed: it previously said 'batchsize' (copy-paste error).
parser.add_argument('--inputsize', default=299, type=int, help='input image size')
parser.add_argument('--h', default=299, type=int, help='height')
parser.add_argument('--w', default=299, type=int, help='width')
parser.add_argument('--stride', default=2, type=int, help='stride')
parser.add_argument('--pool', default='avg', type=str, help='last pool')
parser.add_argument('--autoaug', action='store_true', help='use Color Data Augmentation')
parser.add_argument('--erasing_p', default=0, type=float, help='Random Erasing probability, in [0,1]')

# --- backbone selection ---
parser.add_argument('--use_dense', action='store_true', help='use densenet121')
parser.add_argument('--use_NAS', action='store_true', help='use nasnetalarge')
parser.add_argument('--use_SE', action='store_true', help='use se_resnext101_32x4d')
parser.add_argument('--use_DSE', action='store_true', help='use senet154')
parser.add_argument('--use_IR', action='store_true', help='use InceptionResNetv2')
parser.add_argument('--use_EF4', action='store_true', help='use EF4')
parser.add_argument('--use_EF5', action='store_true', help='use EF5')
parser.add_argument('--use_EF6', action='store_true', help='use EF6')

# --- optimisation ---
parser.add_argument('--lr', default=0.05, type=float, help='learning rate')
parser.add_argument('--droprate', default=0.5, type=float, help='drop rate')
parser.add_argument('--PCB', action='store_true', help='use PCB+ResNet50')
parser.add_argument('--CPB', action='store_true', help='use Center+ResNet50')
# argparse %-formats help strings, so a literal percent sign must be written
# as '%%'; the unescaped '50% memory' broke help formatting.
parser.add_argument('--fp16', action='store_true',
                    help='use float16 instead of float32, which will save about 50%% memory')
parser.add_argument('--balance', action='store_true', help='balance sample')

# --- loss ---
parser.add_argument('--angle', action='store_true', help='use angle loss')
parser.add_argument('--arc', action='store_true', help='use arc loss')
parser.add_argument('--warm_epoch', default=0, type=int, help='the first K epoch that needs warm up')
# Help text fixed: it previously said 'use arc loss' (copy-paste error).
parser.add_argument('--resume', action='store_true', help='resume training from the saved checkpoint')

opt = parser.parse_args()
# Either restore a previous run (model, its saved options and the epoch to
# continue from) or start a fresh run at epoch 0.
start_epoch = 0
if opt.resume:
    model, opt, start_epoch = load_network(opt.name, opt)
print(start_epoch)

# Short aliases used by the rest of the script.
name = opt.name
data_dir = opt.data_dir
fp16 = opt.fp16

if not opt.resume:
    # Turn the comma-separated string (e.g. "0,1,2") into a list of ints,
    # keeping only non-negative ids.
    str_ids = opt.gpu_ids.split(',')
    gpu_ids = [gid for gid in map(int, str_ids) if gid >= 0]
    opt.gpu_ids = gpu_ids

# set gpu ids
if len(opt.gpu_ids) > 0:
    cudnn.enabled = True
    cudnn.benchmark = True
######################################################################
Load Data
---------
#
if opt.h == opt.w: transform_train_list = [
transforms.RandomRotation(30),
else: transform_train_list = [
transforms.RandomRotation(30),
if opt.PCB: transform_train_list = [ transforms.Resize((384, 192), interpolation=3), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ] transform_val_list = [ transforms.Resize(size=(384, 192), interpolation=3), # Image.BICUBIC transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]
if opt.erasing_p > 0: transform_train_list = transform_train_list + [RandomErasing(probability=opt.erasing_p, mean=[0.0, 0.0, 0.0])]
if opt.color_jitter: transform_train_list = [transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0)] + transform_train_list
transform_train_list_aug = [ImageNetPolicy()] + transform_train_list
print(transform_train_list) data_transforms = { 'train': transforms.Compose(transform_train_list), 'train_aug': transforms.Compose(transform_train_list_aug), 'val': transforms.Compose(transform_val_list), }
# Suffix appended to 'train' to pick the training folder. The flags are
# checked in this fixed order and a later flag overrides an earlier one
# when several are set.
_SUFFIX_BY_FLAG = (
    ('train_all', '_all'),
    ('train_veri', '+veri'),
    ('train_comp', '+comp'),
    ('train_virtual', '+virtual'),
    ('train_pku', '+pku'),
    ('train_comp_veri', '+comp+veri'),
    ('train_milktea', '+comp+veri+pku'),
)

train_all = ''
for _flag, _suffix in _SUFFIX_BY_FLAG:
    if getattr(opt, _flag):
        train_all = _suffix
image_datasets = {}
if not opt.autoaug: image_datasets['train'] = datasets.ImageFolder(os.path.join(data_dir, 'train' + train_all), data_transforms['train']) else: image_datasets['train'] = AugFolder(os.path.join(data_dir, 'train' + train_all), data_transforms['train'], data_transforms['train_aug'])
if opt.balance: dataset_train = image_datasets['train'] weights = make_weights_for_balanced_classes(dataset_train.imgs, len(dataset_train.classes)) weights = torch.DoubleTensor(weights) sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights)) dataloaders = {} dataloaders['train'] = torch.utils.data.DataLoader(image_datasets['train'], batch_size=opt.batchsize, sampler=sampler, num_workers=8, pin_memory=True) # 8 workers may work faster else: dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=opt.batchsize, shuffle=True, num_workers=8, pin_memory=True)
8 workers may work faster
dataset_sizes = {x: len(image_datasets[x]) for x in ['train']} class_names = image_datasets['train'].classes
use_gpu = torch.cuda.is_available()
since = time.time()
inputs, classes = next(iter(dataloaders['train']))
print(time.time()-since)
######################################################################
Training the model
------------------
#
Now, let's write a general function to train a model. Here, we will
illustrate:
#
- Scheduling the learning rate
- Saving the best model
#
In the following, parameter
scheduler
is an LR scheduler object from torch.optim.lr_scheduler
# Per-phase histories, appended to during training and read when plotting.
y_loss = {'train': [], 'val': []}  # loss history
# Error history (presumably top-1 error, matching the "top1err" plot title).
y_err = {'train': [], 'val': []}
def train_model(model, criterion, optimizer, scheduler, start_epoch=0, num_epochs=25): since = time.time()
######################################################################
Draw Curve
---------------------------
x_epoch = [] fig = plt.figure() ax0 = fig.add_subplot(121, title="loss") ax1 = fig.add_subplot(122, title="top1err")
def draw_curve(current_epoch): x_epoch.append(current_epoch) ax0.plot(x_epoch, y_loss['train'], 'bo-', label='train')
ax0.plot(x_epoch, y_loss['val'], 'ro-', label='val')
######################################################################
Finetuning the convnet
----------------------
#
Load a pretrained model and reset final fully connected layer.
#
def _build_backbone(n_cls):
    """Instantiate the backbone chosen by the ``--use_*`` flags.

    The flags are tested in a fixed priority order; when none is set the
    plain ``ft_net`` is used. ``n_cls`` is the number of output classes.
    """
    if opt.use_dense:
        return ft_net_dense(n_cls, opt.droprate, opt.stride, None, opt.pool)
    if opt.use_NAS:
        return ft_net_NAS(n_cls, opt.droprate, opt.stride)
    if opt.use_SE:
        return ft_net_SE(n_cls, opt.droprate, opt.stride, opt.pool)
    if opt.use_DSE:
        return ft_net_DSE(n_cls, opt.droprate, opt.stride, opt.pool)
    if opt.use_IR:
        return ft_net_IR(n_cls, opt.droprate, opt.stride)
    if opt.use_EF4:
        return ft_net_EF4(n_cls, opt.droprate)
    if opt.use_EF5:
        return ft_net_EF5(n_cls, opt.droprate)
    if opt.use_EF6:
        return ft_net_EF6(n_cls, opt.droprate)
    return ft_net(n_cls, opt.droprate, opt.stride, None, opt.pool)


# A resumed run already has its model from load_network; only a fresh run
# records the class count and builds a new network.
if not opt.resume:
    opt.nclasses = len(class_names)
    model = _build_backbone(opt.nclasses)
if opt.init_name != 'imagenet': old_opt = parser.parse_args() init_model, oldopt, = load_network(opt.init_name, old_opt) print(old_opt) opt.stride = old_opt.stride opt.pool = old_opt.pool opt.use_dense = old_opt.use_dense if opt.use_dense: model = ft_net_dense(opt.nclasses, droprate=opt.droprate, stride=opt.stride, init_model=init_model, pool=opt.pool) else: model = ft_net(opt.nclasses, droprate=opt.droprate, stride=opt.stride, init_model=init_model, pool=opt.pool)
##########################
Put model parameter in front of the optimizer!!!
For resume:
# When resuming past a decay point, apply the decay that the scheduler would
# already have applied: x0.1 after epoch 60 and another x0.1 after epoch 75.
# The '*' operators were lost in the paste, leaving `opt.lr 0.1`.
if start_epoch >= 60:
    opt.lr = opt.lr * 0.1
if start_epoch >= 75:
    opt.lr = opt.lr * 0.1
if len(opt.gpu_ids) > 1: model = torch.nn.DataParallel(model, device_ids=opt.gpu_ids).cuda() if not opt.CPB: ignored_params = list(map(id, model.module.classifier.parameters())) base_params = filter(lambda p: id(p) not in ignored_params, model.parameters()) optimizer_ft = optim.SGD([ {'params': base_params, 'lr': 0.1 * opt.lr}, {'params': model.module.classifier.parameters(), 'lr': opt.lr} ], weight_decay=5e-4, momentum=0.9, nesterov=True) else: ignored_params = (list(map(id, model.module.classifier0.parameters()))
if opt.adam: optimizer_ft = optim.Adam(model.parameters(), opt.lr, weight_decay=5e-4)
Decay LR by a factor of 0.1 every 40 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=40, gamma=0.1)
exp_lr_scheduler = lr_scheduler.MultiStepLR(optimizer_ft, milestones=[60 - start_epoch, 75 - start_epoch], gamma=0.1)
######################################################################
Train and evaluate
^^^^^^^^^^^^^^^^^^
#
It should take around 1-2 hours on GPU.
#
# Per-run output directory, named after opt.name. The assignment had been
# fused onto a comment line, leaving dir_name undefined below.
dir_name = os.path.join('/home/ubuntu-guangzhaodai/Desktop/AICIty-reID-2020/data/outputs', name)

# Only a fresh run creates the directory; exist_ok makes a re-run harmless.
if not opt.resume:
    os.makedirs(dir_name, exist_ok=True)
record every run
model to gpu
if fp16:
model = network_to_half(model)
# Pick the loss class: --angle takes precedence over --arc, and plain
# cross-entropy is the fallback when neither flag is set.
if opt.angle:
    _loss_cls = AngleLoss
elif opt.arc:
    _loss_cls = ArcLoss
else:
    _loss_cls = nn.CrossEntropyLoss
criterion = _loss_cls()

print(model)

# Train for 80 epochs in total, continuing from start_epoch when resuming.
model = train_model(
    model,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    start_epoch=start_epoch,
    num_epochs=80,
)