atul2oct opened this issue 4 years ago
ModuleNotFoundError: No module named 'model' -- Check if you import the model.
I didn't change any code except line 30:

```python
parser.add_argument('--data_dir', default='./dataset/DukeMTMC_prepare/', type=str, help='training dir path')
```

Does the module 'model' have to be downloaded, and what is the path for the model? The model folder is empty. I downloaded the model folder with files from https://github.com/layumi/Person_reID_baseline_pytorch, but the error is still the same.

I executed prepare.py successfully, which created the dataset; now I am trying to run the train file. I installed PyTorch using `conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch`. What else do I need to download? I have just started learning deep learning, so your support is appreciated.

These are screenshots of the files and the path of the project. Here is my train file:
```python
from __future__ import print_function, division

import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
from PIL import Image
import time
import os
from model import ft_net, ft_net_dense, PCB
from random_erasing import RandomErasing
import json

######################################################################
# Options
parser = argparse.ArgumentParser(description='Training')
parser.add_argument('--gpu_ids', default='0', type=str, help='gpu_ids: e.g. 0  0,1,2  0,2')
parser.add_argument('--name', default='ft_ResNet50', type=str, help='output model name')
parser.add_argument('--data_dir', default='./dataset/DukeMTMC_prepare/', type=str, help='training dir path')
parser.add_argument('--train_all', action='store_true', help='use all training data')
parser.add_argument('--color_jitter', action='store_true', help='use color jitter in training')
parser.add_argument('--batchsize', default=32, type=int, help='batchsize')
parser.add_argument('--erasing_p', default=0, type=float, help='Random Erasing probability, in [0,1]')
parser.add_argument('--use_dense', action='store_true', help='use densenet121')
parser.add_argument('--PCB', action='store_true', help='use PCB+ResNet50')
opt = parser.parse_args()

data_dir = opt.data_dir
name = opt.name
str_ids = opt.gpu_ids.split(',')
gpu_ids = []
for str_id in str_ids:
    gid = int(str_id)
    if gid >= 0:
        gpu_ids.append(gid)

if len(gpu_ids) > 0:
    torch.cuda.set_device(gpu_ids[0])

if not os.path.exists("./model/"):
    os.makedirs("./model/")

######################################################################
# Load Data
transform_train_list = [
    transforms.Resize((288, 144), interpolation=3),
    transforms.RandomCrop((256, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]

transform_val_list = [
    transforms.Resize(size=(256, 128), interpolation=3),  # Image.BICUBIC
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]

if opt.PCB:
    transform_train_list = [
        transforms.Resize((384, 192), interpolation=3),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]
    transform_val_list = [
        transforms.Resize(size=(384, 192), interpolation=3),  # Image.BICUBIC
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]

if opt.erasing_p > 0:
    transform_train_list = transform_train_list + [RandomErasing(probability=opt.erasing_p, mean=[0.0, 0.0, 0.0])]

if opt.color_jitter:
    transform_train_list = [transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0)] + transform_train_list

print(transform_train_list)
data_transforms = {
    'train': transforms.Compose(transform_train_list),
    'val': transforms.Compose(transform_val_list),
}

train_all = ''
if opt.train_all:
    train_all = '_all'

image_datasets = {}
image_datasets['train'] = datasets.ImageFolder(os.path.join(data_dir, 'train' + train_all),
                                               data_transforms['train'])
image_datasets['val'] = datasets.ImageFolder(os.path.join(data_dir, 'val'),
                                             data_transforms['val'])

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=opt.batchsize,
                                              shuffle=True, num_workers=16)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

use_gpu = torch.cuda.is_available()

inputs, classes = next(iter(dataloaders['train']))

######################################################################
# Training the model
# ``scheduler`` is an LR scheduler object from ``torch.optim.lr_scheduler``.
y_loss = {}  # loss history
y_loss['train'] = []
y_loss['val'] = []
y_err = {}
y_err['train'] = []
y_err['val'] = []

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)   # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for data in dataloaders[phase]:
                # get the inputs
                inputs, labels = data
                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                if not opt.PCB:
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)
                else:
                    part = {}
                    sm = nn.Softmax(dim=1)
                    num_part = 6
                    for i in range(num_part):
                        part[i] = outputs[i]

                    score = sm(part[0]) + sm(part[1]) + sm(part[2]) + sm(part[3]) + sm(part[4]) + sm(part[5])
                    _, preds = torch.max(score.data, 1)

                    loss = criterion(part[0], labels)
                    for i in range(num_part - 1):
                        loss += criterion(part[i + 1], labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.item() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            y_loss[phase].append(epoch_loss)
            y_err[phase].append(1.0 - epoch_acc)
            # deep copy the model
            if phase == 'val':
                last_model_wts = model.state_dict()
                if epoch % 10 == 9:
                    save_network(model, epoch)
                draw_curve(epoch)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # print('Best val Acc: {:4f}'.format(best_acc))

    # load the last model weights
    model.load_state_dict(last_model_wts)
    save_network(model, 'last')
    return model

######################################################################
# Draw the training curves
x_epoch = []
fig = plt.figure()
ax0 = fig.add_subplot(121, title="loss")
ax1 = fig.add_subplot(122, title="top1err")

def draw_curve(current_epoch):
    x_epoch.append(current_epoch)
    ax0.plot(x_epoch, y_loss['train'], 'bo-', label='train')
    ax0.plot(x_epoch, y_loss['val'], 'ro-', label='val')
    ax1.plot(x_epoch, y_err['train'], 'bo-', label='train')
    ax1.plot(x_epoch, y_err['val'], 'ro-', label='val')
    if current_epoch == 0:
        ax0.legend()
        ax1.legend()
    fig.savefig(os.path.join('./model', name, 'train.jpg'))

######################################################################
# Save the model
def save_network(network, epoch_label):
    save_filename = 'net_%s.pth' % epoch_label
    save_path = os.path.join('./model', name, save_filename)
    torch.save(network.cpu().state_dict(), save_path)
    if torch.cuda.is_available():
        network.cuda(gpu_ids[0])

######################################################################
# Finetuning the convnet
if opt.use_dense:
    model = ft_net_dense(len(class_names))
else:
    model = ft_net(len(class_names))

if opt.PCB:
    model = PCB(len(class_names))

print(model)

if use_gpu:
    model = model.cuda()

criterion = nn.CrossEntropyLoss()

if not opt.PCB:
    ignored_params = (list(map(id, model.model.fc.parameters()))
                      + list(map(id, model.classifier.parameters())))
    base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
    optimizer_ft = optim.SGD([
        {'params': base_params, 'lr': 0.01},
        {'params': model.model.fc.parameters(), 'lr': 0.1},
        {'params': model.classifier.parameters(), 'lr': 0.1}
    ], weight_decay=5e-4, momentum=0.9, nesterov=True)
else:
    ignored_params = list(map(id, model.model.fc.parameters()))
    ignored_params += (list(map(id, model.classifier0.parameters()))
                       + list(map(id, model.classifier1.parameters()))
                       + list(map(id, model.classifier2.parameters()))
                       + list(map(id, model.classifier3.parameters()))
                       + list(map(id, model.classifier4.parameters()))
                       + list(map(id, model.classifier5.parameters()))
                       # + list(map(id, model.classifier7.parameters()))
                       )
    base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
    optimizer_ft = optim.SGD([
        {'params': base_params, 'lr': 0.01},
        {'params': model.model.fc.parameters(), 'lr': 0.1},
        {'params': model.classifier0.parameters(), 'lr': 0.1},
        {'params': model.classifier1.parameters(), 'lr': 0.1},
        {'params': model.classifier2.parameters(), 'lr': 0.1},
        {'params': model.classifier3.parameters(), 'lr': 0.1},
        {'params': model.classifier4.parameters(), 'lr': 0.1},
        {'params': model.classifier5.parameters(), 'lr': 0.1},
        # {'params': model.classifier6.parameters(), 'lr': 0.01},
        # {'params': model.classifier7.parameters(), 'lr': 0.01}
    ], weight_decay=5e-4, momentum=0.9, nesterov=True)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=40, gamma=0.1)

######################################################################
# Train and evaluate
dir_name = os.path.join('./model', name)
if not os.path.isdir(dir_name):
    os.mkdir(dir_name)

with open('%s/opts.json' % dir_name, 'w') as fp:
    json.dump(vars(opt), fp, indent=1)

model = train_model(model, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=60)
```
This error is caused by Python, not by deep learning. First, check whether a "model.py" file exists in your folder. If it does, you may try both Python 2 and Python 3.
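For what it's worth, a minimal diagnostic along those lines (a sketch, not code from the repo), run from the folder that contains the train script:

```python
import os
import sys

# `from model import ...` can only succeed if model.py sits next to the
# training script, or somewhere else on sys.path.
print("working directory:", os.getcwd())
print("model.py present here:", os.path.isfile("model.py"))
print("python version:", sys.version.split()[0])
print("search path starts with:", sys.path[:3])
```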
Thank you so much for your help, sir.

I changed line (112) to `num_workers=0` and it executed, but then I got this error:

```
RuntimeError: CUDA out of memory. Tried to allocate 64.00 MiB (GPU 0; 4.00 GiB total capacity; 2.71 GiB already allocated; 60.61 MiB free; 2.74 GiB reserved in total by PyTorch)
```

Training code almost identical to yours worked with one warning. The changed line:

```python
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=opt.batchsize, shuffle=True, num_workers=0, pin_memory=True)  # 8 workers may work faster
```

previously:

```python
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=opt.batchsize, shuffle=True, num_workers=8, pin_memory=True)  # 8 workers may work faster
```

Output of train.py (Person_reID_baseline_pytorch, https://github.com/layumi/Person_reID_baseline_pytorch):
```
Epoch 0/59
C:\Users\ATUL\anaconda3\lib\site-packages\torch\optim\lr_scheduler.py:131: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
  warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
train Loss: 3.8366 Acc: 0.2820
val Loss: 1.8972 Acc: 0.5113
Training complete in 5m 2s
...
Epoch 59/59
train Loss: 0.0198 Acc: 0.9994
val Loss: 0.0063 Acc: 0.9800
Training complete in 284m 15s
```
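The warning refers to the call order in `train_model` above, where `scheduler.step()` runs at the start of each training phase. A self-contained toy sketch of the order PyTorch 1.1.0+ expects (the toy model and data here are placeholders; in the train.py above, the equivalent change would be moving `scheduler.step()` after the epoch's `optimizer.step()` calls, which the thread itself does not apply):

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

# Toy stand-ins, only to show the recommended call order.
model = nn.Linear(4, 2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.1)

for epoch in range(2):
    inputs, labels = torch.randn(8, 4), torch.randint(0, 2, (8,))
    optimizer.zero_grad()
    loss = criterion(model(inputs), labels)
    loss.backward()
    optimizer.step()   # update the parameters first...
    scheduler.step()   # ...then advance the LR schedule, once per epoch
```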
The warning above comes from this train.py file (Person_reID_baseline_pytorch):
```python
from __future__ import print_function, division

import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torchvision import datasets, transforms

if __name__ == '__main__':
    torch.multiprocessing.freeze_support()

import torch.backends.cudnn as cudnn
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import time
import os
from model import ft_net, ft_net_dense, ft_net_NAS, PCB
from random_erasing import RandomErasing
import yaml
import math
from shutil import copyfile

version = torch.__version__

try:
    from apex.fp16_utils import *
    from apex import amp, optimizers
except ImportError:  # will be 3.x series
    print('This is not an error. If you want to use low precision, i.e., fp16, '
          'please install the apex with cuda support (https://github.com/NVIDIA/apex) '
          'and update pytorch to 1.0')

######################################################################
# Options
parser = argparse.ArgumentParser(description='Training')
parser.add_argument('--gpu_ids', default='0', type=str, help='gpu_ids: e.g. 0  0,1,2  0,2')
parser.add_argument('--name', default='ft_ResNet50', type=str, help='output model name')
parser.add_argument('--data_dir', default='./Market/pytorch', type=str, help='training dir path')
parser.add_argument('--train_all', action='store_true', help='use all training data')
parser.add_argument('--color_jitter', action='store_true', help='use color jitter in training')
parser.add_argument('--batchsize', default=32, type=int, help='batchsize')
parser.add_argument('--stride', default=2, type=int, help='stride')
parser.add_argument('--erasing_p', default=0, type=float, help='Random Erasing probability, in [0,1]')
parser.add_argument('--use_dense', action='store_true', help='use densenet121')
parser.add_argument('--use_NAS', action='store_true', help='use NAS')
parser.add_argument('--warm_epoch', default=0, type=int, help='the first K epoch that needs warm up')
parser.add_argument('--lr', default=0.05, type=float, help='learning rate')
parser.add_argument('--droprate', default=0.5, type=float, help='drop rate')
parser.add_argument('--PCB', action='store_true', help='use PCB+ResNet50')
parser.add_argument('--fp16', action='store_true', help='use float16 instead of float32, which will save about 50% memory')
opt = parser.parse_args()

fp16 = opt.fp16
data_dir = opt.data_dir
name = opt.name
str_ids = opt.gpu_ids.split(',')
gpu_ids = []
for str_id in str_ids:
    gid = int(str_id)
    if gid >= 0:
        gpu_ids.append(gid)

if len(gpu_ids) > 0:
    torch.cuda.set_device(gpu_ids[0])
    cudnn.benchmark = True

######################################################################
# Load Data
transform_train_list = [
    transforms.Resize((256, 128), interpolation=3),
    transforms.Pad(10),
    transforms.RandomCrop((256, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]

transform_val_list = [
    transforms.Resize(size=(256, 128), interpolation=3),  # Image.BICUBIC
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]

if opt.PCB:
    transform_train_list = [
        transforms.Resize((384, 192), interpolation=3),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]
    transform_val_list = [
        transforms.Resize(size=(384, 192), interpolation=3),  # Image.BICUBIC
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]

if opt.erasing_p > 0:
    transform_train_list = transform_train_list + [RandomErasing(probability=opt.erasing_p, mean=[0.0, 0.0, 0.0])]

if opt.color_jitter:
    transform_train_list = [transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0)] + transform_train_list

print(transform_train_list)
data_transforms = {
    'train': transforms.Compose(transform_train_list),
    'val': transforms.Compose(transform_val_list),
}

train_all = ''
if opt.train_all:
    train_all = '_all'

image_datasets = {}
image_datasets['train'] = datasets.ImageFolder(os.path.join(data_dir, 'train' + train_all),
                                               data_transforms['train'])
image_datasets['val'] = datasets.ImageFolder(os.path.join(data_dir, 'val'),
                                             data_transforms['val'])

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=opt.batchsize,
                                              shuffle=True, num_workers=0, pin_memory=True)
               # 8 workers may work faster
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

use_gpu = torch.cuda.is_available()

since = time.time()
inputs, classes = next(iter(dataloaders['train']))
print(time.time() - since)

######################################################################
# Training the model
# ``scheduler`` is an LR scheduler object from ``torch.optim.lr_scheduler``.
y_loss = {}  # loss history
y_loss['train'] = []
y_loss['val'] = []
y_err = {}
y_err['train'] = []
y_err['val'] = []

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    # best_model_wts = model.state_dict()
    # best_acc = 0.0
    warm_up = 0.1  # We start from the 0.1*lrRate
    warm_iteration = round(dataset_sizes['train'] / opt.batchsize) * opt.warm_epoch  # first 5 epoch

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)   # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0
            # Iterate over data.
            for data in dataloaders[phase]:
                # get the inputs
                inputs, labels = data
                now_batch_size, c, h, w = inputs.shape
                if now_batch_size < opt.batchsize:  # skip the last batch
                    continue
                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda().detach())
                    labels = Variable(labels.cuda().detach())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)
                # if we use low precision, input also need to be fp16
                # if fp16:
                #     inputs = inputs.half()

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                if phase == 'val':
                    with torch.no_grad():
                        outputs = model(inputs)
                else:
                    outputs = model(inputs)

                if not opt.PCB:
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)
                else:
                    part = {}
                    sm = nn.Softmax(dim=1)
                    num_part = 6
                    for i in range(num_part):
                        part[i] = outputs[i]

                    score = sm(part[0]) + sm(part[1]) + sm(part[2]) + sm(part[3]) + sm(part[4]) + sm(part[5])
                    _, preds = torch.max(score.data, 1)

                    loss = criterion(part[0], labels)
                    for i in range(num_part - 1):
                        loss += criterion(part[i + 1], labels)

                # backward + optimize only if in training phase
                if epoch < opt.warm_epoch and phase == 'train':
                    warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                    loss *= warm_up

                if phase == 'train':
                    if fp16:  # we use optimizer to backward loss
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    optimizer.step()

                # statistics
                if int(version[0]) > 0 or int(version[2]) > 3:  # for the new version like 0.4.0, 0.5.0 and 1.0.0
                    running_loss += loss.item() * now_batch_size
                else:  # for the old version like 0.3.0 and 0.3.1
                    running_loss += loss.data[0] * now_batch_size
                running_corrects += float(torch.sum(preds == labels.data))

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            y_loss[phase].append(epoch_loss)
            y_err[phase].append(1.0 - epoch_acc)
            # deep copy the model
            if phase == 'val':
                last_model_wts = model.state_dict()
                if epoch % 10 == 9:
                    save_network(model, epoch)
                draw_curve(epoch)

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(last_model_wts)
    save_network(model, 'last')
    return model

######################################################################
# Draw the training curves
x_epoch = []
fig = plt.figure()
ax0 = fig.add_subplot(121, title="loss")
ax1 = fig.add_subplot(122, title="top1err")

def draw_curve(current_epoch):
    x_epoch.append(current_epoch)
    ax0.plot(x_epoch, y_loss['train'], 'bo-', label='train')
    ax0.plot(x_epoch, y_loss['val'], 'ro-', label='val')
    ax1.plot(x_epoch, y_err['train'], 'bo-', label='train')
    ax1.plot(x_epoch, y_err['val'], 'ro-', label='val')
    if current_epoch == 0:
        ax0.legend()
        ax1.legend()
    fig.savefig(os.path.join('./model', name, 'train.jpg'))

######################################################################
# Save the model
def save_network(network, epoch_label):
    save_filename = 'net_%s.pth' % epoch_label
    save_path = os.path.join('./model', name, save_filename)
    torch.save(network.cpu().state_dict(), save_path)
    if torch.cuda.is_available():
        network.cuda(gpu_ids[0])

######################################################################
# Finetuning the convnet
if opt.use_dense:
    model = ft_net_dense(len(class_names), opt.droprate)
elif opt.use_NAS:
    model = ft_net_NAS(len(class_names), opt.droprate)
else:
    model = ft_net(len(class_names), opt.droprate, opt.stride)

if opt.PCB:
    model = PCB(len(class_names))

opt.nclasses = len(class_names)

print(model)

if not opt.PCB:
    ignored_params = list(map(id, model.classifier.parameters()))
    base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
    optimizer_ft = optim.SGD([
        {'params': base_params, 'lr': 0.1 * opt.lr},
        {'params': model.classifier.parameters(), 'lr': opt.lr}
    ], weight_decay=5e-4, momentum=0.9, nesterov=True)
else:
    ignored_params = list(map(id, model.model.fc.parameters()))
    ignored_params += (list(map(id, model.classifier0.parameters()))
                       + list(map(id, model.classifier1.parameters()))
                       + list(map(id, model.classifier2.parameters()))
                       + list(map(id, model.classifier3.parameters()))
                       + list(map(id, model.classifier4.parameters()))
                       + list(map(id, model.classifier5.parameters()))
                       # + list(map(id, model.classifier7.parameters()))
                       )
    base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
    optimizer_ft = optim.SGD([
        {'params': base_params, 'lr': 0.1 * opt.lr},
        {'params': model.model.fc.parameters(), 'lr': opt.lr},
        {'params': model.classifier0.parameters(), 'lr': opt.lr},
        {'params': model.classifier1.parameters(), 'lr': opt.lr},
        {'params': model.classifier2.parameters(), 'lr': opt.lr},
        {'params': model.classifier3.parameters(), 'lr': opt.lr},
        {'params': model.classifier4.parameters(), 'lr': opt.lr},
        {'params': model.classifier5.parameters(), 'lr': opt.lr},
        # {'params': model.classifier6.parameters(), 'lr': 0.01},
        # {'params': model.classifier7.parameters(), 'lr': 0.01}
    ], weight_decay=5e-4, momentum=0.9, nesterov=True)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=40, gamma=0.1)

######################################################################
# Train and evaluate
dir_name = os.path.join('./model', name)
if not os.path.isdir(dir_name):
    os.mkdir(dir_name)

copyfile('./train.py', dir_name + '/train.py')
copyfile('./model.py', dir_name + '/model.py')

with open('%s/opts.yaml' % dir_name, 'w') as fp:
    yaml.dump(vars(opt), fp, default_flow_style=False)

model = model.cuda()
if fp16:
    # optimizer_ft = FP16_Optimizer(optimizer_ft, static_loss_scale=128.0)
    model, optimizer_ft = amp.initialize(model, optimizer_ft, opt_level="O1")
criterion = nn.CrossEntropyLoss()

model = train_model(model, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=60)
```
I reduced the batch_size from 32 to 16 and it worked. Can you please tell me what values I should put for batch_size and num_workers, or does it vary from system to system? And please tell me what prerequisite knowledge is required to properly understand this code; I have only a small understanding of ML and Python, so what else is required?
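No rule of thumb is given in the thread, so the following is only a rough sketch: batch_size is bounded by GPU memory, num_workers by CPU cores (and, on Windows, by the multiprocessing issue shown below), so one common tactic is to halve the batch size until a single training step fits. The `probe_batchsize` helper is hypothetical and assumes the non-PCB `model`, `criterion`, and `image_datasets['train']` objects from the pasted train.py:

```python
import torch

def probe_batchsize(dataset, model, criterion, start=32):
    """Halve the batch size until one forward/backward pass fits on the GPU."""
    bs = start
    while bs >= 1:
        try:
            loader = torch.utils.data.DataLoader(dataset, batch_size=bs, shuffle=True)
            inputs, labels = next(iter(loader))
            loss = criterion(model(inputs.cuda()), labels.cuda())
            loss.backward()          # allocate gradient buffers too, like a real step
            model.zero_grad()
            return bs
        except RuntimeError as err:  # CUDA OOM surfaces as a RuntimeError
            if 'out of memory' not in str(err):
                raise
            torch.cuda.empty_cache()
            bs //= 2
    raise RuntimeError('even batch size 1 does not fit on this GPU')

# e.g. opt.batchsize = probe_batchsize(image_datasets['train'], model, criterion)
```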
I am sorry, I really do not know the answers to these questions.
Hello. You have done excellent and impressive work. I am new to machine learning and I was facing problems trying to run the code; I would be grateful if you could help me.

My specs: i5 8300H, GTX 1050 Ti 4 GB, 8 GB RAM. I am currently running this code on Windows 10 and want to re-train a model using the Market-1501 dataset.

Error:

```
C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\model.py:14: UserWarning: nn.init.kaiming_normal is now deprecated in favor of nn.init.kaiming_normal_.
  init.kaiming_normal(m.weight.data, a=0, mode='fan_out')
C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\model.py:15: UserWarning: nn.init.constant is now deprecated in favor of nn.init.constant_.
  init.constant(m.bias.data, 0.0)
C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\model.py:17: UserWarning: nn.init.normal is now deprecated in favor of nn.init.normal_.
  init.normal(m.weight.data, 1.0, 0.02)
C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\model.py:18: UserWarning: nn.init.constant is now deprecated in favor of nn.init.constant_.
  init.constant(m.bias.data, 0.0)
C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\model.py:23: UserWarning: nn.init.normal is now deprecated in favor of nn.init.normal_.
  init.normal(m.weight.data, std=0.001)
C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\model.py:24: UserWarning: nn.init.constant is now deprecated in favor of nn.init.constant_.
  init.constant(m.bias.data, 0.0)
net output size: torch.Size([8, 751])
0
[Resize(size=(288, 144), interpolation=PIL.Image.BICUBIC), RandomCrop(size=(256, 128), padding=None), RandomHorizontalFlip(p=0.5), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), <random_erasing.RandomErasing object at 0x000001CD467C78B0>]
```
```
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\Users\ATUL\Anaconda3\lib\multiprocessing\spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "C:\Users\ATUL\Anaconda3\lib\multiprocessing\spawn.py", line 125, in _main
    prepare(preparation_data)
  File "C:\Users\ATUL\Anaconda3\lib\multiprocessing\spawn.py", line 236, in prepare
    _fixup_main_from_path(data['init_main_from_path'])
  File "C:\Users\ATUL\Anaconda3\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
    main_content = runpy.run_path(main_path,
  File "C:\Users\ATUL\Anaconda3\lib\runpy.py", line 265, in run_path
    return _run_module_code(code, init_globals, run_name,
  File "C:\Users\ATUL\Anaconda3\lib\runpy.py", line 97, in _run_module_code
    _run_code(code, mod_globals, init_globals,
  File "C:\Users\ATUL\Anaconda3\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\train_market.py", line 20, in <module>
    from model import ft_net, ft_net_dense, PCB
ModuleNotFoundError: No module named 'model'
```
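This traceback shows Windows' spawn-based multiprocessing re-importing train_market.py in a DataLoader worker process whose working directory no longer contains model.py, which is why the same ModuleNotFoundError reappears only when num_workers > 0. Setting num_workers=0, as earlier in this thread, sidesteps it; the usual structural fix is sketched below (the `main()` wrapper is hypothetical, not the repo's actual layout):

```python
# train_market.py (sketch): keep module-level code import-safe so that the
# DataLoader worker processes can re-import this file on Windows.
from model import ft_net, ft_net_dense, PCB

def main():
    # build the dataloaders (num_workers > 0 is now safe), the model,
    # the optimizer, and run the training loop here
    ...

if __name__ == '__main__':
    main()  # only the parent process executes the training entry point
```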