Open liu824 opened 4 years ago
Hello, I have already set up a model to extract I3D features, but CTCN has no interface for I3D features, even though the paper reports comparison experiments with I3D.
The code for the comparison experiments with other features has not been released yet. To use I3D features, you can modify the load_file function in ctcn_reader.py so that it accepts I3D feature input.
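For example, assuming one pre-extracted I3D feature file per video is stored as <video_id>.npy with shape (temporal_length, feature_dim), a minimal load_file replacement could look roughly like the sketch below; self.i3d is a hypothetical config entry pointing at the I3D feature directory, and the zero-fallback shape must be matched to your own feature dimension:

    def load_file(self, fname):
        # hypothetical single-stream loader: one I3D feature file per video,
        # shaped (temporal_length, feature_dim)
        feats = np.load(os.path.join(self.root, self.i3d, fname + '.npy'))
        if feats.shape[0] == 0 or feats.shape[1] == 0:
            # fallback; the second dimension must match your I3D feature size
            feats = np.zeros((512, 1024), np.float32)
            logger.info('### file loading len = 0 {} ###'.format(fname))
        return feats.astype(np.float32)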
I have tried for a long time but I am still stuck on data loading. My plan is to reuse the dataset from BMN with CTCN: I converted all the annotation files to the format of the CTCN example and made the following changes to the original ctcn_reader.py (a resize step was added to the data-loading part):
import os
import random
import cv2
import sys
import numpy as np
import gc
import copy
import multiprocessing
import logging

logger = logging.getLogger(__name__)

from scipy.interpolate import interp1d

try:
    import cPickle as pickle
    from cStringIO import StringIO
except ImportError:
    import pickle
    from io import BytesIO

from .reader_utils import DataReader
from models.ctcn.ctcn_utils import box_clamp1D, box_iou1D, BoxCoder

python_ver = sys.version_info


class CTCNReader(DataReader):
    """
    Data reader for C-TCN model, which was stored as features extracted by prior networks
    dataset cfg: img_size, the temporal dimension size of input data
                 root, the root dir of data
                 snippet_length, snippet length when sampling
                 filelist, the file list storing id and annotations of each data item
                 rgb, the dir of rgb data
                 flow, the dir of optical flow data
                 batch_size, batch size of input data
                 num_threads, number of threads of data processing
    """
def __init__(self, name, mode, cfg):
self.name = name
self.mode = mode
self.img_size = cfg.MODEL.img_size # 512
self.snippet_length = cfg.MODEL.snippet_length # 1
self.root = cfg.MODEL.root # root dir of data
self.filelist = cfg[mode.upper()]['filelist']
self.rgb = cfg[mode.upper()]['rgb']
self.flow = cfg[mode.upper()]['flow']
self.batch_size = cfg[mode.upper()]['batch_size']
self.num_threads = cfg[mode.upper()]['num_threads']
if (mode == 'test') or (mode == 'infer'):
self.num_threads = 1 # set num_threads as 1 for test and infer
def random_move(self, img, o_boxes, labels):
boxes = np.array(o_boxes)
mask = np.zeros(img.shape[0])
for i in boxes:
for j in range(i[0].astype('int'),
min(i[1].astype('int'), img.shape[0])):
mask[j] = 1
mask = (mask == 0)
bg = img[mask]
bg_len = bg.shape[0]
if bg_len < 5:
return img, boxes, labels
insert_place = random.sample(range(bg_len), len(boxes))
index = np.argsort(insert_place)
new_img = bg[0:insert_place[index[0]], :]
new_boxes = []
new_labels = []
for i in range(boxes.shape[0]):
new_boxes.append([
new_img.shape[0],
new_img.shape[0] + boxes[index[i]][1] - boxes[index[i]][0]
])
new_labels.append(labels[index[i]])
new_img = np.concatenate(
(new_img,
img[int(boxes[index[i]][0]):int(boxes[index[i]][1]), :]))
if i < boxes.shape[0] - 1:
new_img = np.concatenate(
(new_img,
bg[insert_place[index[i]]:insert_place[index[i + 1]], :]))
new_img = np.concatenate(
(new_img, bg[insert_place[index[len(boxes) - 1]]:, :]))
del img, boxes, mask, bg, labels
gc.collect()
return new_img, new_boxes, new_labels
def random_crop(self, img, boxes, labels, min_scale=0.3):
boxes = np.array(boxes)
labels = np.array(labels)
imh, imw = img.shape[:2]
params = [(0, imh)]
for min_iou in (0, 0.1, 0.3, 0.5, 0.7, 0.9):
for _ in range(100):
scale = random.uniform(0.3, 1)
h = int(imh * scale)
y = random.randrange(imh - h)
roi = [[y, y + h]]
ious = box_iou1D(boxes, roi)
if ious.min() >= min_iou:
params.append((y, h))
break
y, h = random.choice(params)
img = img[y:y + h, :]
center = (boxes[:, 0] + boxes[:, 1]) / 2
mask = (center[:] >= y) & (center[:] <= y + h)
if mask.any():
boxes = boxes[np.squeeze(mask.nonzero())] - np.array([[y, y]])
boxes = box_clamp1D(boxes, 0, h)
labels = labels[mask]
else:
boxes = [[0, 0]]
labels = [0]
return img, boxes, labels
def resize(self, img, boxes, size, random_interpolation=False):
'''Resize the input PIL image to given size.
If boxes is not None, resize boxes accordingly.
Args:
img: image to be resized.
boxes: (tensor) object boxes, sized [#obj,2].
size: (tuple or int)
- if is tuple, resize image to the size.
- if is int, resize the shorter side to the size while maintaining the aspect ratio.
random_interpolation: (bool) randomly choose a resize interpolation method.
Returns:
img: (cv2's numpy.ndarray) resized image.
boxes: (tensor) resized boxes.
Example:
>> img, boxes = resize(img, boxes, 600) # resize shorter side to 600
'''
h, w = img.shape[:2]
if h == size:
return img, boxes
if h == 0:
img = np.zeros((512, 402), np.float32)
return img, boxes
ow = w
oh = size
sw = 1
sh = float(oh) / h
method = random.choice([
cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA
]) if random_interpolation else cv2.INTER_NEAREST
img = cv2.resize(img, (ow, oh), interpolation=method)
if boxes is not None:
boxes = boxes * np.array([sh, sh])
return img, boxes
def transform(self, feats, boxes, labels, mode):
feats = np.array(feats)
boxes = np.array(boxes)
labels = np.array(labels)
# print('name {}, labels {}'.format(fname, labels))
# logger.info('transform ok {}') #=============================>
if mode == 'train':
feats, boxes, labels = self.random_move(feats, boxes, labels)
feats, boxes, labels = self.random_crop(feats, boxes, labels)
# logger.info('random_crop ok ') #=============================>
feats, boxes = self.resize(
feats, boxes, size=self.img_size, random_interpolation=True)
h, w = feats.shape[:2]
img = feats.reshape(1, h, w)
Coder = BoxCoder()
boxes, labels = Coder.encode(boxes, labels)
# logger.info('encode ok ') #=============================>
if mode == 'test' or mode == 'valid':
feats, boxes = self.resize(feats, boxes, size=self.img_size)
h, w = feats.shape[:2]
img = feats.reshape(1, h, w)
Coder = BoxCoder()
boxes, labels = Coder.encode(boxes, labels)
# logger.info('transform ok {}') #=============================>
return img, boxes, labels
    def resizeFeature(self, inputData, newSize):
        # inputData: (temporal_length, feature_dimension)
        originalSize = len(inputData)
        # print originalSize
        if originalSize == 1:
            inputData = np.reshape(inputData, [-1])
            return np.stack([inputData] * newSize)
        x = np.array(range(originalSize))
        f = interp1d(x, inputData, axis=0)
        x_new = [i * float(originalSize - 1) / (newSize - 1) for i in range(newSize)]
        y_new = f(x_new)
        return y_new

    def resizeFeatures(self, inputData, tscal, featuredim):
        I = self.resizeFeature(inputData, tscal)  # resize along the temporal dimension
        data = np.zeros(shape=(len(I), featuredim))  # allocate an array of the target feature shape
        for index, feature in enumerate(I):
            feature = self.resizeFeature(feature, featuredim)  # resize along the feature dimension
            data[index, :] = feature
        return data
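    # Hypothetical usage of the two helpers above (file name and shapes are just an example):
    #   raw = np.load('some_video.npy')             # e.g. feature map of shape (T, 400)
    #   fixed = self.resizeFeatures(raw, 501, 201)  # -> (501, 201), as used in load_file below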
def load_file(self, fname):
data_rgb = np.load(
os.path.join(self.root, self.rgb, fname + '.npy'))
data_flow = np.load(
os.path.join(self.root, self.flow, fname + '.npy'))
if data_flow.shape[0] < data_rgb.shape[0]:
data_rgb = data_rgb[0:data_flow.shape[0], :]
elif data_flow.shape[0] > data_rgb.shape[0]:
data_flow = data_flow[0:data_rgb.shape[0], :]
        data_rgb = self.resizeFeatures(data_rgb, 501, 201)
        data_flow = self.resizeFeatures(data_flow, 501, 201)  # resize the features to the required size
feats = np.concatenate((data_rgb, data_flow), axis=1)
if feats.shape[0] == 0 or feats.shape[1] == 0:
feats = np.zeros((512, 1024), np.float32)
logger.info('### file loading len = 0 {} ###'.format(fname))
return feats
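    # Note on shapes: after the resize above, feats is (501, 402) (201 rgb + 201 flow
    # channels); transform() later rescales the temporal axis to img_size (512), so the
    # network input becomes (1, 512, 402). The zero-fallback above still uses the
    # original (512, 1024) shape, which no longer matches the resized feature dimension.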
# def load_file(self, fname):
# if python_ver < (3, 0):
# rgb_pkl = pickle.load(
# open(os.path.join(self.root, self.rgb, fname + '.npy'), 'rb'))
# flow_pkl = pickle.load(
# open(os.path.join(self.root, self.flow, fname + '.npy'), 'rb'))
# else:
# rgb_pkl = pickle.load(
# open(os.path.join(self.root, self.rgb, fname + '.npy'), 'rb'),
# encoding='bytes')
# flow_pkl = pickle.load(
# open(os.path.join(self.root, self.flow, fname + '.npy'), 'rb'),
# encoding='bytes')
# data_flow = np.array(flow_pkl[b'scores'])
# data_rgb = np.array(rgb_pkl[b'scores'])
# if data_flow.shape[0] < data_rgb.shape[0]:
# data_rgb = data_rgb[0:data_flow.shape[0], :]
# elif data_flow.shape[0] > data_rgb.shape[0]:
# data_flow = data_flow[0:data_rgb.shape[0], :]
# feats = np.concatenate((data_rgb, data_flow), axis=1)
# if feats.shape[0] == 0 or feats.shape[1] == 0:
# feats = np.zeros((512, 1024), np.float32)
# logger.info('### file loading len = 0 {} ###'.format(fname))
# return feats
def create_reader(self):
"""reader creator for ctcn model"""
if self.mode == 'infer':
return self.make_infer_reader()
if self.num_threads == 1:
return self.make_reader()
else:
return self.make_multiprocess_reader()
def make_infer_reader(self):
"""reader for inference"""
def reader():
with open(self.filelist) as f:
reader_list = f.readlines()
batch_out = []
for line in reader_list:
fname = line.strip().split()[0]
rgb_exist = os.path.exists(
os.path.join(self.root, self.rgb, fname + '.npy'))
flow_exist = os.path.exists(
os.path.join(self.root, self.flow, fname + '.npy'))
if not (rgb_exist and flow_exist):
                        logger.info('file not exist {}'.format(fname))
continue
try:
feats = self.load_file(fname)
feats, boxes = self.resize(
feats, boxes=None, size=self.img_size)
h, w = feats.shape[:2]
feats = feats.reshape(1, h, w)
except:
logger.info('Error when loading {}'.format(fname))
continue
batch_out.append((feats, fname))
if len(batch_out) == self.batch_size:
yield batch_out
batch_out = []
return reader
def make_reader(self):
"""single process reader"""
def reader():
with open(self.filelist) as f:
reader_list = f.readlines()
if self.mode == 'train':
random.shuffle(reader_list)
fnames = []
total_boxes = []
total_labels = []
total_label_ids = []
for i in range(len(reader_list)):
line = reader_list[i]
splited = line.strip().split()
rgb_exist = os.path.exists(
os.path.join(self.root, self.rgb, splited[0] + '.npy'))
flow_exist = os.path.exists(
os.path.join(self.root, self.flow, splited[0] + '.npy'))
if not (rgb_exist and flow_exist):
logger.info('file not exist {}'.format(splited[0]))
continue
fnames.append(splited[0])
frames_num = int(splited[1]) // self.snippet_length
num_boxes = int(splited[2])
box = []
label = []
for ii in range(num_boxes):
c = splited[3 + 3 * ii]
xmin = splited[4 + 3 * ii]
xmax = splited[5 + 3 * ii]
box.append([
float(xmin) / self.snippet_length,
float(xmax) / self.snippet_length
])
label.append(int(c))
total_label_ids.append(i)
total_boxes.append(box)
total_labels.append(label)
num_videos = len(fnames)
batch_out = []
for idx in range(num_videos):
fname = fnames[idx]
try:
feats = self.load_file(fname)
boxes = copy.deepcopy(total_boxes[idx])
labels = copy.deepcopy(total_labels[idx])
feats, boxes, labels = self.transform(feats, boxes, labels,
self.mode)
labels = labels.astype('int64')
boxes = boxes.astype('float32')
num_pos = len(np.where(labels > 0)[0])
except:
logger.info('Error when loading {}'.format(fname))
continue
if (num_pos < 1) and (self.mode == 'train' or
self.mode == 'valid'):
#logger.info('=== no pos for ==='.format(fname, num_pos))
continue
if self.mode == 'train' or self.mode == 'valid':
batch_out.append((feats, boxes, labels))
elif self.mode == 'test':
batch_out.append(
(feats, boxes, labels, total_label_ids[idx]))
else:
raise NotImplementedError('mode {} not implemented'.format(
self.mode))
if len(batch_out) == self.batch_size:
yield batch_out
batch_out = []
return reader
def make_multiprocess_reader(self):
"""multiprocess reader"""
def read_into_queue(reader_list, queue):
fnames = []
total_boxes = []
total_labels = []
total_label_ids = []
#for line in reader_list:
for i in range(len(reader_list)):
line = reader_list[i]
splited = line.strip().split()
rgb_exist = os.path.exists(
os.path.join(self.root, self.rgb, splited[0] + '.npy'))
flow_exist = os.path.exists(
os.path.join(self.root, self.flow, splited[0] + '.npy'))
if not (rgb_exist and flow_exist):
logger.info('file not exist {}'.format(splited[0]))
continue
fnames.append(splited[0])
frames_num = int(splited[1]) // self.snippet_length
num_boxes = int(splited[2])
box = []
label = []
for ii in range(num_boxes):
c = splited[3 + 3 * ii]
xmin = splited[4 + 3 * ii]
xmax = splited[5 + 3 * ii]
box.append([
float(xmin) / self.snippet_length,
float(xmax) / self.snippet_length
])
label.append(int(c))
total_label_ids.append(i)
total_boxes.append(box)
total_labels.append(label)
num_videos = len(fnames)
batch_out = []
for idx in range(num_videos):
fname = fnames[idx]
try:
feats = self.load_file(fname)
# logger.info('load_file ok {}'.format(fname))
boxes = copy.deepcopy(total_boxes[idx])
labels = copy.deepcopy(total_labels[idx])
# logger.info('deepcopy ok {}'.format(fname))#=============================>
feats, boxes, labels = self.transform(feats, boxes, labels,
self.mode)
# logger.info('transform ok {}'.format(fname)) #=============================>
labels = labels.astype('int64')
boxes = boxes.astype('float32')
num_pos = len(np.where(labels > 0)[0])
# logger.info('try ok {}'.format(fname))
except:
logger.info('Error when loading {}'.format(fname))
continue
if (not (num_pos >= 1)) and (self.mode == 'train' or
self.mode == 'valid'):
#logger.info('=== no pos for {}, num_pos = {} ==='.format(fname, num_pos))
continue
if self.mode == 'train' or self.mode == 'valid':
batch_out.append((feats, boxes, labels))
elif self.mode == 'test':
batch_out.append(
(feats, boxes, labels, total_label_ids[idx]))
else:
raise NotImplementedError('mode {} not implemented'.format(
self.mode))
if len(batch_out) == self.batch_size:
queue.put(batch_out)
batch_out = []
queue.put(None)
def queue_reader():
with open(self.filelist) as f:
fl = f.readlines()
if self.mode == 'train':
random.shuffle(fl)
n = self.num_threads
queue_size = 20
reader_lists = [None] * n
file_num = int(len(fl) // n)
for i in range(n):
if i < len(reader_lists) - 1:
tmp_list = fl[i * file_num:(i + 1) * file_num]
else:
tmp_list = fl[i * file_num:]
reader_lists[i] = tmp_list
queue = multiprocessing.Queue(queue_size)
p_list = [None] * len(reader_lists)
# for reader_list in reader_lists:
for i in range(len(reader_lists)):
reader_list = reader_lists[i]
p_list[i] = multiprocessing.Process(
target=read_into_queue, args=(reader_list, queue))
p_list[i].start()
reader_num = len(reader_lists)
finish_num = 0
while finish_num < reader_num:
sample = queue.get()
if sample is None:
finish_num += 1
else:
yield sample
for i in range(len(p_list)):
if p_list[i].is_alive():
p_list[i].join()
return queue_reader
What error do you get after making these changes?
DALI is not installed, you can improve performance if use DALI
[INFO: train.py: 254]: Namespace(batch_size=None, config='./configs/ctcn.yaml', epoch=None, fix_random_seed=False, is_profiler=0, learning_rate=None, log_interval=10, model_name='CTCN', no_memory_optimize=False, pretrain=None, profiler_path='./', resume=None, save_dir='./data/checkpoints', use_gpu=True, valid_interval=0)
[INFO: config_utils.py: 70]: ---------------- Train Arguments ----------------
W0718 14:57:35.395337 9542 device_context.cc:252] Please NOTE: device: 0, CUDA Capability: 70, Driver API Version: 10.1, Runtime API Version: 9.0
W0718 14:57:35.399122 9542 device_context.cc:260] device: 0, cuDNN Version: 7.3.
[INFO: detection_metrics.py: 68]: Resetting train metrics...
[INFO: detection_metrics.py: 68]: Resetting valid metrics...
[INFO: train_utils.py: 46]: ------- learning rate [0.], learning rate counter [-1] -----
[INFO: ctcn_reader.py: 434]: Error when loading v_df5rlVZD0Zc
[INFO: ctcn_reader.py: 432]: try ok v_hlvs-e3bCq0
[INFO: ctcn_reader.py: 432]: try ok v_Z9gstJONME4
[INFO: ctcn_reader.py: 434]: Error when loading v_l3wFDLyBQ9U
[INFO: ctcn_reader.py: 432]: try ok v_41Qick6tM
[INFO: ctcn_reader.py: 432]: try ok v_0YQPGAsZPgY
I0718 14:57:39.717900 9542 build_strategy.cc:361] set enable_sequential_execution:1
[INFO: ctcn_reader.py: 434]: Error when loading v_en4Boe2rAuo
[INFO: ctcn_reader.py: 432]: try ok vMR8G1jwM4o
[INFO: ctcn_reader.py: 432]: try ok vekWWP0dQZM
[INFO: ctcn_reader.py: 432]: try ok v_6YmUCNIMpB4
[INFO: ctcn_reader.py: 432]: try ok v_bDf_xpUpdmU
[INFO: ctcn_reader.py: 432]: try ok v_V-6nF6U6rfI
[INFO: ctcn_reader.py: 434]: Error when loading v_ISa7BQJasm4
[INFO: ctcn_reader.py: 432]: try ok v-KWToNMY1Lc
[INFO: ctcn_reader.py: 434]: Error when loading v_QOaPQpXemCA
[INFO: ctcn_reader.py: 432]: try ok v_7DY1vm9RiIk
[INFO: ctcn_reader.py: 432]: try ok v_HrWr7FmBYn4
[INFO: ctcn_reader.py: 432]: try ok v_AffS41W1Jgg
[INFO: ctcn_reader.py: 432]: try ok v_bM5VpCdPOrw
[INFO: ctcn_reader.py: 432]: try ok v_EqhnCfb5vEc
[INFO: ctcn_reader.py: 434]: Error when loading v_FKtYeu2bJpA
[INFO: ctcn_reader.py: 432]: try ok v_BwwjQZDn3NE
[INFO: ctcn_reader.py: 432]: try ok v_kbe4iowYMqM
[INFO: ctcn_reader.py: 432]: try ok v_c0qbyRWSptg
[INFO: ctcn_reader.py: 432]: try ok v_VVLeWYKoNUE
[INFO: ctcn_reader.py: 434]: Error when loading v_zEttEkAdHts
[INFO: ctcn_reader.py: 434]: Error when loading v_ZrAxt3ZjKdM
[INFO: ctcn_reader.py: 432]: try ok v_poFH53rF9uY
[INFO: ctcn_reader.py: 432]: try ok v_tO1VJnsd8sg
[INFO: ctcn_reader.py: 434]: Error when loading v_5tM7hwg_8wU
[INFO: metrics_util.py: 284]: [TRAIN 2020-07-18 14:57:42] Epoch 0, iter 0, time 3.5042059421539307, Loss = 28.324298858642578, loc_loss = 5.457037925720215, cls_loss = 22.86726188659668
[INFO: ctcn_reader.py: 432]: try ok v_mDqiAEB4Ads
[INFO: ctcn_reader.py: 432]: try ok v_6GYdu5G61g8
[INFO: ctcn_reader.py: 432]: try ok v_iGax3fokst8
[INFO: ctcn_reader.py: 432]: try ok v_29k1TypoU4w
[INFO: ctcn_reader.py: 434]: Error when loading v_K-rdUv9LXTc
[INFO: ctcn_reader.py: 432]: try ok v_vuntaZJBcfI
[INFO: ctcn_reader.py: 434]: Error when loading v_NaN4aHalv-k
[INFO: ctcn_reader.py: 432]: try ok v_9WhPG89P-tg
[INFO: ctcn_reader.py: 434]: Error when loading v_W5tacjehFRA
[INFO: ctcn_reader.py: 432]: try ok v_kkcTQHFNXAg
[INFO: ctcn_reader.py: 434]: Error when loading v_2ttzbjuKoT0
[INFO: ctcn_reader.py: 432]: try ok v_jXIKHEsmVl4
[INFO: ctcn_reader.py: 432]: try ok v_iM8rmKLJnt8
[INFO: ctcn_reader.py: 434]: Error when loading v_cGxZAOpmpKQ
[INFO: ctcn_reader.py: 432]: try ok v_aDWrPrNFdR0
[INFO: ctcn_reader.py: 434]: Error when loading v_rSGboODhu04
[INFO: ctcn_reader.py: 434]: Error when loading v_MWjdJIAkMyI
[INFO: ctcn_reader.py: 432]: try ok v_qlq13iS0nEA
[INFO: ctcn_reader.py: 434]: Error when loading v_UNXLfqkwKFc
[INFO: ctcn_reader.py: 432]: try ok v_zzci2xZ011A
[INFO: ctcn_reader.py: 432]: try ok v_FUrLg7FETWo
[INFO: ctcn_reader.py: 432]: try ok v_IcfWEKjl_AY
[INFO: ctcn_reader.py: 432]: try ok v_lmYmYYXs1mo
[INFO: ctcn_reader.py: 432]: try ok v_IoJoUIxzdac
[INFO: ctcn_reader.py: 432]: try ok v_B7t85SESTXI
[INFO: ctcn_reader.py: 432]: try ok v_hzuQYOG0a_g
[INFO: ctcn_reader.py: 432]: try ok v_ub0pmYL8i5s
[INFO: ctcn_reader.py: 432]: try ok v_Oj_PLXsWxL4
[INFO: ctcn_reader.py: 432]: try ok v_dJAZplo9ke0
[INFO: ctcn_reader.py: 432]: try ok v_fZQS02Ypca4
[INFO: ctcn_reader.py: 432]: try ok v_qHU7T2LBToI
[INFO: ctcn_reader.py: 432]: try ok v_at_ahUtnF9Q
[INFO: ctcn_reader.py: 432]: try ok v_nB90Q8sTBgE
[INFO: ctcn_reader.py: 432]: try ok v_HGSZ9_CVuM4
[INFO: ctcn_reader.py: 434]: Error when loading v-1EC1ZP6aC4
[INFO: ctcn_reader.py: 432]: try ok v_XC6tvSBS0PA
[INFO: ctcn_reader.py: 432]: try ok v_JKZ-3N1fYL8
[INFO: ctcn_reader.py: 432]: try ok v_6tdIiKzMVcg
[INFO: ctcn_reader.py: 434]: Error when loading v_QG2xYcN_bl0
[INFO: ctcn_reader.py: 434]: Error when loading v_XulEpl5Bi9A
[INFO: ctcn_reader.py: 432]: try ok vuKKSGTNJAY
[INFO: ctcn_reader.py: 432]: try ok v_sxf0x55Cvb4
[INFO: ctcn_reader.py: 432]: try ok v__uOfIm1tFcI
[INFO: ctcn_reader.py: 432]: try ok v_GhwvPy4_2KE
[INFO: ctcn_reader.py: 434]: Error when loading v_GeR07RwzkBw
[INFO: ctcn_reader.py: 434]: Error when loading v_yo73PqGytMQ
[INFO: ctcn_reader.py: 432]: try ok v_7MDr4f1r8rI
[INFO: ctcn_reader.py: 434]: Error when loading v_UQFMy9Tz8dY
[INFO: ctcn_reader.py: 432]: try ok v_8GxWehFZVRE
[INFO: ctcn_reader.py: 432]: try ok v_5yqHTGQm2B0
[INFO: ctcn_reader.py: 432]: try ok v_SdsoRu3953g
[INFO: ctcn_reader.py: 432]: try ok v_ebmi7XJA8Oo
[INFO: ctcn_reader.py: 432]: try ok v_kCD0iQFnHA4
[INFO: ctcn_reader.py: 432]: try ok v_uHPrH22rPgU
[INFO: ctcn_reader.py: 432]: try ok v_MOOeHWuuxlo
[INFO: ctcn_reader.py: 434]: Error when loading v_OQWqRMr0PSA
[INFO: ctcn_reader.py: 432]: try ok v_5rO2DwFhdwo
[INFO: ctcn_reader.py: 432]: try ok v_YGBldj7DUq4
[INFO: ctcn_reader.py: 432]: try ok v_xLRUuV1z3QA
[INFO: ctcn_reader.py: 432]: try ok v_scBelfrnHoI
[INFO: ctcn_reader.py: 434]: Error when loading v_j_r7zJWxI8w
[INFO: ctcn_reader.py: 432]: try ok v_UdIoEWadRxI
[INFO: ctcn_reader.py: 432]: try ok v_KePjkCySBCs
[INFO: ctcn_reader.py: 432]: try ok v_B69Fkd_L9gA
[INFO: ctcn_reader.py: 432]: try ok v_gLfIPN_WM48
[INFO: ctcn_reader.py: 432]: try ok v_cp52LdlmlUk
[INFO: ctcn_reader.py: 434]: Error when loading v_1xHdr8MWJOc
The log lines like "try ok v_B69Fkd_L9gA" are prints I added inside the reader to pin down which step goes wrong.
You can remove the try/except and check what the concrete exception is when "Error when loading" happens.
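For reference, one low-effort way to do that without deleting the whole block is to log the full traceback and re-raise inside the existing except clause of read_into_queue; only the except line and its body change in this sketch:

                try:
                    # original loading / transform calls stay unchanged
                    feats = self.load_file(fname)
                    boxes = copy.deepcopy(total_boxes[idx])
                    labels = copy.deepcopy(total_labels[idx])
                    feats, boxes, labels = self.transform(feats, boxes, labels,
                                                          self.mode)
                    labels = labels.astype('int64')
                    boxes = boxes.astype('float32')
                    num_pos = len(np.where(labels > 0)[0])
                except Exception:
                    # logger.exception records the complete traceback, so the real
                    # cause behind 'Error when loading' becomes visible in the log
                    logger.exception('Error when loading {}'.format(fname))
                    raise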
After that change the following error appears. The IndexError: index 0 is out of bounds for axis 0 with size 0 is probably because some training samples contain no boxes (a guard sketch for that case follows the log):
DALI is not installed, you can improve performance if use DALI
[INFO: train.py: 254]: Namespace(batch_size=None, config='./configs/ctcn.yaml', epoch=None, fix_random_seed=False, is_profiler=0, learning_rate=None, log_interval=10, model_name='CTCN', no_memory_optimize=False, pretrain=None, profiler_path='./', resume=None, save_dir='./data/checkpoints', use_gpu=True, valid_interval=0)
[INFO: config_utils.py: 70]: ---------------- Train Arguments ----------------
W0720 11:27:48.408154 648 device_context.cc:252] Please NOTE: device: 0, CUDA Capability: 70, Driver API Version: 9.2, Runtime API Version: 9.0
W0720 11:27:48.413208 648 device_context.cc:260] device: 0, cuDNN Version: 7.3.
[INFO: detection_metrics.py: 68]: Resetting train metrics...
[INFO: detection_metrics.py: 68]: Resetting valid metrics...
[INFO: train_utils.py: 46]: ------- learning rate [0.], learning rate counter [-1] -----
Process Process-4:
Traceback (most recent call last):
  File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/aistudio/models-release-1.8/PaddleCV/video/reader/ctcn_reader.py", line 427, in read_into_queue
    self.mode)
  File "/home/aistudio/models-release-1.8/PaddleCV/video/reader/ctcn_reader.py", line 184, in transform
    feats, boxes, labels = self.random_move(feats, boxes, labels)
  File "/home/aistudio/models-release-1.8/PaddleCV/video/reader/ctcn_reader.py", line 86, in random_move
    new_img = bg[0:insert_place[index[0]], :]
IndexError: index 0 is out of bounds for axis 0 with size 0
(Process-5, Process-2, Process-6, Process-7, Process-1, Process-3 and Process-8 abort with the same traceback)
I0720 11:27:52.679932 648 build_strategy.cc:361] set enable_sequential_execution:1
[INFO: metrics_util.py: 284]: [TRAIN 2020-07-20 11:27:54] Epoch 0, iter 0, time 3.409763813018799, Loss = 29.358491897583008, loc_loss = 6.238377571105957, cls_loss = 23.120115280151367
W0720 11:28:05.464061 862 operator.cc:187] conv2d_grad raises an exception paddle::memory::allocation::BadAlloc,
0 std::string paddle::platform::GetTraceBackString
ResourceExhaustedError:
Out of memory error on GPU 0. Cannot allocate 1.078823GB memory on GPU 0, available memory is only 153.937500MB.
Please check whether there is any other process using GPU 0.
If no, please decrease the batch size of your model.
at (/paddle/paddle/fluid/memory/allocation/cuda_allocator.cc:69)
F0720 11:28:05.464174 862 exception_holder.h:37] std::exception caught,
0 std::string paddle::platform::GetTraceBackString
ResourceExhaustedError:
Out of memory error on GPU 0. Cannot allocate 1.078823GB memory on GPU 0, available memory is only 153.937500MB.
Please check whether there is any other process using GPU 0.
If no, please decrease the batch size of your model.
at (/paddle/paddle/fluid/memory/allocation/cuda_allocator.cc:69) Check failure stack trace: @ 0x7f2310f297dd google::LogMessage::Fail() @ 0x7f2310f2d28c google::LogMessage::SendToLog() @ 0x7f2310f29303 google::LogMessage::Flush() @ 0x7f2310f2e79e google::LogMessageFatal::~LogMessageFatal() @ 0x7f2313ee17c8 paddle::framework::details::ExceptionHolder::Catch() @ 0x7f2313f7c72e paddle::framework::details::FastThreadedSSAGraphExecutor::RunOpSync() @ 0x7f2313f7a0cf paddle::framework::details::FastThreadedSSAGraphExecutor::RunOp() @ 0x7f2313f7a394 _ZNSt17_Function_handlerIFvvESt17reference_wrapperISt12_Bind_simpleIFS1_ISt5_BindIFZN6paddle9framework7details28FastThreadedSSAGraphExecutor10RunOpAsyncEPSt13unordered_mapIPNS6_12OpHandleBaseESt6atomicIiESt4hashISA_ESt8equal_toISA_ESaISt4pairIKSA_SC_EEESA_RKSt10shared_ptrINS5_13BlockingQueueImEEEEUlvE_vEEEvEEEE9_M_invokeERKSt9_Any_data @ 0x7f2310f86ca3 std::_Function_handler<>::_M_invoke() @ 0x7f2310d854c7 std::future_base::_State_base::_M_do_set() @ 0x7f238981aa99 pthread_once_slow @ 0x7f2313f76562 _ZNSt13__future_base11_Task_stateISt5_BindIFZN6paddle9framework7details28FastThreadedSSAGraphExecutor10RunOpAsyncEPSt13unordered_mapIPNS4_12OpHandleBaseESt6atomicIiESt4hashIS8_ESt8equal_toIS8_ESaISt4pairIKS8_SA_EEES8_RKSt10shared_ptrINS3_13BlockingQueueImEEEEUlvE_vEESaIiEFvvEE6_M_runEv @ 0x7f2310d87924 _ZZN10ThreadPoolC1EmENKUlvE_clEv @ 0x7f234a20e421 execute_native_thread_routine_compat @ 0x7f23898136ba start_thread @ 0x7f238954941d clone @ (nil) (unknown) Aborted (core dumped)
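If that diagnosis is right, a minimal guard at the top of random_move would avoid the crash for samples that carry no annotation box; this is only a sketch, not the released fix, and the dummy box/label simply mirrors the fallback random_crop already uses, so such samples are later dropped by the num_pos < 1 check:

    def random_move(self, img, o_boxes, labels):
        boxes = np.array(o_boxes)
        # random.sample(range(bg_len), 0) returns an empty list for box-free samples,
        # so the original index[0] lookup raises the IndexError seen above
        if boxes.shape[0] == 0:
            # same dummy box/label convention as the else-branch of random_crop
            return img, np.array([[0.0, 0.0]]), np.array([0])
        # the original random_move body continues unchanged from here
        ...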
I have already set batch_size to 1.
How much GPU memory does your machine have? CTCN training is quite memory-hungry; with batch_size=1 it takes roughly 14 GB of GPU memory.
I ran it on the AI Studio advanced-tier environment.
Hello, this is not available yet. The video library provides pre-extracted features; for feature extraction you can try other models on your own as needed.