validate的速度特别慢

crownz-sec commented 3 years ago

我的设备：云平台的4核cpu，24g内存，2080ti-11g显卡。训练阶段，gpu正常使用，利用率100%。但是在测试时，gpu利用率为0%，速度特别慢。之前的相关issues我也看了，但没有找到相关的解决办法。之后在自己没有gpu的电脑上使用best.pt权重测试，发现测试一张图片的时间为1-2s，但在云平台测试一张图片的时间为15-25s。

注：multi_scale和flip都没有开

crownz-sec commented 3 years ago

发现是云平台读写文件很耗时，而且作者代码是每次测试一张图片就把结果写入对应文件，我的解决办法是将代码改成多线程，同时把测试结果先保存起来，最后一次性写入文件。后面计算mAP的Reading annotation部分代码也比较耗时，是for循环单个遍历的，代码也改成多线程了，这样的话速度快了很多。

crownz-sec commented 3 years ago

test.py的修改部分

        # Optional validation pass: evaluate the current model and print the
        # per-class AP values followed by their mean.
        if self.__eval:
            mAP = 0
            print('*' * 20 + "Validate" + '*' * 20)

            # NOTE(review): torch.no_grad() is thread-local. Evaluator.APs_voc
            # dispatches inference to a thread pool, so confirm that the worker
            # threads disable autograd themselves; this context only covers
            # the calling thread.
            with torch.no_grad():
                APs = Evaluator(self.__model).APs_voc()

                # APs is indexed by its own keys, i.e. it behaves like a
                # mapping (presumably class name -> AP; verify in __calc_APs).
                for i in APs:
                    print("{} --> mAP : {}".format(i, APs[i]))
                    mAP += APs[i]
                # The mean is taken over the configured class count, not
                # len(APs); a class missing from APs therefore contributes 0.
                mAP = mAP / self.__num_class
                print('mAP:%g' % (mAP))

evaluator.py的修改部分

import multiprocessing
from multiprocessing.dummy import Pool as ThreadPool        # 线程池

from tqdm import tqdm
from collections import defaultdict

class Evaluator(object):
    """Run detection over the VOC test split with a thread pool and score it.

    Predictions are buffered in memory per class and written to the result
    files once at the end of ``APs_voc`` instead of once per image.

    ``get_bbox`` and ``__calc_APs`` are used here but defined outside this
    excerpt.
    """

    def __init__(self, model, visiual=True):
        """Store configuration, the model and the shared result buffers.

        Args:
            model: network to evaluate; its first parameter determines
                ``self.device``.
            visiual: when true, up to 100 annotated images are saved
                (parameter spelling kept for backward compatibility).
        """
        # Local import keeps this block self-contained; threading is stdlib.
        import threading

        self.classes = cfg.DATA["CLASSES"]
        self.pred_result_path = os.path.join('data', 'results')
        self.val_data_path = os.path.join('data', 'VOC', 'VOCtest-2007')
        self.conf_thresh = cfg.TEST["CONF_THRESH"]
        self.nms_thresh = cfg.TEST["NMS_THRESH"]
        self.val_shape = cfg.TEST["TEST_IMG_SIZE"]

        self.__visiual = visiual
        self.__visual_imgs = 0

        self.model = model
        self.device = next(model.parameters()).device

        # class name -> list of detection lines, filled by worker threads.
        self.final_result = defaultdict(list)
        # Guards __visual_imgs and final_result, both mutated by pool workers.
        self.__lock = threading.Lock()

    def APs_voc(self):
        """Predict every test image, write per-class result files, return APs.

        Returns:
            Whatever ``self.__calc_APs()`` returns.
        """
        img_inds_file = os.path.join(self.val_data_path, 'ImageSets', 'Main', 'test.txt')
        with open(img_inds_file, 'r') as f:
            # Skip blank lines so a trailing newline does not yield an empty id.
            img_inds = [line.strip() for line in f if line.strip()]

        if os.path.exists(self.pred_result_path):
            shutil.rmtree(self.pred_result_path)
        os.mkdir(self.pred_result_path)

        # Start from an empty buffer so that calling APs_voc twice on the same
        # instance does not write duplicated detections.
        self.final_result.clear()

        # The pool must be shut down explicitly: leaving it open leaks worker
        # threads (and whatever per-thread state they hold) on every
        # validation run. On success we close/join; on an exception the
        # context manager terminates the pool.
        with ThreadPool(multiprocessing.cpu_count()) as pool:
            with tqdm(total=len(img_inds)) as pbar:
                for _ in pool.imap_unordered(self.Single_APs_voc, img_inds):
                    pbar.update()
            pool.close()
            pool.join()

        # One write per class instead of one write per detection.
        for class_name, lines in self.final_result.items():
            result_file = os.path.join(self.pred_result_path,
                                       'comp4_det_test_' + class_name + '.txt')
            with open(result_file, 'a') as f:
                f.writelines(lines)

        return self.__calc_APs()

    def Single_APs_voc(self, img_ind, multi_test=False, flip_test=False):
        """Predict one image and buffer its detections (runs in a pool thread).

        Args:
            img_ind: image id; ``<img_ind>.jpg`` is read from ``JPEGImages``.
            multi_test: forwarded to ``self.get_bbox``.
            flip_test: forwarded to ``self.get_bbox``.
        """
        # Local import keeps this block self-contained.
        import torch

        img_path = os.path.join(self.val_data_path, 'JPEGImages', img_ind + '.jpg')
        img = cv2.imread(img_path)

        # Grad mode is thread-local: a torch.no_grad() entered by the caller
        # in the main thread does not apply inside pool workers, so disable
        # autograd here as well.
        with torch.no_grad():
            bboxes_prd = self.get_bbox(img, multi_test, flip_test)

        # Reserve a visualization slot atomically; without the lock two
        # threads could pick the same index or exceed the cap of 100.
        visual_idx = None
        if bboxes_prd.shape[0] != 0 and self.__visiual:
            with self.__lock:
                if self.__visual_imgs < 100:
                    visual_idx = self.__visual_imgs
                    self.__visual_imgs += 1

        if visual_idx is not None:
            boxes = bboxes_prd[..., :4]
            class_inds = bboxes_prd[..., 5].astype(np.int32)
            scores = bboxes_prd[..., 4]

            visualize_boxes(image=img, boxes=boxes, labels=class_inds,
                            probs=scores, class_labels=self.classes)
            path = os.path.join(cfg.PROJECT_PATH, "data/results/{}.jpg".format(visual_idx))
            cv2.imwrite(path, img)

        # Format this image's detections locally, then merge them into the
        # shared buffer under the lock in one short critical section.
        per_class = defaultdict(list)
        for bbox in bboxes_prd:
            coor = np.array(bbox[:4], dtype=np.int32)
            score = '%.4f' % bbox[4]
            class_name = self.classes[int(bbox[5])]
            xmin, ymin, xmax, ymax = map(str, coor)
            per_class[class_name].append(
                ' '.join([img_ind, score, xmin, ymin, xmax, ymax]) + '\n')

        with self.__lock:
            for class_name, lines in per_class.items():
                self.final_result[class_name].extend(lines)

voc_eval.py的修改部分

import multiprocessing
from tqdm import tqdm
from multiprocessing.dummy import Pool as ThreadPool        # 线程池

# Module-level store of parsed annotations, keyed by image name. parse_rec
# publishes into it so thread-pool workers need no return channel.
recs = {}


def _child_text(node, tag, default):
    """Return the text of child ``tag`` of ``node``, or ``default`` if absent."""
    child = node.find(tag)
    return default if child is None else child.text


def parse_rec(param):
    """Parse one PASCAL VOC annotation XML file and record its objects.

    Args:
        param: dict with two keys: 'path', a format string such that
            ``path.format(img)`` is the XML file path, and 'img', the image
            name used as the key in ``recs``.

    Returns:
        The list of object dicts (keys: name, pose, truncated, difficult,
        bbox). The same list is stored in ``recs[img]``.
    """
    img = param['img']
    filename = param['path'].format(img)

    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        objects.append({
            'name': obj.find('name').text,
            # pose / truncated / difficult are often omitted by annotation
            # tools; fall back to neutral defaults instead of raising
            # AttributeError on a missing tag.
            'pose': _child_text(obj, 'pose', 'Unspecified'),
            'truncated': int(_child_text(obj, 'truncated', '0')),
            'difficult': int(_child_text(obj, 'difficult', '0')),
            'bbox': [int(bbox.find(tag).text)
                     for tag in ('xmin', 'ymin', 'xmax', 'ymax')],
        })

    recs[img] = objects
    return objects

def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    Only the annotation-loading stage is shown in this excerpt: it fills the
    module-level ``recs`` mapping (image name -> parsed objects), either by
    parsing every annotation file with a thread pool or by loading a cached
    pickle. The matching/AP computation is not part of this excerpt.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name
    cachedir: Directory for caching the annotations
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    """
    # Declared up front: recs is rebound below when the cache is loaded and
    # is filled in place by parse_rec when the cache is built.
    global recs

    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name
    # cachedir caches the annotations in a pickle file

    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    cachefile = os.path.join(cachedir, 'annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    print("Reading annotation...")
    if not os.path.isfile(cachefile):
        # load annots: parsing is I/O-bound, so fan it out over threads.
        param_list = [{'path': annopath, 'img': imagename}
                      for imagename in imagenames]

        # Shut the pool down explicitly so its worker threads do not outlive
        # this call: close/join on success, terminate (via the context
        # manager) if a worker raised.
        with ThreadPool(multiprocessing.cpu_count()) as pool:
            with tqdm(total=len(param_list)) as pbar:
                for _ in pool.imap_unordered(parse_rec, param_list):
                    pbar.update()
            pool.close()
            pool.join()

        # save
        print ('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'wb') as f:
            pickle.dump(recs, f)
    else:
        # load
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point cachedir at locations this program itself wrote.
        with open(cachefile, 'rb') as f:
            recs = pickle.load(f)

因为也是刚接触多线程，代码可能多少有些问题，还请大家批评指正。

BBBBchan commented 3 years ago

您好，我参考了您修改后的代码，能够成功运行，validate的速度也有了明显提升。但是在成功validate几次之后，出现了如下报错

Traceback (most recent call last):
  File "train.py", line 160, in <module>
    gpu_id=opt.gpu_id).train()
  File "train.py", line 140, in train
    APs = Evaluator(self.yolov3).APs_voc()
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 51, in APs_voc
    for i, _ in enumerate(pool.imap_unordered(self.Single_APs_voc, img_inds)):
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/multiprocessing/pool.py", line 731, in next
    raise value
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 68, in Single_APs_voc
    bboxes_prd = self.get_bbox(img, multi_test, flip_test)
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 106, in get_bbox
    bboxes = self.__predict(img, self.val_shape, (0, np.inf))
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 119, in __predict
    _, p_d = self.model(img)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/boyuansun/YOLOV3/model/yolov3.py", line 50, in forward
    x_s, x_m, x_l = self.__backnone(x)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/boyuansun/YOLOV3/model/backbones/darknet53.py", line 53, in forward
    x = self.__conv(x)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/boyuansun/YOLOV3/model/layers/conv_module.py", line 36, in forward
    x = self.__conv(x)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/conv.py", line 353, in forward
    return self._conv_forward(input, self.weight)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/conv.py", line 350, in _conv_forward
    self.padding, self.dilation, self.groups)
RuntimeError: cuDNN error: CUDNN_STATUS_NOT_INITIALIZED
terminate called without an active exception

我认为该报错可能是由于显存没有得到及时释放引起的，在多次validate之后，就有可能出现上述错误。请问您有好的解决方案吗？

crownz-sec commented 3 years ago

您好，我参考了您修改后的代码，能够成功运行，validate的速度也有了明显提升。但是在成功validate几次之后，出现了如下报错

Traceback (most recent call last):
  File "train.py", line 160, in <module>
    gpu_id=opt.gpu_id).train()
  File "train.py", line 140, in train
    APs = Evaluator(self.yolov3).APs_voc()
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 51, in APs_voc
    for i, _ in enumerate(pool.imap_unordered(self.Single_APs_voc, img_inds)):
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/multiprocessing/pool.py", line 731, in next
    raise value
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 68, in Single_APs_voc
    bboxes_prd = self.get_bbox(img, multi_test, flip_test)
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 106, in get_bbox
    bboxes = self.__predict(img, self.val_shape, (0, np.inf))
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 119, in __predict
    _, p_d = self.model(img)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/boyuansun/YOLOV3/model/yolov3.py", line 50, in forward
    x_s, x_m, x_l = self.__backnone(x)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/boyuansun/YOLOV3/model/backbones/darknet53.py", line 53, in forward
    x = self.__conv(x)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/boyuansun/YOLOV3/model/layers/conv_module.py", line 36, in forward
    x = self.__conv(x)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/conv.py", line 353, in forward
    return self._conv_forward(input, self.weight)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/conv.py", line 350, in _conv_forward
    self.padding, self.dilation, self.groups)
RuntimeError: cuDNN error: CUDNN_STATUS_NOT_INITIALIZED
terminate called without an active exception

我认为该报错可能是由于显存没有得到及时释放引起的，在多次validate之后，就有可能出现上述错误。请问您有好的解决方案吗？

因为我没有在训练过程中使用这个代码，只是拿来最终测试了一下，所以也没有碰到你这个问题。我在这里找到了一些解决方案，可能减少batch_size是一个好点的解决方案。也可以去pytorch的论坛用英文关键词找一下，有很多细节问题都可以在上面找到。

Peterisfar / YOLOV3

validate的速度特别慢 #44