zylo117 / Yet-Another-EfficientDet-Pytorch

A PyTorch re-implementation of the official EfficientDet, with SOTA real-time performance and pretrained weights.
GNU Lesser General Public License v3.0

To make your efficientdet_test.py display more colorful and beautiful #59

Open · vvgoder opened this issue 4 years ago

vvgoder commented 4 years ago
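
The script below is efficientdet_test.py with two additions: a seeded per-class color table (get_color_table) and a box-drawing helper (plot_one_box), so every detected class is drawn in its own stable color.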

```python
# Author: Zylo117

"""
Simple Inference Script of EfficientDet-Pytorch
"""
import time
import random

import torch
from torch.backends import cudnn

from backbone import EfficientDetBackbone
import cv2
import numpy as np

from efficientdet.utils import BBoxTransform, ClipBoxes
from utils.utils import preprocess, invert_affine, postprocess

compound_coef = 7
force_input_size = None  # set None to use default size
img_path = 'e:/save_data/262.jpg'

threshold = 0.2
iou_threshold = 0.2

use_cuda = True
use_float16 = False
cudnn.fastest = True
cudnn.benchmark = True

obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
            'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
            'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
            'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
            'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
            'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
            'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
            'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

# tf bilinear interpolation is different from any other's, just make do
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

if use_cuda:
    x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
else:
    x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list))
model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
model.requires_grad_(False)
model.eval()

if use_cuda:
    model = model.cuda()
if use_float16:
    model = model.half()

with torch.no_grad():
    features, regression, classification, anchors = model(x)

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    out = postprocess(x,
                      anchors, regression, classification,
                      regressBoxes, clipBoxes,
                      threshold, iou_threshold)


def plot_one_box(img, coord, label=None, color=None, line_thickness=None):
    # draw a single bounding box (plus an optional label tag) on the image
    tl = line_thickness or int(round(0.002 * max(img.shape[0:2])))  # line thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(coord[0]), int(coord[1])), (int(coord[2]), int(coord[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=float(tl) / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1)  # filled label background
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, float(tl) / 3, [0, 0, 0],
                    thickness=tf, lineType=cv2.LINE_AA)


def get_color_table(class_num, seed=2):
    # fixed seed -> the same class always gets the same (random) color
    random.seed(seed)
    color_table = {}
    for i in range(class_num):
        color_table[i] = [random.randint(0, 255) for _ in range(3)]
    return color_table


def display(preds, imgs, imshow=True, imwrite=False):
    color_table = get_color_table(len(obj_list))
    for i in range(len(imgs)):
        if len(preds[i]['rois']) == 0:
            continue

        for j in range(len(preds[i]['rois'])):
            x0, y0, x1, y1 = preds[i]['rois'][j].astype(int)
            class_id = int(preds[i]['class_ids'][j])
            obj = obj_list[class_id]
            score = float(preds[i]['scores'][j])
            # look the color up by class id, so every class keeps its own color
            plot_one_box(imgs[i], [x0, y0, x1, y1], label=f'{obj} {score:.2f}',
                         color=color_table[class_id])

        if imshow:
            cv2.imshow('img', imgs[i])
            cv2.waitKey(0)

        if imwrite:
            cv2.imwrite(f'test/img_inferred_d{compound_coef}_this_repo_{i}.jpg', imgs[i])


out = invert_affine(framed_metas, out)
display(out, ori_imgs, imshow=True, imwrite=False)

print('running speed test...')
with torch.no_grad():
    print('test1: model inferring and postprocessing')
    print('inferring image for 10 times...')
    t1 = time.time()
    for _ in range(10):
        _, regression, classification, anchors = model(x)

        out = postprocess(x,
                          anchors, regression, classification,
                          regressBoxes, clipBoxes,
                          threshold, iou_threshold)
        out = invert_affine(framed_metas, out)

    t2 = time.time()
    tact_time = (t2 - t1) / 10
    print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')

    # uncomment this if you want an extreme fps test
    # print('test2: model inferring only')
    # print('inferring images for batch_size 32 for 10 times...')
    # t1 = time.time()
    # x = torch.cat([x] * 32, 0)
    # for _ in range(10):
    #     _, regression, classification, anchors = model(x)
    #
    # t2 = time.time()
    # tact_time = (t2 - t1) / 10
    # print(f'{tact_time} seconds, {32 / tact_time} FPS, @batch_size 32')
```
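
For reference, a minimal sketch of why the seeded color table keeps colors stable (`demo_color_table` is an illustrative name for this comment, not part of the script above):

```python
import random


def demo_color_table(class_num, seed=2):
    # Reseeding the RNG before generating makes the class -> color mapping deterministic.
    random.seed(seed)
    return {i: [random.randint(0, 255) for _ in range(3)] for i in range(class_num)}


# Same seed, same table: each class keeps one color across images and across runs.
assert demo_color_table(5) == demo_color_table(5)
```

One caveat of this design: random.seed() inside get_color_table reseeds the global RNG, so any later random calls (e.g. the fallback color in plot_one_box) become deterministic too; a private random.Random(seed) instance would avoid that side effect.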
zylo117 commented 4 years ago

You can create a pull request, and then I'll try to merge it.