Closed akashAD98 closed 2 years ago
pred.detect_video('../src/video1.mp4') # set 0 use a webcam
@Linaom1214 thanks
cd tensorrt-python/yolov7/
from utils.utils import preproc, vis
from utils.utils import BaseEngine
import numpy as np
import cv2
import time
import os
class Predictor(BaseEngine):
    """Engine wrapper that carries its own input size and 80-entry label list."""

    def __init__(self, engine_path , imgsz=(640,640)):
        """Set up the base engine, then record input size and class labels.

        Args:
            engine_path: Path handed unchanged to ``BaseEngine.__init__``.
            imgsz: Network input size stored on the instance.
        """
        super().__init__(engine_path)
        self.imgsz = imgsz
        self.n_classes = 80
        # Label lookup indexed by class id; the order must not change.
        self.class_names = [
            'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
            'cat', 'dog', 'horse', 'sheep', 'cow',
            'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
            'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
            'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
            'wine glass', 'cup', 'fork', 'knife', 'spoon',
            'bowl', 'banana', 'apple', 'sandwich', 'orange',
            'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
            'cake', 'chair', 'couch', 'potted plant', 'bed',
            'dining table', 'toilet', 'tv', 'laptop', 'mouse',
            'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock',
            'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
        ]
if __name__ == '__main__':
    # Build the predictor from the engine file.
    pred = Predictor(engine_path='/content/tensorrt-python/yolov7/yolov7-tiny-nms.trt')

    # Single-image detection; the annotated result is written to the current
    # directory as "<image stem>_yolov7.jpg".
    img_path = '/content/tensorrt-python/src/3.jpg'
    origin_img = pred.inference(img_path, conf=0.3)
    stem = os.path.splitext(os.path.basename(img_path))[0]
    cv2.imwrite("%s_yolov7.jpg" % stem, origin_img)

    pred.detect_video('/content/tensorrt-python/src/video1.mp4')  # set 0 use a webcam
    pred.get_fps()
I'm getting this issue while doing inference. I converted the model using this notebook.
I'm getting this issue while doing inference. I converted the model using this notebook.
This repo doesn't support end2end models; please refer to the README to convert the TensorRT engine.
pip install --upgrade setuptools pip --user
pip install nvidia-pyindex
pip install --upgrade nvidia-tensorrt
pip install pycuda
Here is a Python demo that may help you quickly understand this repo: Link
Modify code: modify `yolov7/models/yolo.py` as follows:
return x if self.training else (torch.cat(z, 1), ) if not self.export else (torch.cat(z, 1), x)
python models/export.py --weights ../yolov7.pt --grid
python export.py -o onnx-name -e trt-name -p fp32/16/int8
cd yolov7
C++ Demo
Will my model trained with YOLOv7 (yolov7.pth) not be supported here? Do we need to modify something and train the model again? Image inference works with this model, but on video inference I'm getting this issue. @Linaom1214
If image inferencing is working, why don't you just change the input from images to video frames?
Which code are you using to predict on images?
im using this notebook
I have already given the video path. The notebook only has image inference, so I added detect_video() to the code.
origin_img = pred.inference('/content/yolov7/inference/images/horses.jpg')
pred.detect_video('/content/tensorrt-python/src/video1.mp4') # set 0 use a webcam
The repo tensorrt-python
doesn't support end2end models. You can refer to the function def detect_video(self, video_path):
and then modify the code in
It's quite easy.
@Linaom1214 Sorry, sir, but I'm using your Google Colab script for YOLOv7.
I'm still getting the error. What's wrong?
@Linaom1214 Sorry, sir, but I'm using your Google Colab script for YOLOv7.
I'm still getting the error. What's wrong?
Please provide more details.
import pycuda.autoinit
import pycuda.driver as cuda
import numpy as np
import cv2
class BaseEngine(object):
    """Run a serialized TensorRT detection engine on images and video.

    ``__init__`` deserializes the engine and gives every binding a page-locked
    host buffer plus a matching device allocation. ``infer`` copies the input
    to the device, executes the engine on a CUDA stream and returns the
    host-side output buffers.

    NOTE(review): relies on the module-level names ``trt``, ``cuda``, ``np``,
    ``cv2``, ``preproc`` and ``vis``. No ``import tensorrt as trt`` is visible
    in this paste -- confirm it exists where the class is used.
    """

    def __init__(self, engine_path, imgsz=(640, 640)):
        """Deserialize the engine at ``engine_path`` and allocate I/O buffers.

        Args:
            engine_path: Path to the serialized engine file (opened as binary).
            imgsz: Input size ``(rows, cols)`` passed to ``preproc`` and used
                for the dummy tensor in ``get_fps``.
        """
        self.imgsz = imgsz
        # preproc() skips normalisation when these are None.
        self.mean = None
        self.std = None
        self.n_classes = 80
        # Label lookup indexed by class id; the order must not change.
        self.class_names = [
            'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
            'cat', 'dog', 'horse', 'sheep', 'cow',
            'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
            'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
            'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
            'wine glass', 'cup', 'fork', 'knife', 'spoon',
            'bowl', 'banana', 'apple', 'sandwich', 'orange',
            'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
            'cake', 'chair', 'couch', 'potted plant', 'bed',
            'dining table', 'toilet', 'tv', 'laptop', 'mouse',
            'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock',
            'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
        ]

        logger = trt.Logger(trt.Logger.WARNING)
        runtime = trt.Runtime(logger)
        with open(engine_path, "rb") as f:
            serialized_engine = f.read()
        engine = runtime.deserialize_cuda_engine(serialized_engine)
        self.context = engine.create_execution_context()

        self.inputs, self.outputs, self.bindings = [], [], []
        self.stream = cuda.Stream()
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # The execution context addresses buffers by device pointer, in
            # binding order, so every binding is recorded here.
            self.bindings.append(int(device_mem))
            buffers = {'host': host_mem, 'device': device_mem}
            if engine.binding_is_input(binding):
                self.inputs.append(buffers)
            else:
                self.outputs.append(buffers)

    def infer(self, img):
        """Run the engine on one preprocessed input.

        Args:
            img: Preprocessed input array; it is flattened before upload.

        Returns:
            List with the host buffer of every output binding, in binding
            order. The same buffers are reused by the next call.
        """
        self.inputs[0]['host'] = np.ravel(img)
        # transfer data to the gpu
        for inp in self.inputs:
            cuda.memcpy_htod_async(inp['device'], inp['host'], self.stream)
        # run inference
        self.context.execute_async_v2(
            bindings=self.bindings,
            stream_handle=self.stream.handle)
        # fetch outputs from gpu
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out['host'], out['device'], self.stream)
        # synchronize stream
        self.stream.synchronize()

        data = [out['host'] for out in self.outputs]
        return data

    @staticmethod
    def _decode_end2end(outputs, ratio):
        """Turn the four NMS outputs into ``(boxes, scores, class_ids)``.

        ``outputs`` is ``[num, boxes, scores, class_ids]`` as unpacked by
        ``inference``. Boxes are divided by ``ratio`` to map them back to the
        original image. Returns ``None`` when no detection is reported.
        """
        num, boxes, scores, cls_inds = outputs
        boxes = np.reshape(boxes, (-1, 4))
        count = int(num[0])
        if count <= 0:
            return None
        return boxes[:count] / ratio, scores[:count], cls_inds[:count]

    def detect_video(self, video_path):
        """Run detection on every frame of a video and display the result.

        Frames are shown with ``cv2.imshow``; press ``q`` to stop early.
        NOTE(review): ``cv2.imshow`` needs a GUI backend and is reported in the
        thread as unavailable on Colab.

        Args:
            video_path: Anything ``cv2.VideoCapture`` accepts.
        """
        cap = cv2.VideoCapture(video_path)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                blob, ratio = preproc(frame, self.imgsz, self.mean, self.std)
                data = self.infer(blob)
                if len(data) == 4:
                    # Engine with NMS built in: same output layout that
                    # ``inference`` unpacks.
                    decoded = self._decode_end2end(data, ratio)
                else:
                    # Raw prediction tensor. ``postprocess`` is not defined in
                    # this class and must be supplied by a subclass.
                    predictions = np.reshape(
                        data, (1, -1, int(5 + self.n_classes)))[0]
                    dets = self.postprocess(predictions, ratio)
                    decoded = None if dets is None else (
                        dets[:, :4], dets[:, 4], dets[:, 5])
                if decoded is not None:
                    final_boxes, final_scores, final_cls_inds = decoded
                    frame = vis(frame, final_boxes, final_scores, final_cls_inds,
                                conf=0.5, class_names=self.class_names)
                cv2.imshow('frame', frame)
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
        finally:
            # Release the capture even if preprocessing or inference raises.
            cap.release()
        cv2.destroyAllWindows()

    def inference(self, img_path, conf=0.25):
        """Detect objects in the image at ``img_path`` and draw them.

        Args:
            img_path: Path of the image to read with ``cv2.imread``.
            conf: Minimum score for a detection to be drawn.

        Returns:
            The annotated image, converted back to BGR channel order.
        """
        origin_img = cv2.imread(img_path)
        origin_img = cv2.cvtColor(origin_img, cv2.COLOR_BGR2RGB)
        # NOTE(review): preproc() reverses the channel axis as well, so the
        # blob here ends up in the file's BGR order, whereas detect_video()
        # feeds the reversed order -- confirm which one the engine expects.
        img, ratio = preproc(origin_img, self.imgsz, self.mean, self.std)
        decoded = self._decode_end2end(self.infer(img), ratio)
        if decoded is not None:
            final_boxes, final_scores, final_cls_inds = decoded
            origin_img = vis(origin_img, final_boxes, final_scores, final_cls_inds,
                             conf=conf, class_names=self.class_names)
        origin_img = cv2.cvtColor(origin_img, cv2.COLOR_RGB2BGR)
        return origin_img

    def get_fps(self):
        """Print the throughput of one ``infer`` call after 20 warm-up runs."""
        import time
        img = np.ones((1, 3, self.imgsz[0], self.imgsz[1]))
        img = np.ascontiguousarray(img, dtype=np.float32)
        # warmup
        for _ in range(20):
            _ = self.infer(img)
        t1 = time.perf_counter()
        _ = self.infer(img)
        print(1/(time.perf_counter() - t1), 'FPS')
def preproc(image, input_size, mean, std, swap=(2, 0, 1)):
    """Resize ``image`` to fit ``input_size``, pad it, and build the input blob.

    The image is scaled by a single ratio so that it fits inside
    ``input_size``, placed in the top-left corner of a canvas filled with 114,
    channel-reversed, scaled to [0, 1], optionally normalised, and transposed.

    Args:
        image: Input image array; a 3-dimensional array gets a 3-channel canvas.
        input_size: Target ``(rows, cols)``.
        mean: Value subtracted after scaling, or ``None`` to skip.
        std: Value divided by after mean subtraction, or ``None`` to skip.
        swap: Axis order applied by the final transpose.

    Returns:
        ``(blob, r)``: the contiguous float32 array and the resize ratio used.
    """
    if len(image.shape) == 3:
        padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0
    else:
        padded_img = np.ones(input_size) * 114.0
    img = np.array(image)
    # One ratio for both axes keeps the aspect ratio.
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    new_h, new_w = int(img.shape[0] * r), int(img.shape[1] * r)
    # cv2.resize takes the target size as (width, height).
    resized_img = cv2.resize(
        img,
        (new_w, new_h),
        interpolation=cv2.INTER_LINEAR,
    )
    padded_img[:new_h, :new_w] = resized_img
    padded_img = padded_img[:, :, ::-1]
    padded_img /= 255.0
    if mean is not None:
        padded_img -= mean
    if std is not None:
        padded_img /= std
    padded_img = padded_img.transpose(swap)
    padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
    return padded_img, r
# Colour table used by ``vis``: one triple per row with components in [0, 1]
# (``vis`` scales them by 255). The values must be passed as a single list;
# the flat sequence is reshaped into rows of three.
_COLORS = np.array(
    [
        0.000, 0.447, 0.741,
        0.850, 0.325, 0.098,
        0.929, 0.694, 0.125,
        0.494, 0.184, 0.556,
        0.466, 0.674, 0.188,
        0.301, 0.745, 0.933,
        0.635, 0.078, 0.184,
        0.300, 0.300, 0.300,
        0.600, 0.600, 0.600,
        1.000, 0.000, 0.000,
        1.000, 0.500, 0.000,
        0.749, 0.749, 0.000,
        0.000, 1.000, 0.000,
        0.000, 0.000, 1.000,
        0.667, 0.000, 1.000,
        0.333, 0.333, 0.000,
        0.333, 0.667, 0.000,
        0.333, 1.000, 0.000,
        0.667, 0.333, 0.000,
        0.667, 0.667, 0.000,
        0.667, 1.000, 0.000,
        1.000, 0.333, 0.000,
        1.000, 0.667, 0.000,
        1.000, 1.000, 0.000,
        0.000, 0.333, 0.500,
        0.000, 0.667, 0.500,
        0.000, 1.000, 0.500,
        0.333, 0.000, 0.500,
        0.333, 0.333, 0.500,
        0.333, 0.667, 0.500,
        0.333, 1.000, 0.500,
        0.667, 0.000, 0.500,
        0.667, 0.333, 0.500,
        0.667, 0.667, 0.500,
        0.667, 1.000, 0.500,
        1.000, 0.000, 0.500,
        1.000, 0.333, 0.500,
        1.000, 0.667, 0.500,
        1.000, 1.000, 0.500,
        0.000, 0.333, 1.000,
        0.000, 0.667, 1.000,
        0.000, 1.000, 1.000,
        0.333, 0.000, 1.000,
        0.333, 0.333, 1.000,
        0.333, 0.667, 1.000,
        0.333, 1.000, 1.000,
        0.667, 0.000, 1.000,
        0.667, 0.333, 1.000,
        0.667, 0.667, 1.000,
        0.667, 1.000, 1.000,
        1.000, 0.000, 1.000,
        1.000, 0.333, 1.000,
        1.000, 0.667, 1.000,
        0.333, 0.000, 0.000,
        0.500, 0.000, 0.000,
        0.667, 0.000, 0.000,
        0.833, 0.000, 0.000,
        1.000, 0.000, 0.000,
        0.000, 0.167, 0.000,
        0.000, 0.333, 0.000,
        0.000, 0.500, 0.000,
        0.000, 0.667, 0.000,
        0.000, 0.833, 0.000,
        0.000, 1.000, 0.000,
        0.000, 0.000, 0.167,
        0.000, 0.000, 0.333,
        0.000, 0.000, 0.500,
        0.000, 0.000, 0.667,
        0.000, 0.000, 0.833,
        0.000, 0.000, 1.000,
        0.000, 0.000, 0.000,
        0.143, 0.143, 0.143,
        0.286, 0.286, 0.286,
        0.429, 0.429, 0.429,
        0.571, 0.571, 0.571,
        0.714, 0.714, 0.714,
        0.857, 0.857, 0.857,
        0.000, 0.447, 0.741,
        0.314, 0.717, 0.741,
        0.50, 0.5, 0
    ]
).astype(np.float32).reshape(-1, 3)
def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):
    """Draw boxes and ``label:score%`` captions for detections on ``img``.

    Args:
        img: Image to draw on; it is modified in place.
        boxes: Per-detection ``(x0, y0, x1, y1)`` corner coordinates.
        scores: Per-detection scores; entries below ``conf`` are skipped.
        cls_ids: Per-detection class ids, used for colour and label lookup.
        conf: Minimum score for a detection to be drawn.
        class_names: Optional label lookup indexed by class id. When ``None``
            the numeric class id is used as the label.

    Returns:
        The same ``img`` object, annotated.
    """
    for box, score, cls_id in zip(boxes, scores, cls_ids):
        if score < conf:
            # Below the display threshold: draw nothing for this detection.
            continue
        cls_id = int(cls_id)
        x0 = int(box[0])
        y0 = int(box[1])
        x1 = int(box[2])
        y1 = int(box[3])

        # Wrap the class id so ids beyond the table still get a colour.
        base_color = _COLORS[cls_id % len(_COLORS)]
        color = (base_color * 255).astype(np.uint8).tolist()
        label = class_names[cls_id] if class_names is not None else str(cls_id)
        text = '{}:{:.1f}%'.format(label, score * 100)
        # Dark text on light colours, light text on dark colours.
        txt_color = (0, 0, 0) if np.mean(base_color) > 0.5 else (255, 255, 255)
        font = cv2.FONT_HERSHEY_SIMPLEX

        txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
        cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)

        # Filled caption background, slightly darker than the box colour.
        txt_bk_color = (base_color * 255 * 0.7).astype(np.uint8).tolist()
        cv2.rectangle(
            img,
            (x0, y0 + 1),
            (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])),
            txt_bk_color,
            -1,
        )
        cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1)

    return img
# image
# Load the engine file and run detection on a single image.
pred = BaseEngine(engine_path='/content/tensorrt-python/yolov7-tiny-nms.trt')
origin_img = pred.inference('/content/yolov7/inference/images/horses.jpg')
import matplotlib.pyplot as plt
# inference() ends with an RGB->BGR conversion, so the channel axis is
# reversed here before handing the image to matplotlib.
plt.imshow(origin_img[:, :, ::-1])
## video
# NOTE(review): detect_video() shows frames with cv2.imshow, which the thread
# reports as unsupported on Colab.
pred.detect_video('/content/tensorrt-python/src/video1.mp4') # set 0 use a webcam
I added the detect_video() function inside this Google Colab notebook. Image inference works fine, but for video I'm getting issues. I'm using the YOLOv7 repo that you submitted.
I'm getting this error
It would be great if you could add video inference to the same Google Colab notebook.
An end2end model means the NMS operation is included in the TensorRT engine; you should be aware of that. So just copying the code will not work. For now you can try a model that does not include the NMS plugin; I will update this repo afterwards.
@Linaom1214 Thank you. Waiting for the video inference code for the end2end/NMS model.
I have updated the end2end support; you can now export a model that includes the NMS plugin more simply.
You only need to export the ONNX model that has one output.
Then use this repo to add the plugin to the model. I also provide image and video inference demos.
@Linaom1214 I'm not able to do inference on video, and the video is also not being saved in the output folder.
Colab doesn't support the OpenCV imshow function.
After the model is converted to .trt, we want to do inference on video.