simple123456T opened this issue 2 years ago
Windows, paddlepaddle-gpu 2.3, CUDA 10.1, cuDNN 7.6.5
I made the following modifications to main.py:
```python
import time
import paddle
import cv2
import numpy as np
import argparse
import onnxruntime

paddle.device.set_device('gpu')


class PP_YOLOE():
    def __init__(self, model_path, label_path, prob_threshold=0.8):
        with open(label_path, 'rt') as f:
            self.class_names = f.read().rstrip('\n').split('\n')
        so = onnxruntime.SessionOptions()
        so.log_severity_level = 3
        self.session = onnxruntime.InferenceSession(model_path, so)
        self.input_size = (640, 640)  ###width, height
        self.mean_ = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        self.std_ = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        self.confThreshold = prob_threshold

    def preprocess(self, srcimg):
        img = cv2.cvtColor(srcimg, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, self.input_size, interpolation=cv2.INTER_LINEAR)
        img = img.astype(np.float32)
        img = img / 255.
        img -= self.mean_[None, None, :]
        img /= self.std_[None, None, :]
        img = np.transpose(img, [2, 0, 1])
        scale_factor = np.array([1., 1.], dtype=np.float32)
        return img, scale_factor

    def detect(self, srcimg):
        img, scale_factor = self.preprocess(srcimg)
        inputs = {'image': img[None, :, :, :], 'scale_factor': scale_factor[None, :]}
        ort_inputs = {i.name: inputs[i.name] for i in self.session.get_inputs() if i.name in inputs}
        output = self.session.run(None, ort_inputs)
        bbox, bbox_num = output
        keep_idx = (bbox[:, 1] > self.confThreshold) & (bbox[:, 0] > -1)
        bbox = bbox[keep_idx, :]
        ratioh = srcimg.shape[0] / self.input_size[1]
        ratiow = srcimg.shape[1] / self.input_size[0]
        for (clsid, score, xmin, ymin, xmax, ymax) in bbox:
            xmin = int(xmin * ratiow)
            ymin = int(ymin * ratioh)
            xmax = int(xmax * ratiow)
            ymax = int(ymax * ratioh)
            cv2.rectangle(srcimg, (xmin, ymin), (xmax, ymax), (0, 0, 255), thickness=2)
            cv2.putText(srcimg, self.class_names[int(clsid)] + ': ' + str(round(score, 2)),
                        (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), thickness=1)
            # print(self.class_names[int(clsid)])
        return srcimg


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--imgpath', type=str, default='', help="image path")
    parser.add_argument('--modelpath', type=str, default='model/ppyoloe_crn_s_300e_coco.onnx', help="onnx filepath")
    parser.add_argument('--classfile', type=str, default='coco.names', help="classname filepath")
    parser.add_argument('--confThreshold', default=0.7, type=float, help='class confidence')
    parser.add_argument('--cameraId', default=0, type=int, help='camera id')
    args = parser.parse_args()

    net = PP_YOLOE(args.modelpath, args.classfile, prob_threshold=args.confThreshold)
    # camera id
    cap = cv2.VideoCapture(args.cameraId)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    while cap.isOpened():
        _, frame = cap.read()
        last_time = time.time()
        srcimg = net.detect(frame)
        winName = 'Deep learning object detection in ONNXRuntime'
        fps = 1 / (time.time() - last_time)
        cv2.putText(frame, 'fps:{}'.format(float('%.2f' % fps)), (5, 50),
                    cv2.FONT_HERSHEY_PLAIN, 1.2, (0, 0, 255), 2)
        cv2.imshow(winName, srcimg)
        if cv2.waitKey(1) in [ord('q'), 27]:
            break
    cap.release()
    cv2.destroyAllWindows()
```
Running result:
This program uses onnxruntime as the inference engine. Is the onnxruntime you installed the GPU build? `pip install onnxruntime-gpu`
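A quick way to verify which build is actually installed (a minimal sketch of my own, using onnxruntime's standard query functions):

```python
import onnxruntime as ort

# prints 'GPU' only if onnxruntime-gpu is installed and CUDA is usable
print(ort.get_device())
# should list 'CUDAExecutionProvider' for the GPU build
print(ort.get_available_providers())
```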
When I changed my onnxruntime version, it still behaved the same way.
appdirs==1.4.4
astor==0.8.1
certifi==2022.5.18.1
charset-normalizer==2.0.12
decorator==5.1.1
flatbuffers==2.0
GPUtil==1.4.0
graphsurgeon @ file:///D:/Advanced_research_projects/ppyoloe/conda%E7%8E%AF%E5%A2%83/TensorRT-6.0.1.5/graphsurgeon/graphsurgeon-0.4.1-py2.py3-none-any.whl
idna==3.3
numpy==1.22.4
onnxruntime-gpu==1.11.1
opencv-python==4.5.5.64
opt-einsum==3.3.0
paddle-bfloat==0.1.2
paddlepaddle-gpu @ file:///D:/Advanced_research_projects/ppyoloe_trt/conda%E7%8E%AF%E5%A2%83/paddlepaddle_gpu-2.3.0.post101-cp38-cp38-win_amd64.whl
Pillow==9.1.1
platformdirs==2.5.2
protobuf==3.20.1
psutil==5.9.1
pyaml==21.10.1
pycuda @ file:///D:/Advanced_research_projects/ppyoloe/conda%E7%8E%AF%E5%A2%83/pycuda-2020.1%2Bcuda101-cp38-cp38-win_amd64.whl
pynvml==11.4.1
pytools==2022.1.9
PyYAML==6.0
requests==2.27.1
scipy==1.8.1
six==1.16.0
typing_extensions==4.2.0
uff @ file:///D:/Advanced_research_projects/ppyoloe/conda%E7%8E%AF%E5%A2%83/TensorRT-6.0.1.5/uff/uff-0.6.5-py2.py3-none-any.whl
urllib3==1.26.9
wincertstore==0.2
This is my conda environment.
@simple123456T For onnxruntime, I set `self.session = ort.InferenceSession(model_path, so, providers=['CUDAExecutionProvider'])`, but I did not find the cpu version faster than the cpu version.
The cpu is faster than the cpu? ----- (This is my other GitHub account; it's still me.)
@gouzi-tu Sorry, typo. I meant I did not find the gpu version faster than the cpu version.
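To isolate that comparison, here is a minimal benchmark sketch of my own (it assumes the model's inputs are named `image` and `scale_factor`, as in the script above, and reuses the same model path):

```python
import time
import numpy as np
import onnxruntime as ort

model_path = 'model/ppyoloe_crn_s_300e_coco.onnx'
# random 640x640 input; input names follow the script above
feed = {'image': np.random.rand(1, 3, 640, 640).astype(np.float32),
        'scale_factor': np.ones((1, 2), dtype=np.float32)}

for providers in (['CPUExecutionProvider'], ['CUDAExecutionProvider']):
    sess = ort.InferenceSession(model_path, providers=providers)
    sess.run(None, feed)  # warm-up (CUDA kernel compilation / memory allocation)
    t0 = time.time()
    for _ in range(20):
        sess.run(None, feed)
    print(providers[0], 'avg ms per run:', (time.time() - t0) / 20 * 1000)
```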
I didn't notice earlier that you had already set ['CUDAExecutionProvider']... By the way, what GPU model do you have? If it's a good GPU, that may mean this model is better suited to parallel rather than serial execution. You may also need to upgrade CUDA to 10.2 and cuDNN to 8.2. You can also try ['TensorrtExecutionProvider']; you just have to wait a while for the quantization/engine build (TensorRT must be installed, and it now has Python support).
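For reference, a sketch (my own, not from this repo) of passing the provider list with a fallback order, TensorRT first, then CUDA, then CPU; onnxruntime falls back to the next provider if one is unavailable, and `get_providers()` shows what was actually registered:

```python
import onnxruntime as ort

so = ort.SessionOptions()
so.log_severity_level = 3
# priority order: TensorRT first, then CUDA, then CPU as the final fallback
providers = ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
session = ort.InferenceSession('model/ppyoloe_crn_s_300e_coco.onnx', so, providers=providers)
print(session.get_providers())  # which providers were actually registered
```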
Also, that `cv2.waitKey(1)` actually takes about 15 ms, so your frame rate can never exceed roughly 66 FPS. You might as well remove it and display the result in another process.
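A minimal sketch of that idea (my own illustration, not code from this repo): the display loop, including `cv2.waitKey`, runs in a separate process, and frames are handed over through a queue; the detection call is left as a placeholder comment.

```python
import multiprocessing as mp
import cv2


def display_worker(q):
    # runs in a child process: imshow/waitKey here no longer throttles the detector
    while True:
        frame = q.get()
        if frame is None:          # sentinel: stop displaying
            break
        cv2.imshow('detections', frame)
        cv2.waitKey(1)
    cv2.destroyAllWindows()


if __name__ == '__main__':
    q = mp.Queue(maxsize=2)
    p = mp.Process(target=display_worker, args=(q,))
    p.start()

    cap = cv2.VideoCapture(0)
    try:
        for _ in range(300):       # grab a fixed number of frames for the demo
            ok, frame = cap.read()
            if not ok:
                break
            # srcimg = net.detect(frame)   # detection would go here
            if not q.full():       # drop frames instead of blocking the detection loop
                q.put(frame)
    finally:
        cap.release()
        q.put(None)
        p.join()
```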