Open kjgfcdb opened 11 months ago
使用了来自Ultralytics的yolov8n.pt模型导出的yolov8n.onnx(导出命令为yolo export model=yolov8n.pt format=onnx opset=13),通过如下脚本进行int8量化:
yolo export model=yolov8n.pt format=onnx opset=13
"""PPQ INT8 quantization script for an Ultralytics YOLOv8n ONNX model.

Loads ``yolov8n.onnx``, calibrates on 32 preprocessed copies of a single
image (``bus.jpg``), runs graph-wise and layer-wise SNR error analysis,
and exports an OpenVINO-INT8 ONNX graph to ``model_int8.onnx``.
"""
import cv2
import numpy as np
import torch
from typing import Tuple

from ppq import *
from ppq.api import *


def preprocess(img: np.ndarray, img_shape: Tuple[int, int] = (640, 640)) -> torch.Tensor:
    """Letterbox-resize *img* to *img_shape* and convert to an NCHW float tensor.

    Args:
        img: H x W x C uint8 image as returned by ``cv2.imread`` (BGR order).
        img_shape: target (height, width) of the model input.

    Returns:
        A (1, C, H, W) float32 tensor with values scaled to [0, 1].

    NOTE(review): channels are kept in BGR order here, while Ultralytics'
    own preprocessing converts BGR -> RGB before inference — confirm which
    order the exported ONNX model expects.
    """
    h, w = img.shape[:2]
    ih, iw = img_shape
    # Uniform scale factor that fits the image inside the target shape.
    r = min(ih / h, iw / w)
    new_h, new_w = int(round(r * h)), int(round(r * w))
    # Split the remaining padding as evenly as possible on each side
    # (standard YOLO letterboxing).
    pad_h, pad_w = ih - new_h, iw - new_w
    top = pad_h // 2
    bottom = pad_h - top
    left = pad_w // 2
    right = pad_w - left
    img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    img = cv2.copyMakeBorder(
        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
    )  # pad border with the conventional YOLO gray value 114
    img = img[None, ...]  # add batch dimension -> NHWC
    img = np.ascontiguousarray(img.transpose((0, 3, 1, 2))).astype("float32")  # NHWC -> NCHW
    img /= 255  # scale to [0, 1]
    return torch.from_numpy(img)


QUANT_PLATFORM = TargetPlatform.OPENVINO_INT8
MODEL = "yolov8n.onnx"
INPUT_SHAPE = [1, 3, 640, 640]

frame = cv2.imread("./bus.jpg")
# NOTE(review): calibrating on 32 copies of one image gives the observers a
# very narrow activation distribution; a diverse calibration set is strongly
# recommended for usable INT8 accuracy.
SAMPLES = [preprocess(frame) for _ in range(32)]

DEVICE = "cpu"
FINETUNE = False
QS = QuantizationSettingFactory.default_setting()
EXECUTING_DEVICE = "cpu"
REQUIRE_ANALYSE = True

# -------------------------------------------------------------------
# Commonly tuned options:
# -------------------------------------------------------------------
QS.lsq_optimization = FINETUNE  # enable network retraining to reduce quantization error
QS.lsq_optimization_setting.steps = 500  # retraining steps; 500 takes roughly a few minutes
QS.lsq_optimization_setting.collecting_device = (
    "cpu"  # where cached data lives; 'cuda' keeps it on GPU, fall back to 'cpu' if VRAM runs out
)

# Send poorly quantized operators back to FP32:
# QS.dispatching_table.append(operation='OP NAME', platform=TargetPlatform.FP32)

print("正准备量化你的网络,检查下列设置:")
print(f"TARGET PLATFORM : {QUANT_PLATFORM.name}")
print(f"NETWORK INPUTSHAPE : {INPUT_SHAPE}")

# ENABLE_CUDA_KERNEL speeds up quantization 3x-10x but requires a working
# compile environment; without one, simply run without the
# `with ENABLE_CUDA_KERNEL():` context (as done here).
qir = quantize_onnx_model(
    onnx_import_file=MODEL,
    calib_dataloader=SAMPLES,
    # NOTE(review): calib_steps (128) exceeds len(SAMPLES) (32) — confirm ppq
    # cycles the dataloader, otherwise only 32 calibration steps actually run.
    calib_steps=128,
    setting=QS,
    input_shape=INPUT_SHAPE,
    # collate_fn=lambda x: x.to(EXECUTING_DEVICE),
    platform=QUANT_PLATFORM,
    do_quantize=True,
    device="cpu",
    verbose=1,
)

print("正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:")
reports = graphwise_error_analyse(
    graph=qir,
    running_device=EXECUTING_DEVICE,
    steps=32,
    dataloader=SAMPLES,
    collate_fn=lambda x: x.to(EXECUTING_DEVICE),
)
for op, snr in reports.items():
    if snr > 0.1:
        ppq_warning(f"层 {op} 的累计量化误差显著,请考虑进行优化")

if REQUIRE_ANALYSE:
    print("正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:")
    layerwise_error_analyse(
        graph=qir,
        running_device=EXECUTING_DEVICE,
        interested_outputs=None,
        dataloader=SAMPLES,
        collate_fn=lambda x: x.to(EXECUTING_DEVICE),
    )

print("网络量化结束,正在生成目标文件:")
export_ppq_graph(graph=qir, platform=QUANT_PLATFORM, graph_save_to="model_int8.onnx")
代码中用到的图像为bus.jpg。
我观察了log似乎只有下面这两个警告值得注意
[Warning] Unexpected input value of operation /model.10/Resize, recieving "None" at its input 1 [Warning] Unexpected input value of operation /model.13/Resize, recieving "None" at its input 1
而量化过程中的信噪比结果都没有超过0.1。
量化后的模型model_int8.onnx检出不了任何结果,下面是它和yolov8n.onnx的结果对比
model_int8.onnx
yolov8n.onnx
请问这种异常可能是什么原因导致的呢?谢谢!
可以提供yolov8n.onnx给我吗?
@ZhangZhiPku 好的,请查收 yolov8n.zip
您好,遇到同样的问题,请问怎么解决?
解决了吗?
复现步骤
使用了来自Ultralytics的yolov8n.pt模型导出的yolov8n.onnx(导出命令为
yolo export model=yolov8n.pt format=onnx opset=13
),通过如下脚本进行int8量化:代码中用到的图像为bus.jpg。
Log
我观察了log似乎只有下面这两个警告值得注意
而量化过程中的信噪比结果都没有超过0.1。
异常结果
量化后的模型
model_int8.onnx
检出不了任何结果,下面是它和yolov8n.onnx
的结果对比。请问这种异常可能是什么原因导致的呢?谢谢!