YOLOv5 官方模型在应用时需要做适当修改,原因如下:
class End2End(nn.Module):
    """Bundle a detection model with an NMS stage for ONNX / TensorRT export.

    Args:
        model: Detection network; the first element of its output is handed
            to the NMS module.
        max_obj: Forwarded to the NMS module as its first argument.
        iou_thres: IoU threshold forwarded to the NMS module.
        score_thres: Score threshold forwarded to the NMS module.
        device: Device the model is moved to; CPU is used when this is falsy.
        ort: Accepted for interface compatibility; not used in this class.
        trt_version: Accepted for interface compatibility; not used in this class.
        with_preprocess: Stored on the instance; ``forward`` does not read it.
    """

    def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, device=None, ort=False, trt_version=8, with_preprocess=False):
        super().__init__()
        target_device = device or torch.device('cpu')
        self.with_preprocess = with_preprocess
        self.model = model.to(target_device)
        # Only the TensorRT 8 NMS wrapper is wired in here.
        self.patch_model = ONNX_TRT8
        self.nms = self.patch_model(max_obj, iou_thres, score_thres, target_device)
        self.nms.eval()

    def forward(self, x):
        """Run the model, apply NMS and pack the detections into one tensor.

        Returns:
            A pair ``(num_det, det_result)``. ``det_result`` is the
            concatenation along dim 2 of the boxes, the scores and the class
            ids (cast to float), in that order.
        """
        raw = self.model(x)[0]
        count, boxes, scores, classes = self.nms(raw)
        # Scores and classes lack the trailing axis the boxes have; add it
        # so all three can be concatenated along dim 2.
        score_col = scores.unsqueeze(2)
        class_col = classes.float().unsqueeze(2)
        packed = torch.cat((boxes, score_col, class_col), dim=2)
        return count, packed
class ONNX_TRT8(nn.Module):
    """ONNX module that applies the TensorRT NMS operation.

    Args:
        max_obj: Passed to ``TRT8_NMS`` as ``max_output_boxes``.
        iou_thres: Passed to ``TRT8_NMS`` as ``iou_threshold``.
        score_thres: Passed to ``TRT8_NMS`` as ``score_threshold``.
        device: Stored on the instance; CPU is used when this is falsy.
    """

    def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, device=None):
        super().__init__()
        self.device = device if device else torch.device('cpu')
        # Plain ints on purpose: a trailing comma here would turn these into
        # 1-tuples, which are then exported as list attributes instead of
        # scalar integer attributes.
        self.background_class = -1
        self.box_coding = 1
        self.iou_threshold = iou_thres
        self.max_obj = max_obj
        self.plugin_version = '1'
        self.score_activation = 0
        self.score_threshold = score_thres

    def forward(self, x):
        """Split the prediction tensor and run the NMS operation.

        The last axis of ``x`` is sliced as: first 4 values -> ``box``,
        index 4 -> ``conf``, everything from index 5 on -> per-class scores.
        (Assumes ``x`` is 3-D, as the indexing requires -- confirm the exact
        layout against the model head.)

        Returns:
            The 4-tuple ``(num_det, det_boxes, det_scores, det_classes)``
            produced by ``TRT8_NMS``.
        """
        box = x[:, :, :4]
        conf = x[:, :, 4:5]
        # Out-of-place multiply: the slices are views of ``x``, so an
        # in-place ``*=`` would silently overwrite the caller's tensor.
        score = x[:, :, 5:] * conf
        num_det, det_boxes, det_scores, det_classes = TRT8_NMS.apply(
            box,
            score,
            self.background_class,
            self.box_coding,
            self.iou_threshold,
            self.max_obj,
            self.plugin_version,
            self.score_activation,
            self.score_threshold,
        )
        return num_det, det_boxes, det_scores, det_classes
class TRT8_NMS(torch.autograd.Function):
    """TensorRT NMS operation.

    ``forward`` does not perform NMS: it only returns randomly filled
    tensors with the expected output shapes and dtypes. ``symbolic`` emits
    the ``TRT::EfficientNMS_TRT`` node that replaces this function in the
    exported graph.
    """

    @staticmethod
    def forward(
        ctx,
        boxes,
        scores,
        background_class=-1,
        box_coding=1,
        iou_threshold=0.45,
        max_output_boxes=100,
        plugin_version="1",
        score_activation=0,
        score_threshold=0.25,
    ):
        """Return placeholder outputs shaped like the NMS results.

        Args:
            boxes: Box tensor; only its device is used here.
            scores: 3-D tensor unpacked as ``(batch, boxes, classes)``.
            max_output_boxes: Size of the detection axis of the outputs.
            Remaining arguments are unused here and only consumed by
            ``symbolic``.

        Returns:
            ``(num_det, det_boxes, det_scores, det_classes)`` with shapes
            ``(B, 1)``, ``(B, max_output_boxes, 4)``, ``(B, max_output_boxes)``
            and ``(B, max_output_boxes)``; ``num_det`` and ``det_classes``
            are int32.
        """
        batch_size, _, num_classes = scores.shape
        # Allocate on the inputs' device so downstream ops never see a
        # CPU tensor when the model runs elsewhere.
        device = boxes.device
        num_det = torch.randint(
            0, max_output_boxes, (batch_size, 1), dtype=torch.int32, device=device
        )
        det_boxes = torch.randn(batch_size, max_output_boxes, 4, device=device)
        det_scores = torch.randn(batch_size, max_output_boxes, device=device)
        det_classes = torch.randint(
            0, num_classes, (batch_size, max_output_boxes), dtype=torch.int32, device=device
        )
        return num_det, det_boxes, det_scores, det_classes

    @staticmethod
    def symbolic(g,
                 boxes,
                 scores,
                 background_class=-1,
                 box_coding=1,
                 iou_threshold=0.45,
                 max_output_boxes=100,
                 plugin_version="1",
                 score_activation=0,
                 score_threshold=0.25):
        """Emit the ``TRT::EfficientNMS_TRT`` node with four outputs.

        The ``_i`` / ``_f`` / ``_s`` suffixes on the keyword names select
        integer, float and string attribute types for the ONNX node.
        """
        out = g.op("TRT::EfficientNMS_TRT",
                   boxes,
                   scores,
                   background_class_i=background_class,
                   box_coding_i=box_coding,
                   iou_threshold_f=iou_threshold,
                   max_output_boxes_i=max_output_boxes,
                   plugin_version_s=plugin_version,
                   score_activation_i=score_activation,
                   score_threshold_f=score_threshold,
                   outputs=4)
        nums, boxes, scores, classes = out
        return nums, boxes, scores, classes
PTQ共有3种实现方式,示例如下:
python onnx2trt_ptq.py
参数说明:
注意事项:
# Register an optimization profile for the network's first input, giving the
# minimum, optimum and maximum shapes the engine is built for.
profile = builder.create_optimization_profile()
first_input_name = network.get_input(0).name
profile.set_shape(
    first_input_name,
    min=(1, 3, 384, 1280),
    opt=(12, 3, 384, 1280),
    max=(26, 3, 384, 1280),
)
config.add_optimization_profile(profile)
安装polygraphy
pip install colored polygraphy --extra-index-url https://pypi.ngc.nvidia.com
量化
polygraphy convert XX.onnx --int8 --fp16 --data-loader-script loader_data.py --calibration-cache XX.cache -o XX.trt --trt-min-shapes images:[1,3,384,1280] --trt-opt-shapes images:[26,3,384,1280] --trt-max-shapes images:[26,3,384,1280] #量化
参数说明:
注意事项:
python pytorch_yolov5_ptq.py
参数说明:
python pytorch_yolov5_qat.py -m best.pt ...  # best.pt 为准备工作中训练好的模型;... 表示其余参数,可参考 train.py 的训练参数;数据采用训练数据集
注意事项:
python export_onnx_qat.py
参数说明:
注意事项: