WongKinYiu / ScaledYOLOv4

Scaled-YOLOv4: Scaling Cross Stage Partial Network

convert to ONNX #371

Open fardinadii opened 2 years ago

fardinadii commented 2 years ago

Hi, I'm trying to convert the output model of ScaledYOLOv4 to ONNX, but I ran into an error:

. . . . ) # /yolov4/models/yolo.py:38:0
%603 : Float(1, 3, 18, 18, 12, strides=[11664, 3888, 216, 12, 1], requires_grad=1, device=cpu) = onnx::Transpose[perm=[0, 1, 3, 4, 2]] # /yolov4/models/yolo.py:38:0
return (%output, %583, %603)

I used the models/export.py script.

siriasadeddin commented 2 years ago
# Excerpt from models/export.py; assumes that script's imports and its argparse result `opt`
# Input
img = torch.randn((opt.batch_size, 3, *opt.img_size))  # image size(1,3,320,192) iDetection

# Load PyTorch model
attempt_download(opt.weights)
model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
from onnxsim import simplify
# Update model
for k, m in model.named_modules():
    m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatability
    if isinstance(m, models.common.Conv) and isinstance(m.act, models.common.Mish):
        m.act = Mish()  # assign activation
    if isinstance(m, models.common.BottleneckCSP) or isinstance(m, models.common.BottleneckCSP2) \
                or isinstance(m, models.common.SPPCSP):
        if isinstance(m.bn, nn.SyncBatchNorm):
            bn = nn.BatchNorm2d(m.bn.num_features, eps=m.bn.eps, momentum=m.bn.momentum)
            bn.training = False
            bn._buffers = m.bn._buffers
            bn._non_persistent_buffers_set = set()
            m.bn = bn
        if isinstance(m.act, models.common.Mish):
            m.act = Mish()  # assign activation
    # if isinstance(m, models.yolo.Detect):
    #     m.forward = m.forward_export  # assign forward (optional)

model.eval()
model.model[-1].export = True  # set Detect() layer export=True

#y = model(img)  # dry run

import onnx
import onnxruntime

print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
f = opt.weights.replace('.pt', '.onnx')  # filename
try:
    model.fuse()  # only for ONNX
    torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['input'])

    # Checks
    onnx_model = onnx.load(f)  # load onnx model
    model_simp, check = simplify(onnx_model)
    assert check, "Simplified ONNX model could not be validated"
    onnx.save(model_simp, f)  # save the simplified model
    print('ONNX export success, saved as %s' % f)
except Exception as e:
    print('ONNX export failure: %s' % e)

RohitKeshari commented 2 years ago

@siriasadeddin I noticed performance degradation when converting nn.SyncBatchNorm this way, though I'm not sure why. The output values of the PyTorch model and the ONNX model also differ.
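One way to check that claim directly is to run the same dummy input through both runtimes and compare. A minimal sketch, not from this thread; it assumes `model` has been prepared as in the export snippet above (eval() with export=True) and that the ONNX file was saved to the hypothetical path yolov4-p5.onnx:

import numpy as np
import torch
import onnxruntime

img = torch.randn(1, 3, 640, 640)  # one dummy input for both runtimes

with torch.no_grad():
    torch_outs = model(img)  # per-layer outputs in export mode
torch_outs = list(torch_outs) if isinstance(torch_outs, (list, tuple)) else [torch_outs]

sess = onnxruntime.InferenceSession('yolov4-p5.onnx')
ort_outs = sess.run(None, {sess.get_inputs()[0].name: img.numpy()})

# tolerances taken from the comparison further down this thread
for t, o in zip(torch_outs, ort_outs):
    np.testing.assert_allclose(t.numpy(), o, rtol=5e-3, atol=1e-4)
    print('max abs diff:', np.abs(t.numpy() - o).max())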

siriasadeddin commented 2 years ago

Hi! Can you tell me how you tested it, so I can also try? For example, I tried:

# python test.py --img-size 1536 --weights ./yolov4-p7.pt
import sys
sys.path.append('./')  # to run '$ python *.py' files in subdirectories

import argparse
import copy

import torch
import torch.nn as nn

import models
from models.experimental import attempt_load
from utils.activations import Mish
import numpy as np
from utils.google_utils import attempt_download

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov4-p5.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)

    # Input
    img = torch.randn((opt.batch_size, 3, *opt.img_size))  # image size(1,3,320,192) iDetection

    # Load PyTorch model
    attempt_download(opt.weights)
    model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
    model1 = copy.deepcopy(model)  # keep an untouched copy to compare against the modified model
    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatability
        if isinstance(m, models.common.Conv) and isinstance(m.act, models.common.Mish):
            m.act = Mish()  # assign activation
        if isinstance(m, models.common.BottleneckCSP) or isinstance(m, models.common.BottleneckCSP2) \
                or isinstance(m, models.common.SPPCSP):
            if isinstance(m.bn, nn.SyncBatchNorm):
                bn = nn.BatchNorm2d(m.bn.num_features, eps=m.bn.eps, momentum=m.bn.momentum)
                bn.training = False
                bn._buffers = m.bn._buffers
                bn._non_persistent_buffers_set = set()
                m.bn = bn
            if isinstance(m.act, models.common.Mish):
                m.act = Mish()  # assign activation
        #if isinstance(m, models.yolo.Detect):
        #    m.forward = m.forward_export  # assign forward (optional)

    model.eval()
    model1.eval()

    def to_numpy(tensor):
        return tensor.detach().numpy()    
    torch_out_rand = model(img)
    torch_out_rand = [x for x in torch_out_rand]
    torch_out_rand1 = model1(img)
    torch_out_rand1 = [x for x in torch_out_rand1]
    print(torch_out_rand[0])
    print(torch_out_rand1[0])
    np.testing.assert_allclose(to_numpy(torch_out_rand[0]), to_numpy(torch_out_rand1[0]), rtol=5e-03, atol=1e-04)
    print(np.max(np.abs(to_numpy(torch_out_rand[0]) - to_numpy(torch_out_rand1[0]))))

And it seems that model1 and model give the same results. I think your differences come from somewhere else.

lantudou commented 2 years ago

@RohitKeshari You are right. I used PyTorch 1.7 and reproduced your problem. The export.py comes from here: https://github.com/linghu8812/tensorrt_inference/blob/master/ScaledYOLOv4/export_onnx.py

Actually, it seems that this code no longer works on the newest branch. Here is my solution:

import argparse

import torch
import torch.nn as nn

import models
from models.experimental import attempt_load
from utils.activations import Mish
from onnxsim import simplify

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./weights/yolov4-p5.pt', help='weights path')  # from yolov5/models/
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)

    # Input (on GPU; uncomment the CPU lines below to export on the CPU instead)
    img = torch.zeros((opt.batch_size, 3, *opt.img_size), device=torch.device('cuda:0'))  # image size(1,3,320,192) iDetection

    # Load PyTorch model
    model = attempt_load(opt.weights, map_location=torch.device('cuda:0'))  # load FP32 model

    # model = attempt_load(opt.weights, map_location=torch.device('cpu'))  # load FP32 model
    # img = torch.zeros((opt.batch_size, 3, *opt.img_size), device=torch.device('cpu'))
    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatability
        if isinstance(m, models.common.Conv) and isinstance(m.act, models.common.Mish):
            m.act = Mish()  # assign activation
        if isinstance(m, models.common.BottleneckCSP) or isinstance(m, models.common.BottleneckCSP2) \
                or isinstance(m, models.common.SPPCSP):
            # if isinstance(m.bn, nn.SyncBatchNorm):
            #     bn = nn.BatchNorm2d(m.bn.num_features, eps=m.bn.eps, momentum=m.bn.momentum)
            #     bn.training = False
            #     bn._buffers = m.bn._buffers
            #     bn._non_persistent_buffers_set = set()
            #     m.bn = bn
            if isinstance(m.act, models.common.Mish):
                m.act = Mish()  # assign activation
        # if isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)

    #y = model(img)  # dry run
    #print(y[0])

    model.eval()
    model.model[-1].export = True  # set Detect() layer export=True
    y = model(img)  # dry run (with the modified Detect below, export mode returns a single tensor)
    print(y)
    print(y.shape)
    # ONNX export
    try:
        import onnx

        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
        f = opt.weights.replace('.pt', '.onnx')  # filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          output_names=['output'])

        # Checks
        onnx_model = onnx.load(f)  # load onnx model
        model_simp, check = simplify(onnx_model)
        assert check, "Simplified ONNX model could not be validated"
        onnx.save(model_simp, f)
        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
        print('ONNX export success, saved as %s' % f)
    except Exception as e:
        print('ONNX export failure: %s' % e)

    import onnx
    import onnxruntime
    ort_session = onnxruntime.InferenceSession(f)

    def to_numpy(tensor):
        return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
    img = torch.zeros((opt.batch_size, 3, *opt.img_size))
    # compute ONNX Runtime output prediction
    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(img)}
    ort_outs = ort_session.run(None, ort_inputs)

    print(ort_outs)
    # Finish
    print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')
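If the script above is saved as, say, export_onnx.py (a name I am assuming), a typical invocation would be the following; note that as written it loads the model on cuda:0, so run it on a GPU machine or switch to the commented-out CPU lines:

python export_onnx.py --weights ./weights/yolov4-p5.pt --img-size 640 640 --batch-size 1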

You also need to change the Detect class in models/yolo.py after you have finished training:

class Detect(nn.Module):
    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super(Detect, self).__init__()
        self.stride = None  # strides computed during build
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', a)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.export = False  # onnx export

    def forward(self, x):
        # x = x.copy()  # for profiling
        z = []  # inference output
        self.training |= self.export
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
            y = x[i].sigmoid()
            if self.training:  # export path: output the raw sigmoid activations
                z.append(y.view(bs, -1, self.no))
            else:  # plain inference: decode with grid, anchors and stride
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                z.append(y.view(bs, -1, self.no))

        return torch.cat(z, 1) if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()

With this you will see the same results in both PyTorch and onnxruntime. Now you can use https://github.com/linghu8812/tensorrt_inference/blob/master/ScaledYOLOv4/ to generate the TensorRT engine and run the inference.
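For reference, with this export the graph ends at the raw sigmoid, so the grid/anchor/stride transform from the inference branch above has to be applied by the consumer (the linghu8812 repo does this in its post-processing). A minimal NumPy sketch of that transform for one detection layer, assuming you have reshaped that layer's slice of the output back to (bs, na, ny, nx, no) and know its stride and pixel-space anchors:

import numpy as np

def decode_layer(y, stride, anchors):
    # y: raw sigmoid outputs, shape (bs, na, ny, nx, no); anchors: (na, 2) in pixels
    bs, na, ny, nx, no = y.shape
    xv, yv = np.meshgrid(np.arange(nx), np.arange(ny))  # same layout as _make_grid above
    grid = np.stack((xv, yv), axis=2).reshape(1, 1, ny, nx, 2)
    out = y.copy()
    out[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + grid) * stride  # xy in pixels
    out[..., 2:4] = (y[..., 2:4] * 2.0) ** 2 * np.asarray(anchors).reshape(1, na, 1, 1, 2)  # wh in pixels
    return out.reshape(bs, -1, no)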

lantudou commented 2 years ago

If you only want to use the ONNX model, you can add the post-processing into the ONNX model as below. But the in-place writes will generate some ScatterND operations, which cause problems in TensorRT (see the workaround sketch after the class).

class Detect(nn.Module):
    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super(Detect, self).__init__()
        self.stride = None  # strides computed during build
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', a)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.export = False  # onnx export

    def forward(self, x):
        # x = x.copy()  # for profiling
        z = []  # inference output
        self.training |= self.export
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

            y = x[i].sigmoid()
            y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy (in-place write -> ScatterND)
            y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh (in-place write -> ScatterND)
            z.append(y.view(bs, -1, self.no))

        return torch.cat(z, 1) if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
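If those ScatterND operations break your TensorRT build, a common workaround (my sketch, not from this thread) is to avoid the in-place slice writes and assemble the decoded tensor with torch.cat instead:

# Drop-in replacement for the loop body above, from the sigmoid onwards (sketch):
y = x[i].sigmoid()
xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
y = torch.cat((xy, wh, y[..., 4:]), -1)  # pure concat, so no ScatterND in the graph
z.append(y.view(bs, -1, self.no))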

Now your ONNX output shape is 1 × num_bboxes × (class_num + 5); just take those output boxes and run NMS on them to get the final detection result.
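For that last step, here is a minimal class-agnostic NMS sketch in NumPy (my sketch, assuming the 1 × num_bboxes × (class_num + 5) layout above, with xywh box centers in pixels, objectness at index 4 and class scores after it):

import numpy as np

def nms(pred, conf_thres=0.4, iou_thres=0.5):
    # pred: (num_bboxes, class_num + 5) -> rows of (x1, y1, x2, y2, score, cls)
    scores = pred[:, 4] * pred[:, 5:].max(1)  # objectness * best class probability
    cls = pred[:, 5:].argmax(1).astype(np.float32)
    keep = scores > conf_thres
    pred, scores, cls = pred[keep], scores[keep], cls[keep]
    boxes = np.empty((len(pred), 4), dtype=np.float32)
    boxes[:, 0:2] = pred[:, 0:2] - pred[:, 2:4] / 2  # xywh (center) -> xyxy
    boxes[:, 2:4] = pred[:, 0:2] + pred[:, 2:4] / 2
    order, out = scores.argsort()[::-1], []
    while order.size:
        i, order = order[0], order[1:]
        out.append(i)
        xx1 = np.maximum(boxes[i, 0], boxes[order, 0])
        yy1 = np.maximum(boxes[i, 1], boxes[order, 1])
        xx2 = np.minimum(boxes[i, 2], boxes[order, 2])
        yy2 = np.minimum(boxes[i, 3], boxes[order, 3])
        inter = np.clip(xx2 - xx1, 0, None) * np.clip(yy2 - yy1, 0, None)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_o = (boxes[order, 2] - boxes[order, 0]) * (boxes[order, 3] - boxes[order, 1])
        order = order[inter / (area_i + area_o - inter + 1e-9) < iou_thres]
    return np.concatenate([boxes[out], scores[out][:, None], cls[out][:, None]], axis=1)

# e.g. dets = nms(onnx_output[0]) after squeezing the batch dimension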