
mobilenetv3_large_100_miil_in21k conversion fails #3723

Open stereomatchingkiss opened 2 years ago

stereomatchingkiss commented 2 years ago

error log

No errors were reported.

ncnn version

ncnn-20220420-windows-vs2019

model

def create_mobilenet_v3_large_100_v1(embed_size=128, device="cpu", pretrained=True):
    # Build the timm backbone, freeze it, and replace the classifier
    # with a single linear layer producing a 128-d embedding.
    model_conv = timm.create_model('mobilenetv3_large_100_miil_in21k', pretrained=pretrained)
    for param in model_conv.parameters():
        param.requires_grad = False

    classifier = nn.Sequential(
        nn.Linear(1280, embed_size),
    )
    model_conv.classifier = classifier

    return model_conv.to(device)
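Note that timm's "miil" checkpoints may not use the standard ImageNet mean/std, so it is worth printing what this checkpoint actually expects before wiring up preprocessing. A minimal check, assuming timm's standard default_cfg attribute:

import timm

# Print the preprocessing the checkpoint was published with, so the
# Python and C++ pipelines can be matched against it.
m = timm.create_model('mobilenetv3_large_100_miil_in21k', pretrained=False)
print(m.default_cfg.get('input_size'))                      # expected input shape
print(m.default_cfg.get('mean'), m.default_cfg.get('std'))  # expected normalization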

how to reproduce

  1. Export the model with PyTorch, ONNX, and onnx2ncnn (the model's weights are random); any image will do (named 00.jpg for convenience):
import cv2
import numpy as np
import timm
import torch

import torch.nn as nn

def convert_model_to_onnx(model, torch_input, outputs, np_input, device="cpu"):    
    import onnx
    import onnxruntime as rt

    def convert_to_onnx(model, output_name, input):    
        torch.onnx.export(model, input, output_name, verbose=False, opset_version=11, do_constant_folding=False)

        onnx_model = onnx.load(output_name)
        onnx.checker.check_model(onnx_model)

    def test_model_accuracy(export_model_name, raw_output, input):
        session = rt.InferenceSession(export_model_name, providers=['CPUExecutionProvider'])

        def to_numpy(tensor):
            return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

        print("onnx input shape = ", input.shape)
        # compute ONNX Runtime output prediction
        ort_inputs = {session.get_inputs()[0].name: input}

        # run the session several times; only the last outputs are kept
        for i in range(10):
            ort_outs = session.run(None, ort_inputs)
        print(ort_outs)

        # compare ONNX Runtime and PyTorch results
        np.testing.assert_allclose(to_numpy(raw_output), ort_outs[0], rtol=1e-03, atol=1e-05)

        print("Exported model has been tested with ONNXRuntime, and the result looks good!")    

    export_model_name = "squeeze_net_2.onnx"
    convert_to_onnx(model, export_model_name, torch_input)
    test_model_accuracy(export_model_name, outputs, np_input)

def call_subprocess(cmd):
    import subprocess

    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
    out, err = p.communicate()
    # echo the tool's stdout, skipping comment lines
    for line in out.decode('utf-8').split('\n'):
        if not line.startswith('#'):
            print(line)

def convert_model_to_ncnn(input_model_name="squeeze_net_2.onnx"):
    simplified_name = "squeeze_net_2_simple.onnx"
    cmd = 'python -m onnxsim {} {}'.format(input_model_name, simplified_name)
    call_subprocess(cmd)

    ncnn_path = "../../3rdLibs/ncnn/ncnn-20220420-windows-vs2019/x64/bin/"
    cmd = '{}/onnx2ncnn.exe {} squeeze_net_2_simple.param squeeze_net_2_simple.bin'.format(ncnn_path, simplified_name)
    call_subprocess(cmd)
    # the trailing 65536 asks ncnnoptimize for fp16 weight storage
    cmd = '{}/ncnnoptimize squeeze_net_2_simple.param squeeze_net_2_simple.bin squeeze_net_2_simple_opt.param squeeze_net_2_simple_opt.bin 65536'.format(ncnn_path)
    call_subprocess(cmd)

def preprocess_image(input_img):
    # mirror the training preprocessing: BGR -> RGB, resize to 224x224,
    # scale to [0, 1], then normalize with ImageNet mean/std
    input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
    input_img = cv2.resize(input_img, (224, 224))
    input_img = input_img.astype(np.float32)
    input_img /= 255.0

    mean_val = [0.485, 0.456, 0.406]
    std_val = [0.229, 0.224, 0.225]
    for i in range(3):
        input_img[:, :, i] = (input_img[:, :, i] - mean_val[i]) / std_val[i]

    # HWC -> CHW, add batch dimension; `device` is the module-level global set below
    input_img = np.transpose(input_img, [2, 0, 1])
    np_input = np.expand_dims(input_img, axis=0)
    torch_input = torch.from_numpy(np_input).to(device).float()

    return torch_input, np_input

def read_image(img_pth):
    input_img = cv2.imread(img_pth)
    if input_img is not None:
        return preprocess_image(input_img)

    return None, None

def create_mobilenet_v3_large_100_v1(embed_size=128, device="cpu", pretrained=True):
    # Same model factory as shown above: frozen timm backbone,
    # linear classifier replaced by a 128-d embedding head.
    model_conv = timm.create_model('mobilenetv3_large_100_miil_in21k', pretrained=pretrained)
    for param in model_conv.parameters():
        param.requires_grad = False

    classifier = nn.Sequential(
        nn.Linear(1280, embed_size),
    )
    model_conv.classifier = classifier

    return model_conv.to(device)

device = "cpu"
model = create_mobilenet_v3_large_100_v1(device=device)
#model = timm.create_model('vit_base_patch16_224_miil_in21k', pretrained=False)
print(model)

torch.save(model.state_dict(), "some_model.pth")
model.load_state_dict(torch.load("some_model.pth", map_location=torch.device('cpu')))

model = model.to(device)
model = model.eval()

with torch.no_grad():
    torch_input, np_input = read_image("00.jpg")
    outputs = model(torch_input)
    convert_model_to_onnx(model, torch_input, outputs, np_input)
    convert_model_to_ncnn()

def print_embeddings():
    torch_input, np_input = read_image("00.jpg")
    with torch.no_grad():
        embeddings = model(torch_input)
        embeddings = embeddings.cpu().numpy().squeeze()
        for i in range(embeddings.shape[0]):
            print("{:3}:{:.5f}".format(i, embeddings[i]))

print_embeddings()

No error messages were printed during conversion.
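To rule out the C++ side entirely, the converted files can also be checked from Python with ncnn's own bindings (pip install ncnn). A minimal sketch, assuming the pyncnn Net/Extractor API; the input blob name input.1 comes from the conversion log below, while the output blob name is hypothetical and must be read from the generated .param file:

import ncnn  # pip install ncnn

# Feed the exact same tensor to the converted model and compare the first
# few values against the PyTorch embeddings printed above.
net = ncnn.Net()
net.load_param("squeeze_net_2_simple_opt.param")
net.load_model("squeeze_net_2_simple_opt.bin")

ex = net.create_extractor()
ex.input("input.1", ncnn.Mat(np_input.squeeze(0)))
ret, out = ex.extract("output")  # "output" is hypothetical; read the real name from the .param file
print(np.array(out)[:5])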

  2. Then run inference on the same image with C++:
#ifdef THIS_IS_IOS
#include <ncnn/ncnn/net.h>
#else
#include <net.h>
#endif

#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

#include <fmt/core.h>

#include <iostream>

void preprocess(cv::Mat const &image, ncnn::Mat& in)
{
    //this model expects an RGB image
    in = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_RGB, image.cols, image.rows);
    //mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), rescaled for
    //0-255 pixel values: mean*255 and 1/(std*255)
    float constexpr mean_vals[3] = { 123.675f, 116.28f, 103.53f};
    float constexpr norm_vals[3] = { 0.017125f, 0.017507f, 0.017429f};
    in.substract_mean_normalize(mean_vals, norm_vals);
}

int main()
{
    ncnn::Net net;
    net.load_param("models/random/squeeze_net_2_simple_opt.param");
    net.load_model("models/random/squeeze_net_2_simple_opt.bin");

    auto img = cv::imread("00.jpg");
    cv::resize(img, img, cv::Size(224,224));
    cv::cvtColor(img, img, cv::COLOR_BGR2RGB);

    ncnn::Mat input;
    preprocess(img, input);

    auto ex = net.create_extractor();
    ex.set_light_mode(false);
    ex.set_num_threads(4);

    // use the first input blob and the last output blob of the graph
    std::string const input_name_ = net.input_names()[0];
    std::string const output_name_ = net.output_names()[net.output_names().size() - 1];

    ex.input(input_name_.c_str(), input);

    ncnn::Mat out;
    ex.extract(output_name_.c_str(), out);
    auto const *data = out.row(0);
    for(size_t i = 0; i != out.total(); ++i){
        std::cout<<fmt::format("{:3}:{:.5f}", i, data[i])<<std::endl;
    }

    return 0;
}
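For reference, the C++ constants are meant to reproduce the Python normalization (x/255 - mean)/std as (x - mean*255) * (1/(std*255)); a quick check of that arithmetic:

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
print([round(m * 255, 3) for m in mean])       # [123.675, 116.28, 103.53]
print([round(1 / (s * 255), 6) for s in std])  # [0.017125, 0.017507, 0.017429]

So the two pipelines agree on normalization, assuming those mean/std values are right for this checkpoint in the first place.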

The PyTorch and ncnn outputs differ; taking the first 5 values as an example:

pytorch

0:-0.11283
1:0.39284
2:0.01229
3:-0.17599
4:0.17550

c++(ncnn)

0:-1.88119
1:0.52845
2:-0.72684
3:-0.06849
4:0.80439

If I export MobileNetV2 instead, the results are correct:

def create_mobilenet_v2(embed_size=128, device=default_device(), pretrained=True):
    # requires torchvision; default_device() is a helper from my own code base
    model_conv = torchvision.models.mobilenet_v2(pretrained=pretrained)
    for param in model_conv.parameters():
        param.requires_grad = False

    linear_1 = nn.Linear(model_conv.last_channel, embed_size)
    nn.init.xavier_normal_(linear_1.weight)
    classifier = nn.Sequential(
        linear_1,
        nn.BatchNorm1d(embed_size)
    )
    model_conv.classifier = classifier

    return model_conv.to(device)

Did I make a mistake somewhere during the conversion?
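Since MobileNetV2 uses only ReLU6 while MobileNetV3 also uses hardswish (presumably the fused Mul_* nodes in the log below), one way to localize the divergence would be to compare an intermediate activation between ncnn and PyTorch. A hedged sketch, assuming timm's MobileNetV3 exposes a blocks attribute; model, torch_input, and np_input come from the export script above, and the ncnn blob name is hypothetical:

import ncnn
import numpy as np
import torch

# Capture a PyTorch activation with a forward hook, then compare it
# to the ncnn blob at the same point in the graph.
acts = {}
model.blocks[0].register_forward_hook(lambda m, i, o: acts.update(ref=o))
with torch.no_grad():
    model(torch_input)

net = ncnn.Net()
net.load_param("squeeze_net_2_simple_opt.param")
net.load_model("squeeze_net_2_simple_opt.bin")
ex = net.create_extractor()
ex.input("input.1", ncnn.Mat(np_input.squeeze(0)))
ret, mid = ex.extract("some_blob")  # hypothetical: the blob matching blocks[0]
print(np.abs(np.array(mid) - acts["ref"].numpy().squeeze(0)).max())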

Output messages during model conversion

Exported model has been tested with ONNXRuntime, and the result looks good!
Simplifying...
Ok!

fuse_convolution_activation Conv_0 Mul_2
fuse_convolution_activation Conv_7 Relu_8
fuse_convolution_activation Conv_12 Relu_13
fuse_convolution_activation Conv_18 Relu_19
fuse_convolution_activation Conv_23 Relu_24
fuse_convolution_activation Conv_29 Relu_30
fuse_convolution_activation Conv_34 Relu_35
fuse_convolution_activation Conv_41 Relu_42
fuse_convolution_activation Conv_46 Relu_47
fuse_convolution_activation Conv_53 Mul_55
fuse_convolution_activation Conv_60 Mul_62
fuse_convolution_activation Conv_68 Mul_70
fuse_convolution_activation Conv_76 Mul_78
fuse_convolution_activation Conv_84 Mul_86
fuse_convolution_activation Conv_91 Relu_92
fuse_convolution_activation Conv_97 Mul_99
fuse_convolution_activation Conv_104 Relu_105
fuse_convolution_activation Conv_111 Mul_113
fuse_convolution_activation Conv_118 Relu_119
fuse_convolution_activation Conv_124 Mul_126
fuse_convolution_activation Conv_131 Relu_132
fuse_convolution_activation Conv_138 Mul_140
fuse_convolution_activation Conv_145 Relu_146
fuse_convolution_activation Conv_152 Mul_154
fuse_convolution_activation Conv_156 Mul_158
fuse_convolutiondepthwise_activation Conv_3 Relu_4
fuse_convolutiondepthwise_activation Conv_9 Relu_10
fuse_convolutiondepthwise_activation Conv_14 Relu_15
fuse_convolutiondepthwise_activation Conv_20 Relu_21
fuse_convolutiondepthwise_activation Conv_31 Relu_32
fuse_convolutiondepthwise_activation Conv_43 Relu_44
fuse_convolutiondepthwise_activation Conv_56 Mul_58
fuse_convolutiondepthwise_activation Conv_63 Mul_65
fuse_convolutiondepthwise_activation Conv_71 Mul_73
fuse_convolutiondepthwise_activation Conv_79 Mul_81
fuse_convolutiondepthwise_activation Conv_87 Mul_89
fuse_convolutiondepthwise_activation Conv_100 Mul_102
fuse_convolutiondepthwise_activation Conv_114 Mul_116
fuse_convolutiondepthwise_activation Conv_127 Mul_129
fuse_convolutiondepthwise_activation Conv_141 Mul_143
replace_convolution_with_innerproduct_after_global_pooling GlobalAveragePool_155 Conv_156
eliminate_flatten_after_innerproduct Conv_156 Flatten_159
Input layer input.1 without shape info, shape_inference skipped
Input layer input.1 without shape info, estimate_memory_footprint skipped

python env

Python 3.8.5

argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
asttokens==2.0.5
attrs==21.4.0
backcall==0.2.0
black==22.1.0
bleach==4.1.0
certifi==2021.10.8
cffi==1.15.0
click==8.0.4
colorama==0.4.4
colour-checker-detection==0.1.3
Cython==0.29.28
-e git+https://github.com/ifzhang/ByteTrack@d742a3321c14a7412f024f2218142c7441c1b699#egg=cython_bbox&subdirectory=cython_bbox-0.1.3
debugpy==1.5.1
decorator==5.1.1
defusedxml==0.7.1
entrypoints==0.4
executing==0.8.2
flatbuffers==2.0
ipyfilechooser==0.6.0
ipykernel==6.9.1
ipython==8.0.1
ipython-genutils==0.2.0
ipywidgets==7.6.5
jedi==0.18.1
Jinja2==3.0.3
jsonschema==4.4.0
jupyter-client==7.1.2
jupyter-core==4.9.2
jupyterlab-pygments==0.1.2
jupyterlab-widgets==1.0.2
lap==0.4.0
MarkupSafe==2.1.0
matplotlib-inline==0.1.3
mistune==0.8.4
mypy-extensions==0.4.3
nbclient==0.5.11
nbconvert==6.4.2
nbformat==5.1.3
nest-asyncio==1.5.4
notebook==6.4.8
numpy==1.22.2
onnxruntime-gpu==1.10.0
opencv-contrib-python-headless==4.5.5.62
packaging==21.3
pandocfilters==1.5.0
parso==0.8.3
pathspec==0.9.0
pickleshare==0.7.5
platformdirs==2.5.1
prometheus-client==0.13.1
prompt-toolkit==3.0.28
protobuf==3.19.4
pure-eval==0.2.2
pycparser==2.21
pygame==2.1.2
Pygments==2.11.2
pyparsing==3.0.7
PyQt5==5.15.6
PyQt5-Qt5==5.15.2
PyQt5-sip==12.9.1
pyrsistent==0.18.1
python-dateutil==2.8.2
pywin32==303
pywinpty==2.0.2
pyzmq==22.3.0
scipy==1.8.0
Send2Trash==1.8.0
six==1.16.0
stack-data==0.2.0
terminado==0.13.1
testpath==0.6.0
thefuzz==0.19.0
tomli==2.0.1
tornado==6.1
traitlets==5.1.1
typing_extensions==4.1.1
wcwidth==0.2.5
webencodings==0.5.1
widgetsnbextension==3.5.2
wincertstore==0.2

nihui commented 1 month ago

In view of the various problems with onnx model conversion, it is recommended to use the latest pnnx tool to convert your model to ncnn:

pip install pnnx
pnnx model.onnx inputshape=[1,3,224,224]

Detailed reference documentation: https://github.com/pnnx/pnnx and https://github.com/Tencent/ncnn/wiki/use-ncnn-with-pytorch-or-onnx#how-to-use-pnnx
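For reference, pnnx writes ncnn-ready files next to the input model (assuming its default naming for model.onnx), along with a *_ncnn.py test script that shows the exact blob names; the generated files load the same way as before:

import ncnn

# Hedged sketch, assuming pnnx's default output names for model.onnx
net = ncnn.Net()
net.load_param("model.ncnn.param")
net.load_model("model.ncnn.bin")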