
mobilenetv3_large_100_miil_in21k conversion fails #3723

Open stereomatchingkiss opened 2 years ago

stereomatchingkiss commented 2 years ago

error log

No errors were reported.

ncnn version

ncnn-20220420-windows-vs2019

model

def create_mobilenet_v3_large_100_v1(embed_size=128, device="cpu", pretrained=True):
    # Build the timm backbone, freeze it, and replace the classifier
    # with a single linear layer producing a 128-d embedding.
    model_conv = timm.create_model('mobilenetv3_large_100_miil_in21k', pretrained=pretrained)
    for param in model_conv.parameters():
        param.requires_grad = False

    classifier = nn.Sequential(
        nn.Linear(1280, embed_size),
    )
    model_conv.classifier = classifier

    return model_conv.to(device)
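Note that timm's "miil" checkpoints may not use the standard ImageNet mean/std, so it is worth printing what this checkpoint actually expects before wiring up preprocessing. A minimal check, assuming timm's standard default_cfg attribute:

import timm

# Print the preprocessing the checkpoint was published with, so the
# Python and C++ pipelines can be matched against it.
m = timm.create_model('mobilenetv3_large_100_miil_in21k', pretrained=False)
print(m.default_cfg.get('input_size'))                      # expected input shape
print(m.default_cfg.get('mean'), m.default_cfg.get('std'))  # expected normalization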

how to reproduce

  1. Export the model with PyTorch, ONNX, and onnx2ncnn (the model's weights are random); any image will do (named 00.jpg for convenience):
import cv2
import numpy as np
import timm
import torch

import torch.nn as nn

def convert_model_to_onnx(model, torch_input, outputs, np_input, device="cpu"):    
    import onnx
    import onnxruntime as rt

    def convert_to_onnx(model, output_name, input):    
        torch.onnx.export(model, input, output_name, verbose=False, opset_version=11, do_constant_folding=False)

        onnx_model = onnx.load(output_name)
        onnx.checker.check_model(onnx_model)

    def test_model_accuracy(export_model_name, raw_output, input):
        session = rt.InferenceSession(export_model_name, providers=['CPUExecutionProvider'])

        def to_numpy(tensor):
            return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

        print("onnx input shape = ", input.shape)
        # compute ONNX Runtime output prediction
        ort_inputs = {session.get_inputs()[0].name: input}

        # run the session several times; only the last outputs are kept
        for i in range(10):
            ort_outs = session.run(None, ort_inputs)
        print(ort_outs)

        # compare ONNX Runtime and PyTorch results
        np.testing.assert_allclose(to_numpy(raw_output), ort_outs[0], rtol=1e-03, atol=1e-05)

        print("Exported model has been tested with ONNXRuntime, and the result looks good!")    

    export_model_name = "squeeze_net_2.onnx"
    convert_to_onnx(model, export_model_name, torch_input)
    test_model_accuracy(export_model_name, outputs, np_input)

def call_subprocess(cmd):
    import subprocess

    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
    out, err = p.communicate()
    # echo the tool's stdout, skipping comment lines
    for line in out.decode('utf-8').split('\n'):
        if not line.startswith('#'):
            print(line)

def convert_model_to_ncnn(input_model_name="squeeze_net_2.onnx"):
    simplified_name = "squeeze_net_2_simple.onnx"
    cmd = 'python -m onnxsim {} {}'.format(input_model_name, simplified_name)
    call_subprocess(cmd)

    ncnn_path = "../../3rdLibs/ncnn/ncnn-20220420-windows-vs2019/x64/bin/"
    cmd = '{}/onnx2ncnn.exe {} squeeze_net_2_simple.param squeeze_net_2_simple.bin'.format(ncnn_path, simplified_name)
    call_subprocess(cmd)
    # the trailing 65536 asks ncnnoptimize for fp16 weight storage
    cmd = '{}/ncnnoptimize squeeze_net_2_simple.param squeeze_net_2_simple.bin squeeze_net_2_simple_opt.param squeeze_net_2_simple_opt.bin 65536'.format(ncnn_path)
    call_subprocess(cmd)

def preprocess_image(input_img):
    # mirror the training preprocessing: BGR -> RGB, resize to 224x224,
    # scale to [0, 1], then normalize with ImageNet mean/std
    input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
    input_img = cv2.resize(input_img, (224, 224))
    input_img = input_img.astype(np.float32)
    input_img /= 255.0

    mean_val = [0.485, 0.456, 0.406]
    std_val = [0.229, 0.224, 0.225]
    for i in range(3):
        input_img[:, :, i] = (input_img[:, :, i] - mean_val[i]) / std_val[i]

    # HWC -> CHW, add batch dimension; `device` is the module-level global set below
    input_img = np.transpose(input_img, [2, 0, 1])
    np_input = np.expand_dims(input_img, axis=0)
    torch_input = torch.from_numpy(np_input).to(device).float()

    return torch_input, np_input

def read_image(img_pth):
    input_img = cv2.imread(img_pth)
    if input_img is not None:
        return preprocess_image(input_img)

    return None, None

def create_mobilenet_v3_large_100_v1(embed_size=128, device="cpu", pretrained=True):
    # Same model factory as shown above: frozen timm backbone,
    # linear classifier replaced by a 128-d embedding head.
    model_conv = timm.create_model('mobilenetv3_large_100_miil_in21k', pretrained=pretrained)
    for param in model_conv.parameters():
        param.requires_grad = False

    classifier = nn.Sequential(
        nn.Linear(1280, embed_size),
    )
    model_conv.classifier = classifier

    return model_conv.to(device)

device = "cpu"
model = create_mobilenet_v3_large_100_v1(device=device)
#model = timm.create_model('vit_base_patch16_224_miil_in21k', pretrained=False)
print(model)

torch.save(model.state_dict(), "some_model.pth")
model.load_state_dict(torch.load("some_model.pth", map_location=torch.device('cpu')))

model = model.to(device)
model = model.eval()

with torch.no_grad():
    torch_input, np_input = read_image("00.jpg")
    outputs = model(torch_input)
    convert_model_to_onnx(model, torch_input, outputs, np_input)
    convert_model_to_ncnn()

def print_embeddings():
    torch_input, np_input = read_image("00.jpg")
    with torch.no_grad():
        embeddings = model(torch_input)
        embeddings = embeddings.cpu().numpy().squeeze()
        for i in range(embeddings.shape[0]):
            print("{:3}:{:.5f}".format(i, embeddings[i]))

print_embeddings()

No error messages were printed during conversion.
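To rule out the C++ side entirely, the converted files can also be checked from Python with ncnn's own bindings (pip install ncnn). A minimal sketch, assuming the pyncnn Net/Extractor API; the input blob name input.1 comes from the conversion log below, while the output blob name is hypothetical and must be read from the generated .param file:

import ncnn  # pip install ncnn

# Feed the exact same tensor to the converted model and compare the first
# few values against the PyTorch embeddings printed above.
net = ncnn.Net()
net.load_param("squeeze_net_2_simple_opt.param")
net.load_model("squeeze_net_2_simple_opt.bin")

ex = net.create_extractor()
ex.input("input.1", ncnn.Mat(np_input.squeeze(0)))
ret, out = ex.extract("output")  # "output" is hypothetical; read the real name from the .param file
print(np.array(out)[:5])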

  2. Then run inference on the same image with C++:
#ifdef THIS_IS_IOS
#include <ncnn/ncnn/net.h>
#else
#include <net.h>
#endif

#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

#include <fmt/core.h>

#include <iostream>

void preprocess(cv::Mat const &image, ncnn::Mat& in)
{
    //this model expects an RGB image
    in = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_RGB, image.cols, image.rows);
    //mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), rescaled for
    //0-255 pixel values: mean*255 and 1/(std*255)
    float constexpr mean_vals[3] = { 123.675f, 116.28f, 103.53f};
    float constexpr norm_vals[3] = { 0.017125f, 0.017507f, 0.017429f};
    in.substract_mean_normalize(mean_vals, norm_vals);
}

int main()
{
    ncnn::Net net;
    net.load_param("models/random/squeeze_net_2_simple_opt.param");
    net.load_model("models/random/squeeze_net_2_simple_opt.bin");

    auto img = cv::imread("00.jpg");
    cv::resize(img, img, cv::Size(224,224));
    cv::cvtColor(img, img, cv::COLOR_BGR2RGB);

    ncnn::Mat input;
    preprocess(img, input);

    auto ex = net.create_extractor();
    ex.set_light_mode(false);
    ex.set_num_threads(4);

    // use the first input blob and the last output blob of the graph
    std::string const input_name_ = net.input_names()[0];
    std::string const output_name_ = net.output_names()[net.output_names().size() - 1];

    ex.input(input_name_.c_str(), input);

    ncnn::Mat out;
    ex.extract(output_name_.c_str(), out);
    auto const *data = out.row(0);
    for(size_t i = 0; i != out.total(); ++i){
        std::cout<<fmt::format("{:3}:{:.5f}", i, data[i])<<std::endl;
    }

    return 0;
}
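For reference, the C++ constants are meant to reproduce the Python normalization (x/255 - mean)/std as (x - mean*255) * (1/(std*255)); a quick check of that arithmetic:

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
print([round(m * 255, 3) for m in mean])       # [123.675, 116.28, 103.53]
print([round(1 / (s * 255), 6) for s in std])  # [0.017125, 0.017507, 0.017429]

So the two pipelines agree on normalization, assuming those mean/std values are right for this checkpoint in the first place.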

The PyTorch and ncnn outputs differ; taking the first 5 values as an example:

pytorch

0:-0.11283
1:0.39284
2:0.01229
3:-0.17599
4:0.17550

c++(ncnn)

0:-1.88119
1:0.52845
2:-0.72684
3:-0.06849
4:0.80439

If I export MobileNetV2 instead, the results are correct:

def create_mobilenet_v2(embed_size=128, device=default_device(), pretrained=True):
    # requires torchvision; default_device() is a helper from my own code base
    model_conv = torchvision.models.mobilenet_v2(pretrained=pretrained)
    for param in model_conv.parameters():
        param.requires_grad = False

    linear_1 = nn.Linear(model_conv.last_channel, embed_size)
    nn.init.xavier_normal_(linear_1.weight)
    classifier = nn.Sequential(
        linear_1,
        nn.BatchNorm1d(embed_size)
    )
    model_conv.classifier = classifier

    return model_conv.to(device)

Did I make a mistake somewhere during the conversion?
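Since MobileNetV2 uses only ReLU6 while MobileNetV3 also uses hardswish (presumably the fused Mul_* nodes in the log below), one way to localize the divergence would be to compare an intermediate activation between ncnn and PyTorch. A hedged sketch, assuming timm's MobileNetV3 exposes a blocks attribute; model, torch_input, and np_input come from the export script above, and the ncnn blob name is hypothetical:

import ncnn
import numpy as np
import torch

# Capture a PyTorch activation with a forward hook, then compare it
# to the ncnn blob at the same point in the graph.
acts = {}
model.blocks[0].register_forward_hook(lambda m, i, o: acts.update(ref=o))
with torch.no_grad():
    model(torch_input)

net = ncnn.Net()
net.load_param("squeeze_net_2_simple_opt.param")
net.load_model("squeeze_net_2_simple_opt.bin")
ex = net.create_extractor()
ex.input("input.1", ncnn.Mat(np_input.squeeze(0)))
ret, mid = ex.extract("some_blob")  # hypothetical: the blob matching blocks[0]
print(np.abs(np.array(mid) - acts["ref"].numpy().squeeze(0)).max())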

Output messages during model conversion

Exported model has been tested with ONNXRuntime, and the result looks good!
Simplifying...
Ok!

fuse_convolution_activation Conv_0 Mul_2
fuse_convolution_activation Conv_7 Relu_8
fuse_convolution_activation Conv_12 Relu_13
fuse_convolution_activation Conv_18 Relu_19
fuse_convolution_activation Conv_23 Relu_24
fuse_convolution_activation Conv_29 Relu_30
fuse_convolution_activation Conv_34 Relu_35
fuse_convolution_activation Conv_41 Relu_42
fuse_convolution_activation Conv_46 Relu_47
fuse_convolution_activation Conv_53 Mul_55
fuse_convolution_activation Conv_60 Mul_62
fuse_convolution_activation Conv_68 Mul_70
fuse_convolution_activation Conv_76 Mul_78
fuse_convolution_activation Conv_84 Mul_86
fuse_convolution_activation Conv_91 Relu_92
fuse_convolution_activation Conv_97 Mul_99
fuse_convolution_activation Conv_104 Relu_105
fuse_convolution_activation Conv_111 Mul_113
fuse_convolution_activation Conv_118 Relu_119
fuse_convolution_activation Conv_124 Mul_126
fuse_convolution_activation Conv_131 Relu_132
fuse_convolution_activation Conv_138 Mul_140
fuse_convolution_activation Conv_145 Relu_146
fuse_convolution_activation Conv_152 Mul_154
fuse_convolution_activation Conv_156 Mul_158
fuse_convolutiondepthwise_activation Conv_3 Relu_4
fuse_convolutiondepthwise_activation Conv_9 Relu_10
fuse_convolutiondepthwise_activation Conv_14 Relu_15
fuse_convolutiondepthwise_activation Conv_20 Relu_21
fuse_convolutiondepthwise_activation Conv_31 Relu_32
fuse_convolutiondepthwise_activation Conv_43 Relu_44
fuse_convolutiondepthwise_activation Conv_56 Mul_58
fuse_convolutiondepthwise_activation Conv_63 Mul_65
fuse_convolutiondepthwise_activation Conv_71 Mul_73
fuse_convolutiondepthwise_activation Conv_79 Mul_81
fuse_convolutiondepthwise_activation Conv_87 Mul_89
fuse_convolutiondepthwise_activation Conv_100 Mul_102
fuse_convolutiondepthwise_activation Conv_114 Mul_116
fuse_convolutiondepthwise_activation Conv_127 Mul_129
fuse_convolutiondepthwise_activation Conv_141 Mul_143
replace_convolution_with_innerproduct_after_global_pooling GlobalAveragePool_155 Conv_156
eliminate_flatten_after_innerproduct Conv_156 Flatten_159
Input layer input.1 without shape info, shape_inference skipped
Input layer input.1 without shape info, estimate_memory_footprint skipped

python env

Python 3.8.5

argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
asttokens==2.0.5
attrs==21.4.0
backcall==0.2.0
black==22.1.0
bleach==4.1.0
certifi==2021.10.8
cffi==1.15.0
click==8.0.4
colorama==0.4.4
colour-checker-detection==0.1.3
Cython==0.29.28
-e git+https://github.com/ifzhang/ByteTrack@d742a3321c14a7412f024f2218142c7441c1b699#egg=cython_bbox&subdirectory=cython_bbox-0.1.3
debugpy==1.5.1
decorator==5.1.1
defusedxml==0.7.1
entrypoints==0.4
executing==0.8.2
flatbuffers==2.0
ipyfilechooser==0.6.0
ipykernel==6.9.1
ipython==8.0.1
ipython-genutils==0.2.0
ipywidgets==7.6.5
jedi==0.18.1
Jinja2==3.0.3
jsonschema==4.4.0
jupyter-client==7.1.2
jupyter-core==4.9.2
jupyterlab-pygments==0.1.2
jupyterlab-widgets==1.0.2
lap==0.4.0
MarkupSafe==2.1.0
matplotlib-inline==0.1.3
mistune==0.8.4
mypy-extensions==0.4.3
nbclient==0.5.11
nbconvert==6.4.2
nbformat==5.1.3
nest-asyncio==1.5.4
notebook==6.4.8
numpy==1.22.2
onnxruntime-gpu==1.10.0
opencv-contrib-python-headless==4.5.5.62
packaging==21.3
pandocfilters==1.5.0
parso==0.8.3
pathspec==0.9.0
pickleshare==0.7.5
platformdirs==2.5.1
prometheus-client==0.13.1
prompt-toolkit==3.0.28
protobuf==3.19.4
pure-eval==0.2.2
pycparser==2.21
pygame==2.1.2
Pygments==2.11.2
pyparsing==3.0.7
PyQt5==5.15.6
PyQt5-Qt5==5.15.2
PyQt5-sip==12.9.1
pyrsistent==0.18.1
python-dateutil==2.8.2
pywin32==303
pywinpty==2.0.2
pyzmq==22.3.0
scipy==1.8.0
Send2Trash==1.8.0
six==1.16.0
stack-data==0.2.0
terminado==0.13.1
testpath==0.6.0
thefuzz==0.19.0
tomli==2.0.1
tornado==6.1
traitlets==5.1.1
typing_extensions==4.1.1
wcwidth==0.2.5
webencodings==0.5.1
widgetsnbextension==3.5.2
wincertstore==0.2

nihui commented 1 month ago

In view of the various problems with onnx model conversion, it is recommended to use the latest pnnx tool to convert your model to ncnn:

pip install pnnx
pnnx model.onnx inputshape=[1,3,224,224]

Detailed reference documentation: https://github.com/pnnx/pnnx and https://github.com/Tencent/ncnn/wiki/use-ncnn-with-pytorch-or-onnx#how-to-use-pnnx
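For reference, pnnx writes ncnn-ready files next to the input model (assuming its default naming for model.onnx), along with a *_ncnn.py test script that shows the exact blob names; the generated files load the same way as before:

import ncnn

# Hedged sketch, assuming pnnx's default output names for model.onnx
net = ncnn.Net()
net.load_param("model.ncnn.param")
net.load_model("model.ncnn.bin")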