microsoft / onnxruntime

ONNX Runtime: cross-platform, high performance ML inferencing and training accelerator
https://onnxruntime.ai

Loading ONNX model fails with Fatal error: ConstantFill is not a registered function/op #1284

Closed: tadas-subonis closed this issue 5 years ago

tadas-subonis commented 5 years ago

Describe the bug
I've saved a PyTorch model, and now I am trying to load it with ONNX Runtime:

import onnxruntime as rt

sess = rt.InferenceSession('ocr_model.onnx')

but this throws an error:

C:\Programs\Anaconda3\envs\scraper-python\lib\site-packages\onnxruntime\capi\session.py in __init__(self, path_or_bytes, sess_options)
     27 
     28         if isinstance(path_or_bytes, str):
---> 29             self._sess.load_model(path_or_bytes)
     30         elif isinstance(path_or_bytes, bytes):
     31             self._sess.read_bytes(path_or_bytes)

RuntimeError: [ONNXRuntimeError] : 1 : GENERAL ERROR : Load model from ocr_model.onnx failed:Fatal error: ConstantFill is not a registered function/op


pranavsharma commented 5 years ago

Can you attach the ONNX model? How did you convert it? ConstantFill is not an ONNX op; see here. On the surface, this looks like a converter issue to me.
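One way to confirm what the exported file contains is to list its operator types with the onnx Python package (a minimal sketch; the file name is taken from the report above):

import onnx

# Load the exported model and list the operator types it uses.
model = onnx.load("ocr_model.onnx")
print(sorted({node.op_type for node in model.graph.node}))  # look for 'ConstantFill'

# The opset import hints at which opset the converter targeted.
for opset in model.opset_import:
    print(opset.domain or "ai.onnx", opset.version)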

tadas-subonis commented 5 years ago

I've converted it using:

import torch.onnx
dummy_input = torch.randn(1, 3, 32, 500).cuda()
torch.onnx.export(skorch_model.module, dummy_input, "ocr_model.onnx", export_params=True)

where skorch_model.module is a standard torch.nn.Module

The ONNX file can be found here: https://send.firefox.com/download/04e009f5f8634e9c/#myOqDeOep_Zb2rHe4vzpRQ

tadas-subonis commented 5 years ago

The PyTorch version is pytorch=1.0.0=py3.7_cuda100_cudnn7_1.

faxu commented 5 years ago

CC @spandantiwari. @tadas-subonis, can you try with PyTorch 1.1 and see if this reproduces?

ebarsoum commented 5 years ago

@tadas-subonis can you try PyTorch 1.1? ConstantFill was an experimental op that was replaced by ConstantOfShape.
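For reference, a minimal re-export sketch under PyTorch 1.1 (the dummy input shape and skorch_model.module come from the snippet above; pinning opset_version=9, the opset that introduced ConstantOfShape, is an added assumption):

import torch
import onnxruntime as rt

dummy_input = torch.randn(1, 3, 32, 500).cuda()
# opset 9 provides ConstantOfShape, the replacement for the old ConstantFill
torch.onnx.export(skorch_model.module, dummy_input, "ocr_model.onnx",
                  export_params=True, opset_version=9)

# Sanity check: the file should now load without the registration error.
sess = rt.InferenceSession("ocr_model.onnx")
print([i.name for i in sess.get_inputs()])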

tadas-subonis commented 5 years ago

You were right: after updating to 1.1.0 and re-exporting the model, it works.

Thanks for the help.

LJXLJXLJX commented 5 years ago

@tadas-subonis Sorry to bother you; it seems you were working on a CRNN. I am working on the same thing but get a different error:

RuntimeError: [ONNXRuntimeError] : 1 : GENERAL ERROR : Load model from crnn.onnx failed:[TypeInferenceError] Attribute expected to have a one-dim tensor

Could you give me any suggestions? Thank you.

The graph definition follows; a diagnostic sketch is appended after it.

import torch
import torch.nn as nn
import torch.onnx as onnx
from global_config import cfg

def initConvAndLinear(layer):
    # Xavier-initialize the weights and give the bias a small constant value.
    nn.init.xavier_normal_(layer.weight)
    nn.init.constant_(layer.bias, 0.1)

def initLstm(layer):
    # all_weights[0] holds the forward-direction parameters (weight_ih, weight_hh, ...)
    # and all_weights[1] the reverse direction of the bidirectional LSTM;
    # only the weight matrices are re-initialized here, not the biases.
    nn.init.xavier_normal_(layer.all_weights[0][0])
    nn.init.xavier_normal_(layer.all_weights[0][1])
    nn.init.xavier_normal_(layer.all_weights[1][0])
    nn.init.xavier_normal_(layer.all_weights[1][1])

class BidirectionalLSTM(nn.Module):

    def __init__(self, nIn, nHidden, nOut, init=False):
        super(BidirectionalLSTM, self).__init__()
        self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
        if init: initLstm(self.rnn)
        self.embedding = nn.Linear(nHidden * 2, nOut)
        if init: initConvAndLinear(self.embedding)

    def forward(self, input):
        recurrent, _ = self.rnn(input)
        T, b, h = recurrent.size()
        t_rec = recurrent.view(T * b, h)

        output = self.embedding(t_rec)  # [T * b, nOut]
        output = output.view(T, b, -1)

        return output

class Net(nn.Module):
    def __init__(self, init=False):
        super(Net, self).__init__()
        self._CNN(init)
        self._RNN(init)

    def _CNN(self, init):
        self.cnn = nn.Sequential()

        # the same (stateless) activation instance is reused under several names below
        relu = nn.LeakyReLU(0.2)
        # assume input is (1,3,32,128)
        # conv1
        conv1 = nn.Conv2d(3, 64, (3, 3), 1, 1)
        if init: initConvAndLinear(conv1)
        self.cnn.add_module('conv1', conv1)
        self.cnn.add_module('relu1', relu)
        # (1,64,32,128)

        # max_pool1 TODO it may be a good idea to replace maxpool with conv
        max_pool1 = nn.MaxPool2d((2, 2), 2)
        self.cnn.add_module('max_pool1', max_pool1)
        # (1,64,16,64)

        # conv2
        conv2 = nn.Conv2d(64, 128, (3, 3), 1, 1)
        if init: initConvAndLinear(conv2)
        self.cnn.add_module('conv2', conv2)
        self.cnn.add_module('relu2', relu)
        # (1,128,16,64)

        # max_pool2
        max_pool2 = nn.MaxPool2d((2, 2), 2)
        self.cnn.add_module('max_pool2', max_pool2)
        # (1,128,8,32)

        # conv3
        conv3 = nn.Conv2d(128, 256, (3, 3), 1, 1)
        if init: initConvAndLinear(conv3)
        self.cnn.add_module('conv3', conv3)
        self.cnn.add_module('relu3', relu)
        # (1,256,8,32)

        # conv4
        conv4 = nn.Conv2d(256, 256, (3, 3), 1, 1)
        if init: initConvAndLinear(conv4)
        self.cnn.add_module('conv4', conv4)
        self.cnn.add_module('relu4', relu)
        # (1,256,8,32)

        # max_pool3
        max_pool3 = nn.MaxPool2d((2, 1), (2, 1))
        self.cnn.add_module('max_pool3', max_pool3)
        # (1,256,4,32)

        # conv5
        conv5 = nn.Conv2d(256, 512, (3, 3), 1, 1)
        if init: initConvAndLinear(conv5)
        self.cnn.add_module('conv5', conv5)
        self.cnn.add_module('relu5', relu)
        # (1,512,4,32)

        # batch normalization 1
        bn1 = nn.BatchNorm2d(512)
        self.cnn.add_module('bn1', bn1)
        # (1,512,4,32)

        # conv6
        conv6 = nn.Conv2d(512, 512, (3, 3), 1, 1)
        if init: initConvAndLinear(conv6)
        self.cnn.add_module('conv6', conv6)
        self.cnn.add_module('relu6', relu)
        # (1,512,4,32)

        # batch normalization 2
        bn2 = nn.BatchNorm2d(512)
        self.cnn.add_module('bn2', bn2)
        # (1,512,4,32)

        # max_pool4
        max_pool4 = nn.MaxPool2d((2, 1), (2, 1))
        self.cnn.add_module('max_pool4', max_pool4)
        # (1,512,2,32)

        # conv7
        conv7 = nn.Conv2d(512, 512, (2, 1), (2, 1), 0)
        if init: initConvAndLinear(conv7)
        self.cnn.add_module('conv7', conv7)
        self.cnn.add_module('relu7', relu)
        # (1,512,1,32)

    def _RNN(self, init):
        self.rnn = nn.Sequential()
        lstm = BidirectionalLSTM(512, cfg.ARCH.HIDDEN_UNIT, 512, init)
        self.rnn.add_module('lstm1', lstm)
        for i in range(2, cfg.ARCH.LSTM_LAYER_NUM):
            lstm = BidirectionalLSTM(512, cfg.ARCH.HIDDEN_UNIT, 512, init)
            self.rnn.add_module('lstm{0}'.format(i), lstm)

        lstm = BidirectionalLSTM(512, cfg.ARCH.HIDDEN_UNIT, cfg.ARCH.NUM_CLASS, init)
        self.rnn.add_module('lstm{0}'.format(cfg.ARCH.LSTM_LAYER_NUM), lstm)

    def forward(self, input):
        feature_map = self.cnn(input)  # (N, 512, 1, W) after the conv stack
        # drop the height-1 axis and reorder to (W, N, C) for the LSTM stack
        feature_sequence = torch.squeeze(feature_map, 2).permute(2, 0, 1)
        output = self.rnn(feature_sequence)
        return output
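To narrow down which node trips the TypeInferenceError, it may help to run the ONNX checker and shape inference on the exported file before loading it with onnxruntime. A minimal diagnostic sketch, assuming the crnn.onnx path from the error message (the scalar-constant heuristic is an assumption, not a confirmed cause):

import onnx
from onnx import checker, shape_inference

model = onnx.load("crnn.onnx")
checker.check_model(model)                       # validates nodes and attributes
inferred = shape_inference.infer_shapes(model)   # may surface the same error with more context

# Print every Constant node and the shape of its tensor attribute;
# a zero-dim (scalar) value is a plausible culprit for
# "Attribute expected to have a one-dim tensor".
for node in model.graph.node:
    if node.op_type == "Constant":
        for attr in node.attribute:
            if attr.name == "value":
                print(node.name or node.output[0], list(attr.t.dims))

If a zero-dim constant shows up, it points at the op in the traced graph that produced it; re-exporting with a newer PyTorch, as in the ConstantFill case above, may also change how these constants are emitted.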