pytorch / vision

Datasets, Transforms and Models specific to Computer Vision
https://pytorch.org/vision
BSD 3-Clause "New" or "Revised" License

Unable to export detection model to ONNX #1995

Open AakashKumarNain opened 4 years ago

AakashKumarNain commented 4 years ago

🐛 Bug

I was trying to convert the pretrained Mask R-CNN ResNet-50 FPN detection model (maskrcnn_resnet50_fpn) to ONNX, but I got a runtime error during export.

To Reproduce

import torch
import torchvision

# load model
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
model.eval()

# conversion
batch_size=1
x = torch.randn(batch_size, 3, 300, 300, requires_grad=True)
outputs = ['boxes', 'labels', 'scores', 'masks']
torch.onnx.export(model,                     # model being run
                  x,                         # model input (or a tuple for multiple inputs)
                  "resnet50detection.onnx",  # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=10,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = outputs,    # the model's output names
                  dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
                                'output' : {0 : 'batch_size'}})

Expected behavior

A model converted to the ONNX format.

Environment

Additional context

Here is the error I am getting

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-6-ce14949e7fa3> in <module>
     11                   output_names = outputs,    # the model's output names
     12                   dynamic_axes={'input' : {0 : 'batch_size'},    # variable lenght axes
---> 13                                 'output' : {0 : 'batch_size'}})

~/miniconda3/envs/torchenv/lib/python3.7/site-packages/torch/onnx/__init__.py in export(model, args, f, export_params, verbose, training, input_names, output_names, aten, export_raw_ir, operator_export_type, opset_version, _retain_param_name, do_constant_folding, example_outputs, strip_doc_string, dynamic_axes, keep_initializers_as_inputs)
    146                         operator_export_type, opset_version, _retain_param_name,
    147                         do_constant_folding, example_outputs,
--> 148                         strip_doc_string, dynamic_axes, keep_initializers_as_inputs)
    149 
    150 

~/miniconda3/envs/torchenv/lib/python3.7/site-packages/torch/onnx/utils.py in export(model, args, f, export_params, verbose, training, input_names, output_names, aten, export_raw_ir, operator_export_type, opset_version, _retain_param_name, do_constant_folding, example_outputs, strip_doc_string, dynamic_axes, keep_initializers_as_inputs)
     64             _retain_param_name=_retain_param_name, do_constant_folding=do_constant_folding,
     65             example_outputs=example_outputs, strip_doc_string=strip_doc_string,
---> 66             dynamic_axes=dynamic_axes, keep_initializers_as_inputs=keep_initializers_as_inputs)
     67 
     68 

~/miniconda3/envs/torchenv/lib/python3.7/site-packages/torch/onnx/utils.py in _export(model, args, f, export_params, verbose, training, input_names, output_names, operator_export_type, export_type, example_outputs, propagate, opset_version, _retain_param_name, do_constant_folding, strip_doc_string, dynamic_axes, keep_initializers_as_inputs, fixed_batch_size)
    414                                                         example_outputs, propagate,
    415                                                         _retain_param_name, do_constant_folding,
--> 416                                                         fixed_batch_size=fixed_batch_size)
    417 
    418         # TODO: Don't allocate a in-memory string for the protobuf

~/miniconda3/envs/torchenv/lib/python3.7/site-packages/torch/onnx/utils.py in _model_to_graph(model, args, verbose, training, input_names, output_names, operator_export_type, example_outputs, propagate, _retain_param_name, do_constant_folding, _disable_torch_constant_prop, fixed_batch_size)
    294     graph = _optimize_graph(graph, operator_export_type,
    295                             _disable_torch_constant_prop=_disable_torch_constant_prop,
--> 296                             fixed_batch_size=fixed_batch_size, params_dict=params_dict)
    297 
    298     if isinstance(model, torch.jit.ScriptModule) or isinstance(model, torch.jit.ScriptFunction):

~/miniconda3/envs/torchenv/lib/python3.7/site-packages/torch/onnx/utils.py in _optimize_graph(graph, operator_export_type, _disable_torch_constant_prop, fixed_batch_size, params_dict)
    133         torch._C._jit_pass_erase_number_types(graph)
    134 
--> 135         graph = torch._C._jit_pass_onnx(graph, operator_export_type)
    136         torch._C._jit_pass_lint(graph)
    137 

~/miniconda3/envs/torchenv/lib/python3.7/site-packages/torch/onnx/__init__.py in _run_symbolic_function(*args, **kwargs)
    177 def _run_symbolic_function(*args, **kwargs):
    178     from torch.onnx import utils
--> 179     return utils._run_symbolic_function(*args, **kwargs)
    180 
    181 

~/miniconda3/envs/torchenv/lib/python3.7/site-packages/torch/onnx/utils.py in _run_symbolic_function(g, n, inputs, env, operator_export_type)
    655                                   .format(op_name, opset_version, op_name))
    656                 op_fn = sym_registry.get_registered_op(op_name, '', opset_version)
--> 657                 return op_fn(g, *inputs, **attrs)
    658 
    659         elif ns == "prim":

~/miniconda3/envs/torchenv/lib/python3.7/site-packages/torch/onnx/symbolic_helper.py in wrapper(g, *args)
    126             # some args may be optional, so the length may be smaller
    127             assert len(arg_descriptors) >= len(args)
--> 128             args = [_parse_arg(arg, arg_desc) for arg, arg_desc in zip(args, arg_descriptors)]
    129             return fn(g, *args)
    130         # In Python 2 functools.wraps chokes on partially applied functions, so we need this as a workaround

~/miniconda3/envs/torchenv/lib/python3.7/site-packages/torch/onnx/symbolic_helper.py in <listcomp>(.0)
    126             # some args may be optional, so the length may be smaller
    127             assert len(arg_descriptors) >= len(args)
--> 128             args = [_parse_arg(arg, arg_desc) for arg, arg_desc in zip(args, arg_descriptors)]
    129             return fn(g, *args)
    130         # In Python 2 functools.wraps chokes on partially applied functions, so we need this as a workaround

~/miniconda3/envs/torchenv/lib/python3.7/site-packages/torch/onnx/symbolic_helper.py in _parse_arg(value, desc)
     79                 if v.node().kind() != 'onnx::Constant':
     80                     raise RuntimeError("Failed to export an ONNX attribute '" + v.node().kind() +
---> 81                                        "', since it's not constant, please try to make "
     82                                        "things (e.g., kernel size) static if possible")
     83             return [int(v.node()['value']) for v in value.node().inputs()]

RuntimeError: Failed to export an ONNX attribute 'onnx::Sub', since it's not constant, please try to make things (e.g., kernel size) static if possible
mzy97 commented 4 years ago

With opset_version=10 the export fails; if it is set to 11, the export completes normally apart from some warnings, and the result seems good.
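
For reference, a minimal sketch of the export call from the original report with only the opset bumped to 11, reusing model, x, and outputs from the snippet above (note that the original dynamic_axes entry for 'output' does not match any declared output name, so only the input axis is kept here):

# Same export as above, but targeting ONNX opset 11 instead of 10.
torch.onnx.export(model, x, "resnet50detection.onnx",
                  export_params=True,
                  opset_version=11,
                  do_constant_folding=True,
                  input_names=['input'],
                  output_names=outputs,
                  dynamic_axes={'input': {0: 'batch_size'}})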

AakashKumarNain commented 4 years ago

@mzy97 I tried that but ran into some errors. Is it necessary to build torch from source for this?

mzy97 commented 4 years ago

I use a Docker image.

fmassa commented 4 years ago

Hi,

Due to some recent changes in PyTorch, ONNX export for Mask R-CNN and Keypoint R-CNN will only work with PyTorch 1.4 and torchvision 0.5.0.

Newer versions of PyTorch or torchvision won't work for now, but we are working on fixing this.

cc @neginraoof
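
For readers verifying their setup, a quick sketch to print the installed versions:

import torch
import torchvision

# Confirm the installed versions match the supported pair above (1.4 / 0.5.0).
print("PyTorch:", torch.__version__)
print("torchvision:", torchvision.__version__)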

hashJoe commented 4 years ago

@fmassa Hi, I'm facing the same issue exporting a fine-tuned Keypoint R-CNN model with a MobileNet V2 backbone to ONNX. Following your comment, I downgraded to PyTorch 1.4 and torchvision 0.5.0 and I'm still facing the same issue (even after re-training the whole network):

...line 81, in _parse_arg
    "', since it's not constant, please try to make "
RuntimeError: Failed to export an ONNX attribute 'onnx::Sub', since it's not constant, please try to make things (e.g., kernel size) static if possible

Could you assist me with that please?

fmassa commented 4 years ago

@neginraoof could you have a look?

hashJoe commented 4 years ago

I added the following lines after line 78 in symbolic_helper.py to see which node is causing the error:

print(v.node())
print(v.node().kind())

Here is my full output:

C:\Users\yhashisho\Projects\git\keypoints-detection\venv\Scripts\python.exe C:/Users/yhashisho/Projects/git/keypoints-detection/pytorch-train/export_to_ONNX.py
PyTorch Version:  1.4.0
Torchvision Version:  0.5.0
C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torch\tensor.py:461: RuntimeWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results).
  'incorrect results).', category=RuntimeWarning)
C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torch\onnx\symbolic_helper.py:248: UserWarning: You are trying to export the model with onnx:Upsample for ONNX opset version 9. This operator might cause results to not match the expected results by PyTorch.
ONNX's Upsample/Resize operator did not match Pytorch's Interpolation until opset 11. Attributes to determine how to transform the input were added in onnx:Resize in opset 11 to support Pytorch's behavior (like coordinate_transformation_mode and nearest_mode).
We recommend using opset 11 and above for models using this operator. 
  "" + str(_export_onnx_opset_version) + ". "
%482 : Long() = onnx::Constant[value={0}]()

onnx::Constant
%481 : Long() = onnx::Sub(%460, %475) # C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torchvision\models\detection\transform.py:132:0

onnx::Sub
Traceback (most recent call last):
  File "C:/Users/yhashisho/Projects/git/keypoints-detection/pytorch-train/export_to_ONNX.py", line 62, in <module>
    main()
  File "C:/Users/yhashisho/Projects/git/keypoints-detection/pytorch-train/export_to_ONNX.py", line 58, in main
    export_to_ONNX(model, [image], config.ONNX_path, opset_version=11)
  File "C:/Users/yhashisho/Projects/git/keypoints-detection/pytorch-train/export_to_ONNX.py", line 49, in export_to_ONNX
    torch.onnx.export(model, input, path, opset_version)
  File "C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torch\onnx\__init__.py", line 148, in export
    strip_doc_string, dynamic_axes, keep_initializers_as_inputs)
  File "C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torch\onnx\utils.py", line 66, in export
    dynamic_axes=dynamic_axes, keep_initializers_as_inputs=keep_initializers_as_inputs)
  File "C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torch\onnx\utils.py", line 416, in _export
    fixed_batch_size=fixed_batch_size)
  File "C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torch\onnx\utils.py", line 296, in _model_to_graph
    fixed_batch_size=fixed_batch_size, params_dict=params_dict)
  File "C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torch\onnx\utils.py", line 135, in _optimize_graph
    graph = torch._C._jit_pass_onnx(graph, operator_export_type)
  File "C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torch\onnx\__init__.py", line 179, in _run_symbolic_function
    return utils._run_symbolic_function(*args, **kwargs)
  File "C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torch\onnx\utils.py", line 657, in _run_symbolic_function
    return op_fn(g, *inputs, **attrs)
  File "C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torch\onnx\symbolic_helper.py", line 130, in wrapper
    args = [_parse_arg(arg, arg_desc) for arg, arg_desc in zip(args, arg_descriptors)]
  File "C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torch\onnx\symbolic_helper.py", line 130, in <listcomp>
    args = [_parse_arg(arg, arg_desc) for arg, arg_desc in zip(args, arg_descriptors)]
  File "C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torch\onnx\symbolic_helper.py", line 83, in _parse_arg
    "', since it's not constant, please try to make "
RuntimeError: Failed to export an ONNX attribute 'onnx::Sub', since it's not constant, please try to make things (e.g., kernel size) static if possible

Process finished with exit code 1

In addition to that, I set opset_version=11, but it is still warning me that the opset version used is 9, which is weird.

I guess this is causing the error:

padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]

in C:\Users\yhashisho\Projects\git\keypoints-detection\venv\lib\site-packages\torchvision\models\detection\transform.py:132:0. Presumably the subtraction over traced image shapes becomes a non-constant onnx::Sub node, which the Pad symbolic for opsets below 11 cannot handle (Pad only accepts its pads as a tensor input from opset 11 onward):

# _onnx_batch_images() is an implementation of
# batch_images() that is supported by ONNX tracing.
@torch.jit.unused
def _onnx_batch_images(self, images, size_divisible=32):
    # type: (List[Tensor], int) -> Tensor
    max_size = []
    for i in range(images[0].dim()):
        max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)
        max_size.append(max_size_i)
    stride = size_divisible
    max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64)
    max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)
    max_size = tuple(max_size)

    # work around for
    # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
    # which is not yet supported in onnx
    padded_imgs = []
    for img in images:
        padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
        padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0]))
        padded_imgs.append(padded_img)

    return torch.stack(padded_imgs)

neginraoof commented 4 years ago

Please try opset 11 export. Export of the detection models Faster RCNN, Mask RCNN and Keypoint RCNN is enabled using ONNX opset 11. Would you please try setting opset_version=11 at export and see if you still get this error?

hashJoe commented 4 years ago

> Please try opset 11 export. Export of the detection models Faster RCNN, Mask RCNN and Keypoint RCNN is enabled using ONNX opset 11. Would you please try setting opset_version=11 at export and see if you still get this error?

It is already set to 11

OK, it is fixed now. I was passing opset_version positionally, so it bound to the wrong parameter of torch.onnx.export:

Error:

def export_to_ONNX(model, input, path, opset_version=11):
    # Bug: passed positionally, 11 binds to export_params (the fourth
    # positional parameter of torch.onnx.export), so opset_version silently
    # stays at its default of 9, which explains the opset 9 warnings above.
    torch.onnx.export(model, input, path, opset_version)

Fix:

def export_to_ONNX(model, input, path, opset_version=11):
    # Passing opset_version by keyword binds it to the right parameter.
    torch.onnx.export(model, input, path, opset_version=opset_version)
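
As a quick sanity check after export, the saved file can be validated with the onnx package. A minimal sketch, assuming the onnx package is installed and the model was written to a hypothetical "model.onnx":

import onnx

# Load the exported protobuf and run ONNX's structural validation over it.
onnx_model = onnx.load("model.onnx")
onnx.checker.check_model(onnx_model)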