ONNX export failed: Couldn't export operator maskrcnn_benchmark::nms

schyun9212 commented 4 years ago

🐛 Bug

ONNX export fails. The custom operator `nms` does not appear to be recognized by the exporter.

To Reproduce

Steps to reproduce the behavior:

# %%
import torch

from PIL import Image
from maskrcnn_benchmark.config import cfg
from predictor import COCODemo
from maskrcnn_benchmark.structures.image_list import ImageList

from demo.utils import load_image, imshow, masking_image
from demo.transform import transform_image
import os

# Export settings.
# When True, the model is re-exported even if MODEL_PATH already exists.
OVERWRITE_MODEL = True
# Sample image used as the example input for the export.
TEST_IMAGE_PATH = "./sample.jpg"
# Device written into the config below; swap the two lines to use CUDA.
# MODEL_DEVICE = "cuda"
MODEL_DEVICE = "cpu"
# Output file; the device name is embedded in the file name.
MODEL_PATH = f"./maskrcnn_{MODEL_DEVICE}.onnx"
# Opset version passed to torch.onnx.export.
ONNX_OPSET_VERSION = 10

# Load the model config from file, override the device, then freeze it.
config_file = "../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml"
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.DEVICE", MODEL_DEVICE])
cfg.freeze()

# Demo predictor; its `.model` attribute is what the export wrapper calls.
coco_demo = COCODemo(
    cfg,
    confidence_threshold=0.7,
    min_image_size=800,
)

class MaskRCNNModel(torch.nn.Module):
    """Export wrapper that gives the demo model a tensor-in / tensors-out API.

    The wrapped network is the module-level ``coco_demo.model``; it is
    referenced directly and is not stored on the instance.
    """

    def __init__(self):
        super().__init__()
        # Tracing requires that no parameter of the wrapped model tracks
        # gradients, so switch them all off up front.
        for weight in coco_demo.model.parameters():
            weight.requires_grad = False

    def forward(self, image):
        """Run the wrapped model on one image tensor.

        Returns:
            Tuple ``(bbox, labels, mask, scores)`` read from the single
            result the model produces for the one-image batch.
        """
        # Add a leading batch dimension, then record the image size taken
        # from the last two dimensions of the input tensor.
        batched = image.unsqueeze(0)
        size = (int(image.size(-2)), int(image.size(-1)))
        batch = ImageList(batched, [size])

        # Exactly one result is expected; unpacking fails otherwise.
        (prediction,) = coco_demo.model(batch)

        return (
            prediction.bbox,
            prediction.get_field("labels"),
            prediction.get_field("mask"),
            prediction.get_field("scores"),
        )

# Load the sample image and run the demo's transform_image on it.
original_image = load_image(TEST_IMAGE_PATH)
image, t_width, t_height = transform_image(cfg, original_image)

# Every dimension of the original image except the last one.
height, width = original_image.shape[:-1]

# Export when overwriting is requested or no exported file exists yet.
# requires_grad must be false for tracing
if OVERWRITE_MODEL or not os.path.exists(MODEL_PATH):
    model = MaskRCNNModel().eval()
    torch.onnx.export(
        model,
        (image,),
        MODEL_PATH,
        opset_version=ONNX_OPSET_VERSION,
        do_constant_folding=True,
    )

Expected behavior

RuntimeError                              Traceback (most recent call last)
~/Workspace/maskrcnn/maskrcnn-benchmark/demo/export_to_onnx.py in 
     55     model.eval()
     56     torch.onnx.export(model, (image, ), MODEL_PATH,
---> 57                         do_constant_folding=True, opset_version=ONNX_OPSET_VERSION)

~/.pyenv/versions/maskrcnn-tracing-latest/lib/python3.7/site-packages/torch/onnx/__init__.py in export(model, args, f, export_params, verbose, training, input_names, output_names, aten, export_raw_ir, operator_export_type, opset_version, _retain_param_name, do_constant_folding, example_outputs, strip_doc_string, dynamic_axes, keep_initializers_as_inputs)
    141                         operator_export_type, opset_version, _retain_param_name,
    142                         do_constant_folding, example_outputs,
--> 143                         strip_doc_string, dynamic_axes, keep_initializers_as_inputs)
    144 
    145 

~/.pyenv/versions/maskrcnn-tracing-latest/lib/python3.7/site-packages/torch/onnx/utils.py in export(model, args, f, export_params, verbose, training, input_names, output_names, aten, export_raw_ir, operator_export_type, opset_version, _retain_param_name, do_constant_folding, example_outputs, strip_doc_string, dynamic_axes, keep_initializers_as_inputs)
     64             _retain_param_name=_retain_param_name, do_constant_folding=do_constant_folding,
     65             example_outputs=example_outputs, strip_doc_string=strip_doc_string,
---> 66             dynamic_axes=dynamic_axes, keep_initializers_as_inputs=keep_initializers_as_inputs)
     67 
     68 

~/.pyenv/versions/maskrcnn-tracing-latest/lib/python3.7/site-packages/torch/onnx/utils.py in _export(model, args, f, export_params, verbose, training, input_names, output_names, operator_export_type, export_type, example_outputs, propagate, opset_version, _retain_param_name, do_constant_folding, strip_doc_string, dynamic_axes, keep_initializers_as_inputs, fixed_batch_size)
    392             proto, export_map = graph._export_onnx(
    393                 params_dict, opset_version, dynamic_axes, defer_weight_export,
--> 394                 operator_export_type, strip_doc_string, val_keep_init_as_ip)
    395         else:
    396             proto, export_map = graph._export_onnx(

RuntimeError: ONNX export failed: Couldn't export operator maskrcnn_benchmark::nms

Environment

PyTorch version: 1.3.1 Is debug build: No CUDA used to build PyTorch: 10.1.243

OS: Ubuntu 18.04.3 LTS GCC version: (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0 CMake version: version 3.10.2

Python version: 3.7 Is CUDA available: Yes CUDA runtime version: 10.1.243 GPU models and configuration: GPU 0: GeForce RTX 2080 Ti Nvidia driver version: 440.44 cuDNN version: Probably one of the following: /usr/local/cuda-10.0/targets/x86_64-linux/lib/libcudnn.so.7 /usr/local/cuda-10.1/targets/x86_64-linux/lib/libcudnn.so.7.6.5

Versions of relevant libraries: [pip3] numpy==1.18.1 [pip3] torch==1.3.1 [pip3] torchvision==0.4.2 [conda] Could not collect

schyun9212 commented 4 years ago

To export a model that uses a custom operator to ONNX, a symbolic function for that operator must be registered explicitly:

# Create custom symbolic function
from torch.onnx.symbolic_helper import parse_args
@parse_args('v', 'v', 'f', 'i')
def symbolic_foo_forward(g, input1, input2, attr1, attr2):
    """Emit a ``Foo`` node for the custom ``foo_forward`` operator.

    ``input1`` and ``input2`` become the node's inputs; ``attr1`` and
    ``attr2`` are attached as node attributes, with the ``_f`` / ``_i``
    keyword suffixes selecting float / int attribute types.
    """
    attributes = {"attr1_f": attr1, "attr2_i": attr2}
    return g.op("Foo", input1, input2, **attributes)

# Register custom symbolic function
# Arguments: the "namespace::op" name of the custom operator, the symbolic
# function used to export it, and the opset version (9) it is registered for.
from torch.onnx import register_custom_op_symbolic
register_custom_op_symbolic('custom_ops::foo_forward', symbolic_foo_forward, 9)

class FooModel(torch.nn.Module):
    """Module that forwards its inputs to the custom ``foo_forward`` op.

    Args:
        attr1: Value passed to the op as its third argument.
        attr2: Value passed to the op as its fourth argument.
    """

    def __init__(self, attr1, attr2):
        # Zero-argument super() does not repeat the class name, so it cannot
        # drift out of sync with it (naming a non-existent class here would
        # raise NameError on construction).
        super().__init__()
        self.attr1 = attr1
        self.attr2 = attr2

    def forward(self, input1, input2):
        """Call the custom op with both inputs and the stored attributes."""
        # Calling custom op
        return torch.ops.custom_ops.foo_forward(input1, input2, self.attr1, self.attr2)

# NOTE: attr1, attr2, dummy_input1 and dummy_input2 are placeholders that are
# not defined in this snippet; supply real values before running it.
model = FooModel(attr1, attr2)
torch.onnx.export(model, (dummy_input1, dummy_input2), 'model.onnx')

https://pytorch.org/docs/stable/onnx.html#custom-operators

schyun9212 commented 4 years ago

I registered the custom operators, and the ONNX model now appears to be created successfully. However, a validation error occurs when I load the model.

from torch.onnx.symbolic_helper import parse_args
@parse_args('v', 'v', 'f')
def symbolic_nms(g, dets, scores, threshold):
    """Export ``maskrcnn_benchmark::nms`` as a graph node named ``nms``.

    ``dets`` and ``scores`` are forwarded as node inputs; ``threshold`` is
    wrapped in a float ``Constant`` node and passed as the third input.

    NOTE(review): the node is emitted under the bare name ``nms``, which is
    the op the ONNX checker reports as unregistered ("No Op registered for
    nms"). Mapping to a standard op or a custom domain is likely needed.
    """
    # Constant value must be converted to tensor
    threshold_tensor = torch.tensor(threshold, dtype=torch.float)
    threshold_node = g.op("Constant", value_t=threshold_tensor)
    return g.op("nms", dets, scores, threshold_node)

@parse_args('v', 'v', 'f', 'i', 'i', 'i')
def symbolic_roi_align_forward(g, grad, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio):
    """Export ``maskrcnn_benchmark::roi_align_forward`` as a graph node.

    The node is named ``roi_align_forward`` so that it matches the operator
    name this symbolic is registered for. ``grad`` and ``rois`` are forwarded
    as node inputs; the four scalar arguments are each wrapped in a
    ``Constant`` node (float for ``spatial_scale``, int64 for the rest) and
    passed as further inputs, in signature order.

    NOTE(review): the first tensor argument is named ``grad`` here, although
    for a forward op it is presumably the input feature map; confirm against
    the operator's signature.
    """
    def constant(value, dtype):
        # Constant value must be converted to tensor
        return g.op("Constant", value_t=torch.tensor(value, dtype=dtype))

    return g.op(
        "roi_align_forward",
        grad,
        rois,
        constant(spatial_scale, torch.float),
        constant(pooled_height, torch.int64),
        constant(pooled_width, torch.int64),
        constant(sampling_ratio, torch.int64),
    )

from torch.onnx import register_custom_op_symbolic
# Attach the symbolic functions to the two custom operators for opset 10,
# the same value the export uses for ONNX_OPSET_VERSION.
register_custom_op_symbolic('maskrcnn_benchmark::nms', symbolic_nms, 10)
register_custom_op_symbolic('maskrcnn_benchmark::roi_align_forward', symbolic_roi_align_forward, 10)

# Build the wrapper in eval mode and export it with the sample image as the
# example input.
model = MaskRCNNModel().eval()
torch.onnx.export(
    model,
    (image,),
    MODEL_PATH,
    opset_version=ONNX_OPSET_VERSION,
    do_constant_folding=True,
)

The error message is:

Traceback (most recent call last):
  File "export_to_onnx.py", line 89, in <module>
    onnx.checker.check_model(loaded_onnx_model)
  File "/home/jade/.pyenv/versions/3.7.6/envs/maskrcnn-tracing-latest/lib/python3.7/site-packages/onnx/checker.py", line 91, in check_model
    C.check_model(model.SerializeToString())
onnx.onnx_cpp2py_export.checker.ValidationError: No Op registered for nms with domain_version of 10

==> Context: Bad node spec: input: "656" input: "658" input: "659" output: "660" op_type: "nms"

schyun9212 / maskrcnn-benchmark