microsoft / onnxruntime

ONNX Runtime: cross-platform, high performance ML inferencing and training accelerator
https://onnxruntime.ai
MIT License

Error when creating a pytorch operator in onnxruntime #19692

Closed (ben-da6 closed this issue 7 months ago)

ben-da6 commented 8 months ago

Describe the issue

I am trying to follow these:

to create a library of custom operators that I can use from Python as torch ops, and then load those custom ops in the runtime.

To reproduce

  1. Build the .so from the following source:
#include <torch/torch.h>
#include <iostream>
#include <torch/script.h>
#include "onnxruntime_cxx_api.h"
#include "onnxruntime_lite_custom_op.h"

using namespace Ort::Custom;

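// Torch implementation: returns a tensor with the same shape as x, with every element set to 3.145.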
torch::Tensor pie(torch::Tensor x) {

  float* x_data = x.data_ptr<float>();
  int num_features = x.size(1);
  torch::Tensor output = torch::empty_like(x);
  float* out = output.data_ptr<float>();

  for (size_t batch = 0; batch < x.size(0); batch++) {
    for (size_t i = 0; i < x.size(1); i++) {
      int linear_index = batch * num_features + i;
      out[linear_index] = 3.145;
    }
  }
  return output;
}

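// Expose pie() to Python/TorchScript as torch.ops.my_namespace.pie_maker.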
static auto registry = torch::RegisterOperators("my_namespace::pie_maker", &pie);

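// ONNX Runtime counterpart of pie(): allocates an output of the same shape as the input and fills it with 3.145.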
void ort_pie(const Ort::Custom::Tensor<float>& x, Ort::Custom::Tensor<float>& output) {
  auto input_shape = x.Shape();
  auto output_data = output.Allocate(input_shape);
  int num_features = input_shape[1];

  for (size_t batch = 0; batch < input_shape[0]; batch++) {
    for (size_t i = 0; i < input_shape[1]; i++) {
      int linear_index = batch * num_features + i;
      output_data[linear_index] = 3.145;
    }
  } 
}

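// Wraps ort_pie as a lite custom op named "Pie" for the CPU execution provider and adds it to the domain.
// Note: domain.Add() only stores a raw pointer, so the lifetime of custom_op_one matters (see the replies below).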
void RegisterOps(Ort::CustomOpDomain& domain) {
  std::unique_ptr<OrtLiteCustomOp> custom_op_one{
    Ort::Custom::CreateLiteCustomOp(
      "Pie", 
      "CPUExecutionProvider", 
      ort_pie)};
  domain.Add(custom_op_one.get());
}

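// Moves the domain into a function-local static container so it outlives this registration call.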
static void AddOrtCustomOpDomainToContainer(Ort::CustomOpDomain&& domain) {
  static std::vector<Ort::CustomOpDomain> ort_custom_op_domain_container;
  static std::mutex ort_custom_op_domain_mutex;
  std::lock_guard<std::mutex> lock(ort_custom_op_domain_mutex);
  ort_custom_op_domain_container.push_back(std::move(domain));
}

extern "C" {

  ORT_EXPORT OrtStatus* ORT_API_CALL RegisterCustomOps(OrtSessionOptions* options, const OrtApiBase* api);

}

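// Entry point that ONNX Runtime calls when this library is loaded via SessionOptions.register_custom_ops_library().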
OrtStatus* ORT_API_CALL RegisterCustomOps(OrtSessionOptions* options, const OrtApiBase* api) {
  Ort::Global<void>::api_ = api->GetApi(ORT_API_VERSION);
  OrtStatus* result = nullptr;
  Ort::CustomOpDomain custom_op_domain("org.pytorch.my_domain");
  RegisterOps(custom_op_domain);
  Ort::UnownedSessionOptions session_options(options);
  session_options.Add(custom_op_domain);
  AddOrtCustomOpDomainToContainer(std::move(custom_op_domain));
  std::cout << "REGISTERED!!!!" << std::endl;
  return result;
 }
  2. Run the following Python script:

import numpy as np
import onnxruntime as ort
import torch
from torch.onnx import register_custom_op_symbolic


class MyOp(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.ops.my_namespace.pie_maker(
            x,
        )


def register():
    # Map the torch op to the custom ONNX op "my_domain::Pie" during export.
    def custom_normalization(g, x):
        output = g.op(
            "my_domain::Pie",
            x,
        )
        output_type = x.type()
        output.setType(output_type)
        return output

    opset_version = 16
    register_custom_op_symbolic("my_namespace::pie_maker", custom_normalization, opset_version)


def test_normalisation() -> None:
    model = MyOp()
    BATCH_SIZE = 1
    NUM_FEATURES = 10
    batch = torch.rand(BATCH_SIZE, NUM_FEATURES)
    torch.onnx.export(
        model,
        batch,
        "model.onnx",
        export_params=True,
        do_constant_folding=True,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
        opset_version=16,
        custom_opsets={"my_domain": 16},
    )

    # Load the custom op library into the session and run the exported model.
    sess_options = ort.SessionOptions()
    sess_options.inter_op_num_threads = 1
    sess_options.intra_op_num_threads = 1
    sess_options.register_custom_ops_library("libonnx-op-shared.so")
    ort_sess = ort.InferenceSession(
        "model.onnx",
        providers=["CPUExecutionProvider"],
        sess_options=sess_options,
    )
    inputs = {"input": np.random.uniform(size=(1, NUM_FEATURES)).astype(np.float32)}
    ort_sess.run(
        ["output"],
        inputs,
    )


if __name__ == "__main__":
    torch.ops.load_library("libonnx-op-shared.so")
    register()
    test_normalisation()

  3. The Python script outputs:

REGISTERED!!!!
EP Error tensor type 1489264000 is not supported when using ['CPUExecutionProvider']
Falling back to ['CPUExecutionProvider'] and retrying.
Traceback (most recent call last):
  File "/persist/code/RQFluyt/RQ.Fluyt/.venv/lib/python3.10/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 419, in __init__
    self._create_inference_session(providers, provider_options, disabled_optimizers)
  File "/persist/code/RQFluyt/RQ.Fluyt/.venv/lib/python3.10/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 460, in _create_inference_session
    sess = C.InferenceSession(session_options, self._model_path, True, self._read_config_from_model)
RuntimeError: tensor type 1489264000 is not supported

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/persist/code/RQFluyt/RQ.Fluyt/Fluyt/onnx_ops/build/../my_op.py", line 66, in <module>
    test_normalisation()
  File "/persist/code/RQFluyt/RQ.Fluyt/Fluyt/onnx_ops/build/../my_op.py", line 50, in test_normalisation
    ort_sess = ort.InferenceSession(
  File "/persist/code/RQFluyt/RQ.Fluyt/.venv/lib/python3.10/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 430, in __init__
    raise fallback_error from e
  File "/persist/code/RQFluyt/RQ.Fluyt/.venv/lib/python3.10/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 425, in __init__
    self._create_inference_session(self._fallback_providers, None)
  File "/persist/code/RQFluyt/RQ.Fluyt/.venv/lib/python3.10/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 460, in _create_inference_session
    sess = C.InferenceSession(session_options, self._model_path, True, self._read_config_from_model)
RuntimeError: tensor type 1489264000 is not supported



### Urgency

_No response_

### Platform

Linux

### OS Version

Ubuntu 22.04.3 LTS

### ONNX Runtime Installation

Released Package

### ONNX Runtime Version or Commit ID

1.16.3

### ONNX Runtime API

Python

### Architecture

X86

### Execution Provider

Default CPU

### Execution Provider Library Version

_No response_
pranavsharma commented 8 months ago

Looks like the generated model has an unsupported type. Can you attach the ONNX model here? Meanwhile, let me see if I can repro. I'm getting a link error when running the Python script. Can you post repro instructions? How was the .so created? What libs were linked, etc.?

OSError: /home/pranav/libonnx-op-shared.so: undefined symbol: _ZTVN5torch8autograd12AutogradMetaE

ben-da6 commented 8 months ago

This is my CMake setup:

find_package(Torch REQUIRED)
include_directories("/persist/onnxruntime/include/")
find_library(ONNXRUNTIME_LIBRARY onnxruntime HINTS "/persist/onnxruntime/lib/")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

add_library(onnx-op-shared SHARED ort_op.cpp torch_op.cpp)
target_link_libraries(onnx-op-shared "${TORCH_LIBRARIES}" "${ONNXRUNTIME_LIBRARY}")

set_property(TARGET onnx-op-shared PROPERTY CXX_STANDARD 17)

Python venv (Python 3.10):

coloredlogs==15.0.1
custom-group-norm==0.0.0
filelock==3.13.1
flatbuffers==23.5.26
fsspec==2024.2.0
humanfriendly==10.0
Jinja2==3.1.3
MarkupSafe==2.1.5
mpmath==1.3.0
netron==7.5.0
networkx==3.2.1
numpy==1.26.4
onnx==1.15.0
onnxruntime==1.17.1
packaging==23.2
protobuf==4.25.3
sympy==1.12
torch==2.1.2+cu118
triton==2.1.0
typing_extensions==4.10.0

mszhanyi commented 8 months ago

@ben-da6

The custom_op_one should be defined as static, so the OrtLiteCustomOp is not destroyed when RegisterOps returns (domain.Add() only stores a raw pointer to it):

void RegisterOps(Ort::CustomOpDomain& domain) {
  static std::unique_ptr<OrtLiteCustomOp> custom_op_one{
    Ort::Custom::CreateLiteCustomOp(
      "Pie",
      "CPUExecutionProvider",
      ort_pie)};
  domain.Add(custom_op_one.get());
}
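
For completeness, a sketch of an equivalent fix (not code from the thread; it reuses ort_pie and the includes from the .so source above): since the domain only records a pointer, the OrtLiteCustomOp can instead be parked in a static container, mirroring the AddOrtCustomOpDomainToContainer pattern already used for the domain.

// Alternative sketch: keep every OrtLiteCustomOp alive in a static container so the
// raw pointer handed to the domain stays valid for the life of the process.
// (Guard with a mutex, as AddOrtCustomOpDomainToContainer does, if this can run concurrently.)
void RegisterOps(Ort::CustomOpDomain& domain) {
  static std::vector<std::unique_ptr<Ort::Custom::OrtLiteCustomOp>> live_ops;
  live_ops.emplace_back(
      Ort::Custom::CreateLiteCustomOp("Pie", "CPUExecutionProvider", ort_pie));
  domain.Add(live_ops.back().get());
}

Either way the session no longer reads the op's type metadata from freed memory, which would explain the garbage-looking tensor type 1489264000 in the error above.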

ben-da6 commented 7 months ago

thanks!