pykeio / ort

Fast ML inference & training for Rust with ONNX Runtime
https://ort.pyke.io/
Apache License 2.0
786 stars 91 forks source link

Cannot run inference on model with EP: TensorRT on ort@2.0.0-rc.2 #226

Closed dat58 closed 2 months ago

dat58 commented 2 months ago

Environment:

let model_path = "warehouse/model.onnx";
let global_options = ort::EnvironmentGlobalThreadPoolOptions {
            intra_op_parallelism: Some(16),
            ..Default::default()
};
let _ = ort::init().with_global_thread_pool(global_options).commit()?;
let provider = ort::TensorRTExecutionProvider::default()
                        .with_device_id(0)
                        .with_engine_cache(true)
                        .with_engine_cache_path("warehouse".to_owned())
                        .with_profile_min_shapes("images:1x3x256x256".to_owned())
                        .with_profile_opt_shapes("images:32x3x256x256".to_owned())
                        .with_profile_max_shapes("images:64x3x256x256".to_owned())
                        .with_max_partition_iterations(10)                
                        .with_max_workspace_size(2 * 1024 * 1024 * 1024)
                        .build();
let session = Session::builder()?
                .with_optimization_level(ort::GraphOptimizationLevel::Level3)?
                .with_execution_providers([provider])?
                .commit_from_file(model_path)?;

Then run:

let outputs = session.run_async(inputs)?.await?;

Got errors:

level=INFO, timestamp=2024-07-06T13:56:42, Successfully registered `TensorrtExecutionProvider`
level=INFO, timestamp=2024-07-06T13:56:42, apply_execution_providers;
level=INFO, timestamp=2024-07-06T13:56:44, initialized model: OrtInputs { shapes: [[]], dtypes: [Tensor { ty: Float32, dimensions: [-1, 3, -1, -1] }], names: ["images"], sizes: [[256, 256]] }
level=INFO, timestamp=2024-07-06T13:56:44, starting 128 workers
level=INFO, timestamp=2024-07-06T13:56:44, Actix runtime found; starting in Actix runtime
level=INFO, timestamp=2024-07-06T14:03:24, new_cpu; allocator=Arena memory_type=Default
level=INFO, timestamp=2024-07-06T14:03:24, drop; self=MemoryInfo { ptr: 0x7f0474000d90, should_release: true }
level=INFO, timestamp=2024-07-06T14:03:24, endpoint="POST /facesdk/detect_face", request_id=test, image_count=1, decode_took=3.312934ms, preprocess_took=3.30027ms, infer_took=120.512565ms, postprocess_took=198.423µs, align_took=756.85µs, total=128.48752ms
level=INFO, timestamp=2024-07-06T14:03:24, new_cpu; allocator=Arena memory_type=Default
level=WARN, timestamp=2024-07-06T14:03:24, [2024-07-06 07:03:24   ERROR] 1: [reformat.cpp::executeCutensor::332] Error Code 1: CuTensor (Internal cuTensor permutate execute failed)
level=WARN, timestamp=2024-07-06T14:03:24, [2024-07-06 07:03:24   ERROR] 1: [checkMacros.cpp::catchCudaError::181] Error Code 1: Cuda Runtime (invalid resource handle)
level=WARN, timestamp=2024-07-06T14:03:24, Non-zero status code returned while running TRTKernel_graph_torch_jit_2800804591407344468_0 node. Name:'TensorrtExecutionProvider_TRTKernel_graph_torch_jit_2800804591407344468_0_0' Status Message: TensorRT EP execution context enqueue failed.
thread '<unnamed>' panicked at /root/.cargo/registry/src/index.crates.io-6f17d22bba15001f/ort-2.0.0-rc.2/src/session/async.rs:171:58:
OrtValue ptr returned from session Run should not be null
stack backtrace:
   0:     0x555c28234a5f - std::backtrace_rs::backtrace::libunwind::trace::h67a838aed1f4d6ec
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5
   1:     0x555c28234a5f - std::backtrace_rs::backtrace::trace_unsynchronized::h1d1786bb1962baf8
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
   2:     0x555c28234a5f - std::sys_common::backtrace::_print_fmt::h5a0b1f807a002d23
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:67:5
   3:     0x555c28234a5f - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::hf84ab6ad0b91784c
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:44:22
level=INFO, timestamp=2024-07-06T14:03:24, new_cpu; allocator=Arena memory_type=Default
   4:     0x555c27ff204c - core::fmt::rt::Argument::fmt::h28f463bd1fdabed5
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/fmt/rt.rs:138:9
   5:     0x555c27ff204c - core::fmt::write::ha37c23b175e921b3
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/fmt/mod.rs:1114:21
   6:     0x555c28201e7d - std::io::Write::write_fmt::haa1b000741bcbbe1
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/io/mod.rs:1763:15
   7:     0x555c2823618e - std::sys_common::backtrace::_print::h1ff1030b04dfb157
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:47:5
   8:     0x555c2823618e - std::sys_common::backtrace::print::hb982056c6f29541c
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:34:9
   9:     0x555c28235d54 - std::panicking::default_hook::{{closure}}::h11f92f82c62fbd68
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:272:22
  10:     0x555c28236d4c - std::panicking::default_hook::hb8810fe276772c66
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:292:9
  11:     0x555c28236d4c - std::panicking::rust_panic_with_hook::hd2f0efd2fec86cb0
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:731:13
  12:     0x555c2823686c - std::panicking::begin_panic_handler::{{closure}}::h3651b7fc4f61d784
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:609:13
  13:     0x555c282367c6 - std::sys_common::backtrace::__rust_end_short_backtrace::hbc468e4b98c7ae04
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:170:18
  14:     0x555c282367b1 - rust_begin_unwind
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:597:5
  15:     0x555c27f399b4 - core::panicking::panic_fmt::h979245e2fdb2fabd
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/panicking.rs:72:14
  16:     0x555c27f39f92 - core::panicking::panic_display::h9b355c58fd35af38
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/panicking.rs:168:5
  17:     0x555c27f39f92 - core::panicking::panic_str::h187a5146d72e7d2f
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/panicking.rs:152:5
  18:     0x555c27f39f92 - core::option::expect_failed::h7cdfa49208a82a89
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/option.rs:1988:5
  19:     0x555c280b93ae - ort::session::async::async_callback::h512d1418625b5d61
  20:     0x7f07f851af8e - <unknown>
  21:     0x7f07f8f33e28 - <unknown>
  22:     0x7f07f8f34b76 - <unknown>
  23:     0x7f07f8f407a4 - <unknown>
  24:     0x7f0804159ac3 - <unknown>
  25:     0x7f08041eaa04 - __clone
  26:                0x0 - <unknown>
Aborted (core dumped)

Before ort@2.0.0-rc.2, I used ort@1.16.3, and it worked perfectly. However, it did not provide async inference (`run_async`). Has anyone else experienced this issue?

decahedron1 commented 2 months ago

This is an issue with TensorRT/ONNX Runtime's TensorRT EP and not something that can be fixed by ort. Sorry.

dat58 commented 2 months ago

This is an issue with TensorRT/ONNX Runtime's TensorRT EP and not something that can be fixed by ort. Sorry.

Hi @decahedron1, thank you for your reply. However, ort@2.0.0-rc.2 requires ONNX Runtime version 1.17.x or higher, and that combination encounters issues with TensorRT. Could you please suggest a method for running TensorRT with ort@2.0.0-rc.2? For example, information about matching versions of ONNX Runtime, CUDA, cuDNN, and TensorRT would be greatly appreciated.