Open tianleiwu opened 9 months ago
When I run a unit test, a core dump occurs in the TRT EP, caused by signal 11 (an attempt by the program to read or write outside its allocated memory).
The call stack is as follows:
libgcc_s.so.1![Unknown/Just-In-Time compiled code] (Unknown Source:0) libgcc_s.so.1!_Unwind_Find_FDE (Unknown Source:0) libgcc_s.so.1![Unknown/Just-In-Time compiled code] (Unknown Source:0) libgcc_s.so.1!_Unwind_RaiseException (Unknown Source:0) libstdc++.so.6!cxa_throw (Unknown Source:0) libnvinfer.so.8![Unknown/Just-In-Time compiled code] (Unknown Source:0) libonnxruntime_providers_tensorrt.so!nvinfer1::IBuilder::buildSerializedNetwork(nvinfer1::IBuilder const this, nvinfer1::INetworkDefinition & network, nvinfer1::IBuilderConfig & config) (\usr\include\x86_64-linux-gnu\NvInfer.h:10180) libonnxruntime_providers_tensorrt.so!onnxruntime::TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(onnxruntime::TensorrtExecutionProvider const this, const onnxruntime::GraphViewer & graph_body_viewer, const onnxruntime::Node & fused_node, std::unordered_map<std::cxx11::basic_string<char, std::char_traits, std::allocator >, unsigned long, std::hash<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::equal_to<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::allocator<std::pair<std::cxx11::basic_string<char, std::char_traits, std::allocator > const, unsigned long> > > & input_map, std::unordered_map<std::cxx11::basic_string<char, std::char_traits, std::allocator >, unsigned long, std::hash<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::equal_to<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::allocator<std::pair<std::cxx11::basic_string<char, std::char_traits, std::allocator > const, unsigned long> > > & output_map, std::vector<onnxruntime::NodeComputeInfo, std::allocator > & node_compute_funcs) (\home\user1\git\onnxruntime\onnxruntime\core\providers\tensorrt\tensorrt_execution_provider.cc:2987) libonnxruntime_providers_tensorrt.so!onnxruntime::TensorrtExecutionProvider::Compile(onnxruntime::TensorrtExecutionProvider const this, const 
std::vector<onnxruntime::IExecutionProvider::FusedNodeAndGraph, std::allocator > & fused_nodes_and_graphs, std::vector<onnxruntime::NodeComputeInfo, std::allocator > & node_compute_funcs) (\home\user1\git\onnxruntime\onnxruntime\core\providers\tensorrt\tensorrt_execution_provider.cc:2575) onnxruntime::PartitionOnnxFormatModelImpl(onnxruntime::Graph & graph, onnxruntime::FuncManager & func_mgr, onnxruntime::KernelRegistryManager & kernel_registry_mgr, onnxruntime::KernelRegistry & fused_kernel_registry, onnxruntime::IExecutionProvider & current_ep, onnxruntime::GraphPartitioner::Mode mode, int & fused_node_unique_id, const onnxruntime::layout_transformation::TransformLayoutFunction & transform_layout_fn, const onnxruntime::layout_transformation::DebugGraphFn & debug_graph_fn) (\home\user1\git\onnxruntime\onnxruntime\core\framework\graph_partitioner.cc:462) onnxruntime::PartitionOnnxFormatModel(const onnxruntime::(anonymous namespace)::PartitionParams & partition_params, onnxruntime::GraphPartitioner::Mode mode, const onnxruntime::ExecutionProviders & execution_providers, onnxruntime::KernelRegistryManager & kernel_registry_manager) (\home\user1\git\onnxruntime\onnxruntime\core\framework\graph_partitioner.cc:744) onnxruntime::GraphPartitioner::Partition(onnxruntime::Graph&, onnxruntime::FuncManager&, std::function<onnxruntime::common::Status (onnxruntime::Graph&, bool&, onnxruntime::IExecutionProvider&, std::function<void (onnxruntime::Graph const&)> const&)> const&, onnxruntime::ConfigOptions const&, onnxruntime::logging::Logger const&, onnxruntime::GraphPartitioner::Mode, std::function<void (onnxruntime::Graph const&)> const&) const(const onnxruntime::GraphPartitioner const this, onnxruntime::Graph & graph, onnxruntime::FuncManager & func_mgr, const onnxruntime::layout_transformation::TransformLayoutFunction & transform_layout_function, const onnxruntime::ConfigOptions & config_options, const onnxruntime::logging::Logger & logger, 
onnxruntime::GraphPartitioner::Mode mode, const onnxruntime::layout_transformation::DebugGraphFn & debug_graph_fn) (\home\user1\git\onnxruntime\onnxruntime\core\framework\graph_partitioner.cc:982) onnxruntime::InferenceSession::TransformGraph(onnxruntime::InferenceSession const this, onnxruntime::Graph & graph, bool saving_model_in_ort_format) (\home\user1\git\onnxruntime\onnxruntime\core\session\inference_session.cc:1214) onnxruntime::InferenceSession::Initialize(onnxruntime::InferenceSession const this) (\home\user1\git\onnxruntime\onnxruntime\core\session\inference_session.cc:1761) onnxruntime::test::BaseTester::ExecuteModel(onnxruntime::test::BaseTester const this, onnxruntime::Model & model, onnxruntime::InferenceSession & session, onnxruntime::test::BaseTester::ExpectResult expect_result, const std::string & expected_failure_string, const onnxruntime::RunOptions run_options, const std::unordered_map<std::cxx11::basic_string<char, std::char_traits, std::allocator >, OrtValue, std::hash<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::equal_to<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::allocator<std::pair<std::cxx11::basic_string<char, std::char_traits, std::allocator > const, OrtValue> > > & feeds, const std::vector<std::cxx11::basic_string<char, std::char_traits, std::allocator >, std::allocator<std::cxx11::basic_string<char, std::char_traits, std::allocator > > > & output_names, const std::string & provider_type, bool allow_released_onnx_opset_only) (\home\user1\git\onnxruntime\onnxruntime\test\providers\base_tester.cc:301) onnxruntime::test::BaseTester::ExecuteModelForEps(onnxruntime::test::BaseTester const this, std::vector<std::unique_ptr<onnxruntime::IExecutionProvider, std::default_delete >, std::allocator<std::unique_ptr<onnxruntime::IExecutionProvider, std::default_delete > > > && execution_providers, onnxruntime::Model & model, onnxruntime::SessionOptions sess_options, 
onnxruntime::test::BaseTester::ExpectResult expect_result, const std::string & expected_failure_string, const onnxruntime::RunOptions run_options, const std::unordered_map<std::cxx11::basic_string<char, std::char_traits, std::allocator >, OrtValue, std::hash<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::equal_to<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::allocator<std::pair<std::cxx11::basic_string<char, std::char_traits, std::allocator > const, OrtValue> > > & feeds, const std::vector<std::cxx11::basic_string<char, std::char_traits, std::allocator >, std::allocator<std::cxx11::basic_string<char, std::char_traits, std::allocator > > > & output_names, const std::vector<std::shared_ptr, std::allocator<std::shared_ptr > > custom_registries, bool try_assign_ep_for_nodes, bool allow_released_onnx_opset_only, size_t number_of_pre_packed_weights_counter, size_t number_of_shared_pre_packed_weights_counter) (\home\user1\git\onnxruntime\onnxruntime\test\providers\base_tester.cc:803) onnxruntime::test::BaseTester::RunWithConfig(onnxruntime::test::BaseTester const this, size_t number_of_pre_packed_weights_counter, size_t number_of_shared_pre_packed_weights_counter) (\home\user1\git\onnxruntime\onnxruntime\test\providers\base_tester.cc:684) onnxruntime::test::BaseTester::Run(onnxruntime::test::BaseTester * const this, onnxruntime::SessionOptions so, onnxruntime::test::BaseTester::ExpectResult expect_result, const std::string & expected_failure_string, const std::unordered_set<std::cxx11::basic_string<char, std::char_traits, std::allocator >, std::hash<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::equal_to<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::allocator<std::cxx11::basic_string<char, std::char_traits, std::allocator > > > & excluded_provider_types, const onnxruntime::RunOptions run_options, std::vector<std::unique_ptr<onnxruntime::IExecutionProvider, 
std::default_delete >, std::allocator<std::unique_ptr<onnxruntime::IExecutionProvider, std::default_delete > > > execution_providers, const onnxruntime::Graph::ResolveOptions & options, size_t number_of_pre_packed_weights_counter, size_t number_of_shared_pre_packed_weights_counter) (\home\user1\git\onnxruntime\onnxruntime\test\providers\base_tester.cc:557) onnxruntime::test::BaseTester::Run(onnxruntime::test::BaseTester * const this, onnxruntime::test::BaseTester::ExpectResult expect_result, const std::string & expected_failure_string, const std::unordered_set<std::cxx11::basic_string<char, std::char_traits, std::allocator >, std::hash<std::__cxx11::basic_string<char, std::char_traits, std::allocator > >, std::equal_to<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::allocator<std::cxx11::basic_string<char, std::char_traits, std::allocator > > > & excluded_provider_types, const onnxruntime::RunOptions run_options, std::vector<std::unique_ptr<onnxruntime::IExecutionProvider, std::default_delete >, std::allocator<std::unique_ptr<onnxruntime::IExecutionProvider, std::default_delete > > > execution_providers, ExecutionMode execution_mode, const onnxruntime::Graph::ResolveOptions & options) (\home\user1\git\onnxruntime\onnxruntime\test\providers\base_tester.cc:517) onnxruntime::test::TestSumMultipleInputsNoBroadcasting(size_t num_inputs, const onnxruntime::TensorShape & shape) (\home\user1\git\onnxruntime\onnxruntime\test\providers\cpu\math\element_wise_ops_test.cc:1362)
Since I do not have the source code of TensorRT, I cannot debug the issue.
Build from source as follows:
export CUDA_HOME=/usr/local/cuda-12.2
export CUDNN_HOME=/usr/lib/x86_64-linux-gnu/
export CUDACXX=/usr/local/cuda-12.2/bin/nvcc
export TRT_HOME=/usr/src/tensorrt
sh build.sh --config Debug --build_shared_lib --parallel --use_cuda --cuda_version 12.2 \
  --cuda_home $CUDA_HOME --cudnn_home $CUDNN_HOME --build_wheel --skip_tests \
  --use_tensorrt --tensorrt_home $TRT_HOME \
  --enable_cuda_line_info \
  --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=80
Then run a unit test:
cd build/Linux/Debug
./onnxruntime_test_all --gtest_filter=MathOpTest.SumMultipleInputsNoBroadcasting
No response
Linux
ubuntu 20.04
Built from Source
main
C++
X64
TensorRT
TensorRT 8.6.1.6-1+cuda12.0
I also tested the rel-1.15.0 branch; the issue reproduces in 1.15 as well.
Describe the issue
When I run a unit test, a core dump occurs in the TRT EP, caused by signal 11 (an attempt by the program to read or write outside its allocated memory).
The call stack is as follows:
libgcc_s.so.1![Unknown/Just-In-Time compiled code] (Unknown Source:0) libgcc_s.so.1!_Unwind_Find_FDE (Unknown Source:0) libgcc_s.so.1![Unknown/Just-In-Time compiled code] (Unknown Source:0) libgcc_s.so.1!_Unwind_RaiseException (Unknown Source:0) libstdc++.so.6!cxa_throw (Unknown Source:0) libnvinfer.so.8![Unknown/Just-In-Time compiled code] (Unknown Source:0) libonnxruntime_providers_tensorrt.so!nvinfer1::IBuilder::buildSerializedNetwork(nvinfer1::IBuilder const this, nvinfer1::INetworkDefinition & network, nvinfer1::IBuilderConfig & config) (\usr\include\x86_64-linux-gnu\NvInfer.h:10180) libonnxruntime_providers_tensorrt.so!onnxruntime::TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(onnxruntime::TensorrtExecutionProvider const this, const onnxruntime::GraphViewer & graph_body_viewer, const onnxruntime::Node & fused_node, std::unordered_map<std::cxx11::basic_string<char, std::char_traits, std::allocator >, unsigned long, std::hash<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::equal_to<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::allocator<std::pair<std:: cxx11::basic_string<char, std::char_traits, std::allocator > const, unsigned long> > > & input_map, std::unordered_map<std::cxx11::basic_string<char, std::char_traits, std::allocator >, unsigned long, std::hash<std:: cxx11::basic_string<char, std::char_traits, std::allocator > >, std::equal_to<std:: cxx11::basic_string<char, std::char_traits, std::allocator > >, std::allocator<std::pair<std::cxx11::basic_string<char, std::char_traits, std::allocator > const, unsigned long> > > & output_map, std::vector<onnxruntime::NodeComputeInfo, std::allocator > & node_compute_funcs) (\home\user1\git\onnxruntime\onnxruntime\core\providers\tensorrt\tensorrt_execution_provider.cc:2987)
libonnxruntime_providers_tensorrt.so!onnxruntime::TensorrtExecutionProvider::Compile(onnxruntime::TensorrtExecutionProvider const this, const std::vector<onnxruntime::IExecutionProvider::FusedNodeAndGraph, std::allocator > & fused_nodes_and_graphs, std::vector<onnxruntime::NodeComputeInfo, std::allocator > & node_compute_funcs) (\home\user1\git\onnxruntime\onnxruntime\core\providers\tensorrt\tensorrt_execution_provider.cc:2575)
onnxruntime::PartitionOnnxFormatModelImpl(onnxruntime::Graph & graph, onnxruntime::FuncManager & func_mgr, onnxruntime::KernelRegistryManager & kernel_registry_mgr, onnxruntime::KernelRegistry & fused_kernel_registry, onnxruntime::IExecutionProvider & current_ep, onnxruntime::GraphPartitioner::Mode mode, int & fused_node_unique_id, const onnxruntime::layout_transformation::TransformLayoutFunction & transform_layout_fn, const onnxruntime::layout_transformation::DebugGraphFn & debug_graph_fn) (\home\user1\git\onnxruntime\onnxruntime\core\framework\graph_partitioner.cc:462)
onnxruntime::PartitionOnnxFormatModel(const onnxruntime::(anonymous namespace)::PartitionParams & partition_params, onnxruntime::GraphPartitioner::Mode mode, const onnxruntime::ExecutionProviders & execution_providers, onnxruntime::KernelRegistryManager & kernel_registry_manager) (\home\user1\git\onnxruntime\onnxruntime\core\framework\graph_partitioner.cc:744)
onnxruntime::GraphPartitioner::Partition(onnxruntime::Graph&, onnxruntime::FuncManager&, std::function<onnxruntime::common::Status (onnxruntime::Graph&, bool&, onnxruntime::IExecutionProvider&, std::function<void (onnxruntime::Graph const&)> const&)> const&, onnxruntime::ConfigOptions const&, onnxruntime::logging::Logger const&, onnxruntime::GraphPartitioner::Mode, std::function<void (onnxruntime::Graph const&)> const&) const(const onnxruntime::GraphPartitioner const this, onnxruntime::Graph & graph, onnxruntime::FuncManager & func_mgr, const onnxruntime::layout_transformation::TransformLayoutFunction & transform_layout_function, const onnxruntime::ConfigOptions & config_options, const onnxruntime::logging::Logger & logger, onnxruntime::GraphPartitioner::Mode mode, const onnxruntime::layout_transformation::DebugGraphFn & debug_graph_fn) (\home\user1\git\onnxruntime\onnxruntime\core\framework\graph_partitioner.cc:982)
onnxruntime::InferenceSession::TransformGraph(onnxruntime::InferenceSession const this, onnxruntime::Graph & graph, bool saving_model_in_ort_format) (\home\user1\git\onnxruntime\onnxruntime\core\session\inference_session.cc:1214)
onnxruntime::InferenceSession::Initialize(onnxruntime::InferenceSession const this) (\home\user1\git\onnxruntime\onnxruntime\core\session\inference_session.cc:1761)
onnxruntime::test::BaseTester::ExecuteModel(onnxruntime::test::BaseTester const this, onnxruntime::Model & model, onnxruntime::InferenceSession & session, onnxruntime::test::BaseTester::ExpectResult expect_result, const std::string & expected_failure_string, const onnxruntime::RunOptions run_options, const std::unordered_map<std::cxx11::basic_string<char, std::char_traits, std::allocator >, OrtValue, std::hash<std:: cxx11::basic_string<char, std::char_traits, std::allocator > >, std::equal_to<std:: cxx11::basic_string<char, std::char_traits, std::allocator > >, std::allocator<std::pair<std::cxx11::basic_string<char, std::char_traits, std::allocator > const, OrtValue> > > & feeds, const std::vector<std:: cxx11::basic_string<char, std::char_traits, std::allocator >, std::allocator<std::cxx11::basic_string<char, std::char_traits, std::allocator > > > & output_names, const std::string & provider_type, bool allow_released_onnx_opset_only) (\home\user1\git\onnxruntime\onnxruntime\test\providers\base_tester.cc:301)
onnxruntime::test::BaseTester::ExecuteModelForEps(onnxruntime::test::BaseTester const this, std::vector<std::unique_ptr<onnxruntime::IExecutionProvider, std::default_delete >, std::allocator<std::unique_ptr<onnxruntime::IExecutionProvider, std::default_delete > > > && execution_providers, onnxruntime::Model & model, onnxruntime::SessionOptions sess_options, onnxruntime::test::BaseTester::ExpectResult expect_result, const std::string & expected_failure_string, const onnxruntime::RunOptions run_options, const std::unordered_map<std:: cxx11::basic_string<char, std::char_traits, std::allocator >, OrtValue, std::hash<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::equal_to<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::allocator<std::pair<std:: cxx11::basic_string<char, std::char_traits, std::allocator > const, OrtValue> > > & feeds, const std::vector<std::cxx11::basic_string<char, std::char_traits, std::allocator >, std::allocator<std:: cxx11::basic_string<char, std::char_traits, std::allocator > > > & output_names, const std::vector<std::shared_ptr, std::allocator<std::shared_ptr > > custom_registries, bool try_assign_ep_for_nodes, bool allow_released_onnx_opset_only, size_t number_of_pre_packed_weights_counter, size_t number_of_shared_pre_packed_weights_counter) (\home\user1\git\onnxruntime\onnxruntime\test\providers\base_tester.cc:803)
onnxruntime::test::BaseTester::RunWithConfig(onnxruntime::test::BaseTester const this, size_t number_of_pre_packed_weights_counter, size_t number_of_shared_pre_packed_weights_counter) (\home\user1\git\onnxruntime\onnxruntime\test\providers\base_tester.cc:684)
onnxruntime::test::BaseTester::Run(onnxruntime::test::BaseTester * const this, onnxruntime::SessionOptions so, onnxruntime::test::BaseTester::ExpectResult expect_result, const std::string & expected_failure_string, const std::unordered_set<std::cxx11::basic_string<char, std::char_traits, std::allocator >, std::hash<std:: cxx11::basic_string<char, std::char_traits, std::allocator > >, std::equal_to<std:: cxx11::basic_string<char, std::char_traits, std::allocator > >, std::allocator<std::cxx11::basic_string<char, std::char_traits, std::allocator > > > & excluded_provider_types, const onnxruntime::RunOptions run_options, std::vector<std::unique_ptr<onnxruntime::IExecutionProvider, std::default_delete >, std::allocator<std::unique_ptr<onnxruntime::IExecutionProvider, std::default_delete > > > execution_providers, const onnxruntime::Graph::ResolveOptions & options, size_t number_of_pre_packed_weights_counter, size_t number_of_shared_pre_packed_weights_counter) (\home\user1\git\onnxruntime\onnxruntime\test\providers\base_tester.cc:557)
onnxruntime::test::BaseTester::Run(onnxruntime::test::BaseTester * const this, onnxruntime::test::BaseTester::ExpectResult expect_result, const std::string & expected_failure_string, const std::unordered_set<std:: cxx11::basic_string<char, std::char_traits, std::allocator >, std::hash<std::__cxx11::basic_string<char, std::char_traits, std::allocator > >, std::equal_to<std::cxx11::basic_string<char, std::char_traits, std::allocator > >, std::allocator<std:: cxx11::basic_string<char, std::char_traits, std::allocator > > > & excluded_provider_types, const onnxruntime::RunOptions run_options, std::vector<std::unique_ptr<onnxruntime::IExecutionProvider, std::default_delete >, std::allocator<std::unique_ptr<onnxruntime::IExecutionProvider, std::default_delete > > > execution_providers, ExecutionMode execution_mode, const onnxruntime::Graph::ResolveOptions & options) (\home\user1\git\onnxruntime\onnxruntime\test\providers\base_tester.cc:517)
onnxruntime::test::TestSumMultipleInputsNoBroadcasting(size_t num_inputs, const onnxruntime::TensorShape & shape) (\home\user1\git\onnxruntime\onnxruntime\test\providers\cpu\math\element_wise_ops_test.cc:1362)
Since I do not have the source code of TensorRT, I cannot debug the issue.
To reproduce
Build from source like the following:
Then run a unit test:
Urgency
No response
Platform
Linux
OS Version
ubuntu 20.04
ONNX Runtime Installation
Built from Source
ONNX Runtime Version or Commit ID
main
ONNX Runtime API
C++
Architecture
X64
Execution Provider
TensorRT
Execution Provider Library Version
TensorRT 8.6.1.6-1+cuda12.0