microsoft / onnxruntime

ONNX Runtime: cross-platform, high performance ML inferencing and training accelerator
https://onnxruntime.ai
MIT License
14.87k stars 2.94k forks source link

[Build] breakage with protobuf-27.2, due to API change #21308

Open stefantalpalaru opened 4 months ago

stefantalpalaru commented 4 months ago

Describe the issue

Protobuf has removed RepeatedPtrField::ReleaseCleared() from its API, as described here: https://protobuf.dev/news/v26/#remove-deprecated-clear-apis-on-repeated-fields

Urgency

No response

Target platform

Linux AMD64

Build script

cmake -C /var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/cmake/gentoo_common_config.cmake -G Ninja -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_INSTALL_INCLUDEDIR=include/onnxruntime -Donnxruntime_REQUIRE_PYTHON_EMBED_LIB=OFF -Donnxruntime_USE_FULL_PROTOBUF=OFF -Donnxruntime_ENABLE_LANGUAGE_INTEROP_OPS=OFF -Donnxruntime_BUILD_SHARED_LIB=ON -Donnxruntime_ENABLE_PYTHON=yes -Donnxruntime_BUILD_BENCHMARKS=no -Donnxruntime_BUILD_UNIT_TESTS=no -Donnxruntime_RUN_ONNX_TESTS=no -Donnxruntime_ENABLE_LAZY_TENSOR=OFF -Donnxruntime_USE_MPI=yes -Donnxruntime_USE_PREINSTALLED_EIGEN=ON -Donnxruntime_USE_DNNL=no -Donnxruntime_USE_CUDA=yes -Donnxruntime_USE_ROCM=no -Donnxruntime_USE_AVX=yes -Donnxruntime_USE_AVX2=yes -Donnxruntime_USE_AVX512=no -Donnxruntime_USE_MIMALLOC=no -Donnxruntime_USE_XNNPACK=no -Donnxruntime_ENABLE_LTO=no -Deigen_SOURCE_PATH=/usr/include/eigen3 -DFETCHCONTENT_TRY_FIND_PACKAGE_MODE=ALWAYS -DFETCHCONTENT_FULLY_DISCONNECTED=ON -DFETCHCONTENT_QUIET=OFF -DFETCHCONTENT_SOURCE_DIR_SAFEINT=/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/SafeInt-3.0.28a -DFETCHCONTENT_SOURCE_DIR_FLATBUFFERS=/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/flatbuffers-23.5.26 -DFETCHCONTENT_SOURCE_DIR_DATE=/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/date-3.0.1 -Donnxruntime_USE_TENSORRT=no -Donnxruntime_USE_JSEP=no -Donnxruntime_ENABLE_MEMORY_PROFILE=OFF -Donnxruntime_DISABLE_ABSEIL=ON -Donnxruntime_BUILD_FOR_NATIVE_MACHINE=OFF -Donnxruntime_USE_MIMALLOC=OFF -Donnxruntime_BUILD_CSHARP=OFF -Donnxruntime_BUILD_JAVA=OFF -Donnxruntime_BUILD_NODEJS=OFF -Donnxruntime_BUILD_OBJC=OFF -Donnxruntime_BUILD_APPLE_FRAMEWORK=OFF -Donnxruntime_USE_NNAPI_BUILTIN=OFF -Donnxruntime_USE_RKNPU=OFF -Donnxruntime_USE_LLVM=no -Donnxruntime_ENABLE_MICROSOFT_INTERNAL=OFF -Donnxruntime_USE_VITISAI=OFF -Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=OFF -Donnxruntime_USE_TVM=OFF -Donnxruntime_TVM_USE_HASH=OFF -Donnxruntime_USE_MIGRAPHX=no -Donnxruntime_CROSS_COMPILING=OFF 
-Donnxruntime_DISABLE_CONTRIB_OPS=ON -Donnxruntime_DISABLE_ML_OPS=ON -Donnxruntime_DISABLE_RTTI=OFF -Donnxruntime_DISABLE_EXCEPTIONS=yes -Donnxruntime_MINIMAL_BUILD=OFF -Donnxruntime_EXTENDED_MINIMAL_BUILD=OFF -Donnxruntime_MINIMAL_BUILD_CUSTOM_OPS=OFF -Donnxruntime_REDUCED_OPS_BUILD=OFF -Donnxruntime_ENABLE_LANGUAGE_INTEROP_OPS=OFF -Donnxruntime_USE_DML=OFF -Donnxruntime_USE_WINML=OFF -Donnxruntime_BUILD_MS_EXPERIMENTAL_OPS=OFF -Donnxruntime_USE_TELEMETRY=OFF -Donnxruntime_USE_ACL=OFF -Donnxruntime_USE_ACL_1902=OFF -Donnxruntime_USE_ACL_1905=OFF -Donnxruntime_USE_ACL_1908=OFF -Donnxruntime_USE_ACL_2002=OFF -Donnxruntime_USE_ARMNN=OFF -Donnxruntime_ARMNN_RELU_USE_CPU=ON -Donnxruntime_ARMNN_BN_USE_CPU=ON -Donnxruntime_ENABLE_NVTX_PROFILE=OFF -Donnxruntime_ENABLE_TRAINING=OFF -Donnxruntime_ENABLE_TRAINING_OPS=OFF -Donnxruntime_ENABLE_TRAINING_APIS=OFF -Donnxruntime_ENABLE_CPU_FP16_OPS=OFF -Donnxruntime_USE_NCCL=OFF -DOnnxruntime_GCOV_COVERAGE=OFF -Donnxruntime_ENABLE_MEMORY_PROFILE=OFF -Donnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB=OFF -Donnxruntime_ENABLE_WEBASSEMBLY_EXCEPTION_CATCHING=ON -Donnxruntime_ENABLE_WEBASSEMBLY_API_EXCEPTION_CATCHING=OFF -Donnxruntime_ENABLE_WEBASSEMBLY_EXCEPTION_THROWING=ON -Donnxruntime_WEBASSEMBLY_RUN_TESTS_IN_BROWSER=OFF -Donnxruntime_ENABLE_WEBASSEMBLY_THREADS=OFF -Donnxruntime_ENABLE_WEBASSEMBLY_DEBUG_INFO=OFF -Donnxruntime_ENABLE_WEBASSEMBLY_PROFILING=OFF -Donnxruntime_ENABLE_EAGER_MODE=OFF -Donnxruntime_ENABLE_LAZY_TENSOR=OFF -Donnxruntime_ENABLE_EXTERNAL_CUSTOM_OP_SCHEMAS=OFF -Donnxruntime_ENABLE_ROCM_PROFILING=OFF -Donnxruntime_USE_CANN=OFF -Donnxruntime_PYBIND_EXPORT_OPSCHEMA=OFF -Donnxruntime_ENABLE_MEMLEAK_CHECKER=ON -Donnxruntime_CUDA_HOME=/opt/cuda -Donnxruntime_CUDNN_HOME=/usr -DCMAKE_CUDA_ARCHITECTURES=86-real -DCMAKE_CUDA_HOST_COMPILER=/usr/x86_64-pc-linux-gnu/gcc-bin/13 -DCMAKE_CUDA_FLAGS=-forward-unknown-opts -fno-lto -O2 -v --compiler-bindir "/usr/x86_64-pc-linux-gnu/gcc-bin/13" --compiler-options "-O3 -march=native -pipe" 
--linker-options "-O1,--as-needed,--sort-common" -DCMAKE_CUDA_STANDARD_REQUIRED=ON -DCMAKE_CXX_STANDARD_REQUIRED=ON -Donnxruntime_ENABLE_CUDA_LINE_NUMBER_INFO=OFF -Donnxruntime_ENABLE_CUDA_PROFILING=OFF -Donnxruntime_TVM_CUDA_RUNTIME=OFF -Donnxruntime_USE_NCCL=OFF -Donnxruntime_NVCC_THREADS=1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/cmake/gentoo_toolchain.cmake /var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/cmake

Error / output

[169/619] /usr/bin/x86_64-pc-linux-gnu-g++ -DCPUINFO_SUPPORTED_PLATFORM=1 -DDISABLE_ABSEIL -DDISABLE_CONTRIB_OPS -DDISABLE_CUSPARSE_DEPRECATED -DDISABLE_ML_OPS -DEIGEN_MPL2_ONLY -DEIGEN_USE_THREADS -DNSYNC_ATOMIC _CPP11 -DONLY_C_LOCALE=0 -DONNX_ML=1 -DONNX_NAMESPACE=onnx -DONNX_USE_LITE_PROTO=1 -DORT_ENABLE_STREAM -DPLATFORM_POSIX -DPROTOBUF_USE_DLLS -DUSE_CUDA=1 -DUSE_MPI=1 -D_GNU_SOURCE -DONNX_DISABLE_STATIC_REGISTRAT ION -I/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/include/onnxruntime -I/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/include/onnxruntime/core/session -I/var/ tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/cmake -I/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime -I/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/wor k/SafeInt-3.0.28a -I/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/date-3.0.1/include -I/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/flatbuffers-23.5.26/include -I/usr/include/eigen3 -O3 -march=nat ive -pipe -Wno-dangling-reference -Wno-c++20-compat -ffunction-sections -fdata-sections -mavx -Wno-restrict -DCPUINFO_SUPPORTED -std=gnu++17 -fPIC -Wall -Wextra -Wno-deprecated-copy -Wno-nonnull-compare -Wno-str ict-aliasing -Wno-parentheses -Wno-deprecated-declarations -MD -MT CMakeFiles/onnxruntime_graph.dir/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc.o -MF CMa keFiles/onnxruntime_graph.dir/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc.o.d -o CMakeFiles/onnxruntime_graph.dir/var/tmp/portage/sci-libs/onnxruntime-1. 
18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc.o -c /var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc FAILED: CMakeFiles/onnxruntime_graph.dir/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc.o /usr/bin/x86_64-pc-linux-gnu-g++ -DCPUINFO_SUPPORTED_PLATFORM=1 -DDISABLE_ABSEIL -DDISABLE_CONTRIB_OPS -DDISABLE_CUSPARSE_DEPRECATED -DDISABLE_ML_OPS -DEIGEN_MPL2_ONLY -DEIGEN_USE_THREADS -DNSYNC_ATOMIC_CPP11 -DO NLY_C_LOCALE=0 -DONNX_ML=1 -DONNX_NAMESPACE=onnx -DONNX_USE_LITE_PROTO=1 -DORT_ENABLE_STREAM -DPLATFORM_POSIX -DPROTOBUF_USE_DLLS -DUSE_CUDA=1 -DUSE_MPI=1 -D_GNU_SOURCE -DONNX_DISABLE_STATIC_REGISTRATION -I/var /tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/include/onnxruntime -I/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/include/onnxruntime/core/session -I/var/tmp/portag e/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/cmake -I/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime -I/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/SafeInt- 3.0.28a -I/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/date-3.0.1/include -I/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/flatbuffers-23.5.26/include -I/usr/include/eigen3 -O3 -march=native -pipe -Wno-dangling-reference -Wno-c++20-compat -ffunction-sections -fdata-sections -mavx -Wno-restrict -DCPUINFO_SUPPORTED -std=gnu++17 -fPIC -Wall -Wextra -Wno-deprecated-copy -Wno-nonnull-compare -Wno-strict-aliasi ng -Wno-parentheses -Wno-deprecated-declarations -MD -MT CMakeFiles/onnxruntime_graph.dir/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc.o -MF CMakeFiles/on nxruntime_graph.dir/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc.o.d -o 
CMakeFiles/onnxruntime_graph.dir/var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc.o -c /var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc /var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc: In constructor ‘onnxruntime::Graph::Graph(const onnxruntime::Model&, onnx::GraphProto, const std::unordered_map<std::__cxx11::basic_string, int>&, onnxruntime::Version, onnxruntime::IOnnxRuntimeOpSchemaCollectionPtr, onnxruntime::Graph, const onnxruntime::Node*, const onnxruntime::logging::Logger&, bool)’: /var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc:1239:58: error: ‘class google::protobuf::RepeatedPtrField’ has no member named ‘ReleaseCleared’ 1239 | delete graph_proto_->mutable_sparse_initializer()->ReleaseCleared(); | ^~~~~~ /var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc: In member function ‘void onnxruntime::Graph::CleanAllInitializedTensors()’: /var/tmp/portage/sci-libs/onnxruntime-1.18.0-r1/work/onnxruntime-1.18.0/onnxruntime/core/graph/graph.cc:3513:49: error: ‘class google::protobuf::RepeatedPtrField’ has no member named ‘ReleaseCleared’ 3513 | delete graph_proto_->mutable_initializer()->ReleaseCleared(); | ^~~~~~

Visual Studio Version

No response

GCC / Compiler Version

No response

snnn commented 4 months ago

Yeah .. I knew it. Do you know how to fix it?

stefantalpalaru commented 4 months ago

There's a ReleaseLast() in the protobuf API, but I don't know whether it releases cleared elements, and the documentation's mention of a possible object copy when arenas are in use is suspicious: https://protobuf.dev/reference/cpp/api-docs/google.protobuf.repeated_field/#RepeatedPtrField.ReleaseLast.details

For my Gentoo overlay package, I just removed the whole thing: https://github.com/stefantalpalaru/gentoo-overlay/blob/4f968989e6bb526ac83d613ce2ea0f491f13df49/sci-libs/onnxruntime/files/onnxruntime-1.18.1-protobuf-27.patch

github-actions[bot] commented 3 months ago

This issue has been automatically marked as stale due to inactivity and will be closed in 30 days if no further activity occurs. If further support is needed, please provide an update and/or more details.

snnn commented 1 month ago

We still need to address this issue.

cho-m commented 2 weeks ago

It looks like ReleaseLast()/UnsafeArenaReleaseLast() operate on non-cleared elements. Maybe we can replace the Clear() call and simply loop over one of those functions, if the goal is to free all elements (which sounds like the case based on the code comment):

https://github.com/microsoft/onnxruntime/blob/ff57ac4f3ddcc535aef30d8e638360a9f9600cec/onnxruntime/core/graph/graph.cc#L1287-L1292

https://github.com/microsoft/onnxruntime/blob/ff57ac4f3ddcc535aef30d8e638360a9f9600cec/onnxruntime/core/graph/graph.cc#L3637-L3644


EDIT: One scenario to check is whether any elements had already been cleared before the Clear() call above; those would no longer be freed. I'm not sure how to handle them.

cho-m commented 1 week ago

I think something like the patch below may work, but I'm not sure how to test it. I couldn't tell whether there is already a related test for this.

diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc
index e8a5855b36..3e3fb7eb88 100644
--- a/onnxruntime/core/graph/graph.cc
+++ b/onnxruntime/core/graph/graph.cc
@@ -1285,10 +1285,13 @@ Graph::Graph(const Model& owning_model,
     }

     // Remove sparse_initializers from protobuf to save memory as they are converted to dense now
-    graph_proto_->mutable_sparse_initializer()->Clear();
-    const int sparse_num_cleared = graph_proto_->sparse_initializer().ClearedCount();
-    for (int i = 0; i < sparse_num_cleared; ++i) {
-      delete graph_proto_->mutable_sparse_initializer()->ReleaseCleared();
+    auto& mutable_sparse_initializers = *(graph_proto_->mutable_sparse_initializer());
+    if (mutable_sparse_initializers.GetArena() == nullptr) {
+      while (mutable_sparse_initializers.size() > 0) {
+        delete mutable_sparse_initializers.UnsafeArenaReleaseLast();
+      }
+    } else {
+      mutable_sparse_initializers.Clear();
     }
   }
 #endif
@@ -3637,10 +3640,13 @@ void Graph::CleanAllInitializedTensors() noexcept {
   // Clearing RepeatedPtrFields does not free objects' memory. The memory is retained
   // and can be reused. Need to explicitly release the cleared objects and free the
   // memory.
-  graph_proto_->mutable_initializer()->Clear();
-  const int num_cleared = graph_proto_->initializer().ClearedCount();
-  for (int i = 0; i < num_cleared; i++) {
-    delete graph_proto_->mutable_initializer()->ReleaseCleared();
+  auto& mutable_initializers = *(graph_proto_->mutable_initializer());
+  if (mutable_initializers.GetArena() == nullptr) {
+    while (mutable_initializers.size() > 0) {
+      delete mutable_initializers.UnsafeArenaReleaseLast();
+    }
+  } else {
+    mutable_initializers.Clear();
   }
 }

If operating on an arena, then it shouldn't be possible to free the memory, as ownership is never transferred. Otherwise, it seems like it should be possible to use the "UnsafeArena*" methods directly and avoid the extra handling in ReleaseLast() (a copy attempt if on an arena).