triton-inference-server / server

The Triton Inference Server provides an optimized cloud and edge inferencing solution.
https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/index.html
BSD 3-Clause "New" or "Revised" License
8k stars 1.44k forks source link

Docker build fails because of maybe-uninitialized warning #6189

Open mapa17 opened 1 year ago

mapa17 commented 1 year ago

Description I am trying to build a triton docker image following the https://github.com/triton-inference-server/server/blob/r23.07/docs/customization_guide/build.md#building-with-docker

Using the build command python ./build.py --target-platform linux --target-machine x86_64 --build-type=MinSizeRel --version 2.36.0 --enable-gpu --endpoint=grpc --endpoint=http --backend=pytorch

Building fails complaining about a maybe-uninitialized warning during linking. I think this could be related to the issue: https://github.com/triton-inference-server/server/issues/5643

Error

[ 47%] Linking CXX executable multi_server
/usr/bin/cmake -E cmake_link_script CMakeFiles/multi_server.dir/link.txt --verbose=0
make[5]: Leaving directory '/tmp/tritonbuild/tritonserver/build/triton-server'
[ 47%] Built target memory_alloc
make[5]: Leaving directory '/tmp/tritonbuild/tritonserver/build/triton-server'
[ 47%] Built target multi_server
/tmp/tritonbuild/tritonserver/build/triton-server/_deps/repo-backend-src/src/backend_input_collector.cc: In member function 'bool triton::backend::BackendInputCollector::FlushPendingPinned(char*, size_t, TRITONSERVER_MemoryType, int64_t)':
/tmp/tritonbuild/tritonserver/build/triton-server/_deps/repo-backend-src/src/backend_input_collector.cc:680:77: error: 'pinned_memory_type' may be used uninitialized in this function [-Werror=maybe-uninitialized]
  680 |             CommonErrorToTritonError(triton::common::AsyncWorkQueue::AddTask(
      |                                      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
  681 |                 [this, offset, pinned_memory, pinned_memory_type,
      |                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~            
  682 |                  pending_pinned_byte_size, pinned_memory_type_id, pending_it,
      |                  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  683 |                  end_it, incomplete_count, &deferred_pinned]() mutable {
      |                  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~     
  684 |                   for (; pending_it != end_it; pending_it++) {
      |                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~               
  685 |                     SetInputTensor(
      |                     ~~~~~~~~~~~~~~~                                          
  686 |                         "pinned async H2H", *pending_it, pinned_memory,
      |                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~      
  687 |                         pending_pinned_byte_size, pinned_memory_type,
      |                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~        
  688 |                         pinned_memory_type_id, offset,
      |                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                       
  689 |                         TRITONSERVER_MEMORY_CPU_PINNED, false, false);
      |                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~       
  690 |                     offset += pending_it->memory_desc_.byte_size_;
      |                     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~           
  691 |                   }
      |                   ~                                                          
  692 |                   // The last segmented task will start the next phase of
      |                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~    
  693 |                   // the internal pinned buffer copy
      |                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                         
  694 |                   if (incomplete_count->fetch_sub(1) == 1) {
      |                   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                 
  695 | #ifdef TRITON_ENABLE_GPU
      | ~~~~~~~~~~~~~~~~~~~~~~~~                                                     
  696 |                     if (buffer_ready_event_ != nullptr) {
      |                     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                    
  697 |                       cudaEventSynchronize(buffer_ready_event_);
      |                       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~             
  698 |                       buffer_ready_event_ = nullptr;
      |                       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                         
  699 |                     }
      |                     ~                                                        
  700 | #endif  // TRITON_ENABLE_GPU
      | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~                                                 
  701 |                     completion_queue_.Put(deferred_pinned.Finalize(stream_));
      |                     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  702 |                     delete incomplete_count;
      |                     ~~~~~~~~~~~~~~~~~~~~~~~~                                 
  703 |                   }
      |                   ~                                                          
  704 |                 }));
      |                 ~~                                                           
cc1plus: all warnings being treated as errors
make[5]: *** [_deps/repo-backend-build/CMakeFiles/triton-backend-utils.dir/build.make:90: _deps/repo-backend-build/CMakeFiles/triton-backend-utils.dir/src/backend_input_collector.cc.o] Error 1
make[5]: *** Waiting for unfinished jobs....
make[5]: Leaving directory '/tmp/tritonbuild/tritonserver/build/triton-server'
make[4]: *** [CMakeFiles/Makefile2:1012: _deps/repo-backend-build/CMakeFiles/triton-backend-utils.dir/all] Error 2
make[4]: *** Waiting for unfinished jobs....

Triton Information What version of Triton are you using?

Are you using the Triton container or did you build it yourself?

To Reproduce Steps to reproduce the behavior.

Expected behavior No error during build. A docker image is created.

mapa17 commented 1 year ago

I can report that changing the build-type to Release results in a successful build.

python ./build.py --target-platform linux --target-machine x86_64 --build-type=Release --version 2.36.0 --enable-gpu --endpoint=grpc --endpoint=http --backend=pytorch

mirekphd commented 3 months ago

I can report that changing the build-type to Release results in a successful build.

I confirm this result, thanks @mapa17, but a successful compilation seems to be version-dependent and component-dependent. In my case (in latestml/ml-gpu-py311-cuda118-hf containers used as the base image) it also required setting --version 2.36.0 or possibly later (earlier ones may still fail, e.g. --version 2.29.0 did despite --build-type=Release). It also succeeded for --backend=python, but failed for --backend=pytorch, with an attempt to... pull a custom image containing PyTorch using docker (how that nested docker pull was supposed to work when building another docker image I cannot imagine - it would normally be done using the FROM directive from the Dockerfile rather than a CLI command docker pull :)

More info:

[ 12%] Extracting pytorch and torchvision libraries and includes from nvcr.io/nvidia/pytorch:23.07-py3
cd /tmp/triton-build/pytorch/build/_deps/repo-common-build/src && /usr/bin/c++  -I/tmp/triton-build/pytorch/build/_deps/repo-common-src/src/../include -I/tmp/triton-build/pytorch/build/_deps/repo-common-src/src -O3 -DNDEBUG -fPIC -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror -MD -MT _deps/repo-common-build/src/CMakeFiles/triton-common-error.dir/error.cc.o -MF CMakeFiles/triton-common-error.dir/error.cc.o.d -o CMakeFiles/triton-common-error.dir/error.cc.o -c /tmp/triton-build/pytorch/build/_deps/repo-common-src/src/error.cc
/opt/conda/lib/python3.11/site-packages/cmake/data/bin/cmake -E make_directory include/torchvision
[ 15%] Building CXX object _deps/repo-common-build/src/CMakeFiles/triton-common-logging.dir/logging.cc.o
[ 17%] Building CXX object _deps/repo-common-build/src/CMakeFiles/triton-common-async-work-queue.dir/async_work_queue.cc.o
cd /tmp/triton-build/pytorch/build/_deps/repo-common-build/src && /usr/bin/c++  -I/tmp/triton-build/pytorch/build/_deps/repo-common-src/src/../include -I/tmp/triton-build/pytorch/build/_deps/repo-common-src/src -O3 -DNDEBUG -fPIC -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror -MD -MT _deps/repo-common-build/src/CMakeFiles/triton-common-logging.dir/logging.cc.o -MF CMakeFiles/triton-common-logging.dir/logging.cc.o.d -o CMakeFiles/triton-common-logging.dir/logging.cc.o -c /tmp/triton-build/pytorch/build/_deps/repo-common-src/src/logging.cc
cd /tmp/triton-build/pytorch/build/_deps/repo-common-build/src && /usr/bin/c++  -I/tmp/triton-build/pytorch/build/_deps/repo-common-src/src/../include -I/tmp/triton-build/pytorch/build/_deps/repo-common-src/src -O3 -DNDEBUG -fPIC -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror -MD -MT _deps/repo-common-build/src/CMakeFiles/triton-common-async-work-queue.dir/async_work_queue.cc.o -MF CMakeFiles/triton-common-async-work-queue.dir/async_work_queue.cc.o.d -o CMakeFiles/triton-common-async-work-queue.dir/async_work_queue.cc.o -c /tmp/triton-build/pytorch/build/_deps/repo-common-src/src/async_work_queue.cc
[ 20%] Building CXX object _deps/repo-common-build/src/CMakeFiles/triton-common-table-printer.dir/table_printer.cc.o
cd /tmp/triton-build/pytorch/build/_deps/repo-common-build/src && /usr/bin/c++  -I/tmp/triton-build/pytorch/build/_deps/repo-common-src/src/../include -I/tmp/triton-build/pytorch/build/_deps/repo-common-src/src/src -O3 -DNDEBUG -fPIC -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror -MD -MT _deps/repo-common-build/src/CMakeFiles/triton-common-table-printer.dir/table_printer.cc.o -MF CMakeFiles/triton-common-table-printer.dir/table_printer.cc.o.d -o CMakeFiles/triton-common-table-printer.dir/table_printer.cc.o -c /tmp/triton-build/pytorch/build/_deps/repo-common-src/src/table_printer.cc
docker pull nvcr.io/nvidia/pytorch:23.07-py3
make[2]: docker: Permission denied
make[2]: *** [CMakeFiles/ptlib_target.dir/build.make:104: libc10.so] Error 127
make[2]: Leaving directory '/tmp/triton-build/pytorch/build'
make[1]: *** [CMakeFiles/Makefile2:248: CMakeFiles/ptlib_target.dir/all] Error 2
make[1]: *** Waiting for unfinished jobs....
[..]
make[1]: Leaving directory '/tmp/triton-build/pytorch/build'
make: *** [Makefile:136: all] Error 2
error: build failed