ceccocats / tkDNN

Deep neural network library and toolkit to do high performance inference on NVIDIA Jetson platforms
GNU General Public License v2.0
720 stars 209 forks source link

Compile error on tensorrt8 branch #276

Closed mryau closed 2 years ago

mryau commented 2 years ago

Ubuntu 20.04 Cuda 11.5 CudNN 8.3.1 TensorRT 8.2.2 OpenCV 4.52

Workflow as defined in README.md leads to the following error:

cmake .. 
make
[ 22%] Linking CXX shared library libkernels.so
/usr/bin/ld: CMakeFiles/kernels.dir/kernels_intermediate_link.o: relocation R_X86_64_PC32 against symbol `__fatbinwrap_dca1913b_17_activation_elu_cu_d406a46e' can not be used when making a shared object; recompile with -fPIC
/usr/bin/ld: final link failed: bad value
collect2: error: ld returned 1 exit status
make[2]: *** [CMakeFiles/kernels.dir/build.make:8225: libkernels.so] Error 1
make[1]: *** [CMakeFiles/Makefile2:872: CMakeFiles/kernels.dir/all] Error 2
make: *** [Makefile:130: all] Error 2

If I explicitly specify the build type, everything works perfectly:

cmake -DCMAKE_BUILD_TYPE=Release ..
make
...
[100%] Linking CXX executable test_yolo4_berkeley
[100%] Built target test_yolo4_berkeley
perseusdg commented 2 years ago

I believe the README of the tensorrt8 branch mentions that the build type has to be defined: https://github.com/ceccocats/tkDNN/tree/tensorrt8#how-to-compile-this-repo

mochechan commented 2 years ago

I also face the same problem with the tensorrt8 branch. Actually, the error messages are different if compiling with the master branch.

root@314654f59059:~/tkDNN# git checkout tensorrt8
Branch 'tensorrt8' set up to track remote branch 'tensorrt8' from 'origin'.
Switched to a new branch 'tensorrt8'
root@314654f59059:~/tkDNN# git status
On branch tensorrt8
Your branch is up to date with 'origin/tensorrt8'.

nothing to commit, working tree clean

root@314654f59059:~/tkDNN/build# cmake ..
-- CUDA Architecture: -arch=sm_86
-- CUDA Version: 11.4
-- CUDA Path: /usr/local/cuda
-- CUDA Libararies: /usr/local/cuda/lib64/libcudart_static.a;Threads::Threads;dl;/usr/lib/x86_64-linux-gnu/librt.so
-- CUDA Performance Primitives: /usr/local/cuda/lib64/libnppc.so;/usr/local/cuda/lib64/libnppial.so;/usr/local/cuda/lib64/libnppicc.so;;/usr/local/cuda/lib64/libnppidei.so;/usr/local/cuda/lib64/libnppif.so;/usr/local/cuda/lib64/libnppig.so;/usr/local/cuda/lib64/libnppim.so;/usr/local/cuda/lib64/libnppist.so;/usr/local/cuda/lib64/libnppisu.so;/usr/local/cuda/lib64/libnppitc.so;/usr/local/cuda/lib64/libnpps.so
-- Found CUDNN: /usr/lib/x86_64-linux-gnu/libcudnn.so
-- Found CUDNN include: /usr/include
-- Found NVINFER: /usr/lib/x86_64-linux-gnu/libnvinfer.so
-- Found NVINFER include: /usr/include/x86_64-linux-gnu
Eigen DIR: /usr/include/eigen3
install dir:/usr/local
-- Configuring done
-- Generating done
-- Build files have been written to: /root/tkDNN/build
root@314654f59059:~/tkDNN/build# make -j6
[  0%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_upsample.cu.o
[  1%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_activation_elu.cu.o
[  1%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_activation_leaky.cu.o
[  2%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_activation_logistic.cu.o
[  3%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_activation_mish.cu.o
[  3%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_activation_relu_ceiling.cu.o
[  4%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_activation_sigmoid.cu.o
[  4%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_convert.cu.o
[  5%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_deformable_conv.cu.o
[  6%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_fill.cu.o
[  6%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_normalize.cu.o
[  7%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_pooling.cu.o
[  7%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_postprocessing.cu.o
[  8%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_reorg.cu.o
[  9%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_resize.cu.o
[  9%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_scaladd.cu.o
[ 10%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_shortcut.cu.o
[ 11%] Building NVCC (Device) object CMakeFiles/kernels.dir/src/kernels/kernels_generated_softmax.cu.o
[ 12%] Building NVCC intermediate link file CMakeFiles/kernels.dir/kernels_intermediate_link.o
[ 13%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/ActivationLeakyRT.cpp.o
[ 14%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/ActivationLogisticRT.cpp.o
[ 14%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/ActivationMishRT.cpp.o
[ 14%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/ActivationReLUCeilingRT.cpp.o
[ 15%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/DeformableConvRT.cpp.o
[ 15%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/FlattenConcatRT.cpp.o
[ 16%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/MaxPoolingSizeRT.cpp.o
[ 17%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/RegionRT.cpp.o
[ 17%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/ReorgRT.cpp.o
[ 18%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/ReshapeRT.cpp.o
[ 18%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/ResizeLayerRT.cpp.o
[ 19%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/RouteRT.cpp.o
[ 20%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/ShortcutRT.cpp.o
[ 20%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/UpsampleRT.cpp.o
[ 21%] Building CXX object CMakeFiles/kernels.dir/src/pluginsRT/YoloRT.cpp.o
[ 22%] Linking CXX shared library libkernels.so
/usr/bin/ld: CMakeFiles/kernels.dir/kernels_intermediate_link.o: relocation R_X86_64_PC32 against symbol `__fatbinwrap_49_tmpxft_000003f0_00000000_7_activation_elu_cpp1_ii_d406a46e' can not be used when making a shared object; recompile with -fPIC
/usr/bin/ld: final link failed: bad value
collect2: error: ld returned 1 exit status
make[2]: *** [CMakeFiles/kernels.dir/build.make:5477: libkernels.so] Error 1
make[1]: *** [CMakeFiles/Makefile2:179: CMakeFiles/kernels.dir/all] Error 2
make: *** [Makefile:136: all] Error 2
perseusdg commented 2 years ago

Defining the build type should resolve your issue. On the tensorrt8 branch, tkDNN has its flags split into Debug and Release, as shown below:

if(UNIX)
    if(CMAKE_BUILD_TYPE MATCHES Release)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}  -fPIC -Wno-deprecated-declarations -Wno-unused-variable -O3")
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --maxrregcount=32)
    endif()

    if(CMAKE_BUILD_TYPE MATCHES Debug)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}  -fPIC -Wno-deprecated-declarations -Wno-unused-variable -g3")
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --maxrregcount=32 -G -g)
    endif()
endif()

If you don't want to define the build type, then append -fPIC to CMAKE_CXX_FLAGS unconditionally, similar to what is done on the master branch: https://github.com/ceccocats/tkDNN/blob/04de9908a68588c32dcf94d84b59fdf71cf8d9a8/CMakeLists.txt#L6

mochechan commented 2 years ago

Unfortunately, "-fPIC" is already in my CMakeLists.txt. Any other ideas? Thanks a lot.

    38  if(UNIX)
    39      if(CMAKE_BUILD_TYPE MATCHES Release)
    40          set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}  -fPIC -Wno-deprecated-declarations -Wno-unused-variable -O3")
    41          set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --maxrregcount=32)
    42      endif()
    43  
    44      if(CMAKE_BUILD_TYPE MATCHES Debug)
    45          set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}  -fPIC -Wno-deprecated-declarations -Wno-unused-variable -g3")
    46          set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --maxrregcount=32 -G -g)
    47      endif()
    48  endif()
perseusdg commented 2 years ago

I meant doing something like this

if(UNIX)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
    if(CMAKE_BUILD_TYPE MATCHES Release)
       ....
endif()

Since the CMakeLists.txt you are currently using only adds -fPIC when a build type is defined, moving the -fPIC flag to just above the per-build-type blocks should do the trick when no build type is defined.

mochechan commented 2 years ago

It works. Thank you.