OE4T / meta-tegra

BSP layer for NVIDIA Jetson platforms, based on L4T
MIT License
385 stars 216 forks source link

Building simple CUDA application with CMAKE fails when using SDK #1443

Closed lms-ts closed 6 months ago

lms-ts commented 6 months ago

I am using the latest meta-tegra (nanbield) layer to build a custom image for a Jetson AGX Orin devkit (machine: jetson-agx-orin-devkit). When building a simple CMake/CUDA application via a recipe, the compilation succeeds.

The recipe:

# Minimal recipe used to reproduce the issue: builds the CMake/CUDA example
# from two local source files.
SUMMARY = "Simple Cmake/CUDA application"
SECTION = "examples"
LICENSE = "CLOSED"

# Both inputs are local files (file:// entries), not fetched from a remote.
SRC_URI = "\
            file://CMakeLists.txt \
            file://example.cu \
        "

# Use WORKDIR itself as the source directory.
S = "${WORKDIR}"

# NOTE(review): "cuda" is presumably meta-tegra's cuda.bbclass - confirm.
inherit cmake cuda

# Left empty: this recipe sets no extra CMake options of its own.
EXTRA_OECMAKE = ""

CMakeLists.txt:

cmake_minimum_required(VERSION 3.27)

project(v4l2-cuda-test
    VERSION 1.0
    HOMEPAGE_URL "https://lmswet-bitbucket01.europe.leicams.com/"
    # Listing CUDA here makes CMake identify the CUDA compiler during project().
    LANGUAGES C CXX CUDA
)

# Name of the executable target built from the single CUDA source file.
set(EXEC_NAME "cmake-cuda-example")

add_executable(${EXEC_NAME} example.cu)
# Install the built executable into "bin" (relative to the install prefix).
install(TARGETS ${EXEC_NAME} RUNTIME DESTINATION bin)

example.cu is taken from here: https://gist.github.com/dpiponi/1502434#file-example-cu

When trying to build the same application with the SDK, generated with

TOOLCHAIN_HOST_TASK:append = " nativesdk-cmake nativesdk-packagegroup-cuda-sdk-host"

set in local.conf, the build fails.

Build Steps (within terminal):

# Source the SDK environment script
. /opt/leica-nvidia-cuda/environment-setup-armv8a-lms-linux
# Configure out-of-tree in a fresh build directory; the failure occurs at the cmake step
mkdir build
cd build
cmake ..

The configure step fails while CMake is enabling the CUDA language (during CUDA compiler identification), with the following error message:

-- The C compiler identification is GNU 13.2.0
-- The CXX compiler identification is GNU 13.2.0
CMake Error at /opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/share/cmake-3.27/Modules/CMakeDetermineCompilerId.cmake:753 (message):
  Compiling the CUDA compiler identification source file
  "CMakeCUDACompilerId.cu" failed.

  Compiler:
  /opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/nvcc

  Build flags:
  --gpu-architecture=compute_87;--gpu-code=sm_87;--include-path;/opt/leica-nvidia-cuda/sysroots/armv8a-lms-linux/usr/local/cuda-11.4/include;--library-path;/opt/leica-nvidia-cuda/sysroots/armv8a-lms-linux/usr/local/cuda-11.4/lib;-Xcompiler;--sysroot=/opt/leica-nvidia-cuda/sysroots/armv8a-lms-linux;-Xcompiler;-march=armv8-a+crc,-mbranch-protection=standard

  Id flags:
  --keep;--keep-dir;tmp;-ccbin=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/bin/aarch64-lms-linux/aarch64-lms-linux-g++-10.3.0
  -v

  The output was:

  1

  #$ _NVVM_BRANCH_=nvvm

  #$ _SPACE_=

  #$ _CUDART_=cudart

  #$
  _HERE_=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin

  #$
  _THERE_=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin

  #$ _TARGET_SIZE_=

  #$ _TARGET_DIR_=

  #$ _TARGET_SIZE_=64

  #$
  TOP=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/..

  #$
  NVVMIR_LIBRARY_DIR=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/../nvvm/libdevice

  #$
  LD_LIBRARY_PATH=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/../lib:

  #$
  PATH=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/../nvvm/bin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/bin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/sbin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/bin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/sbin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/bin/../x86_64-oesdk-linux/bin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/bin/aarch64-lms-linux:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/bin/aarch64-lms-linux-musl:/home/timoschuster/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin

  #$
  INCLUDES="-I/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/..//include"

  #$ LIBRARIES=
  "-L/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/..//lib64/stubs"
  "-L/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/..//lib64"

  #$ CUDAFE_FLAGS=

  #$ PTXAS_FLAGS=

  #$ rm tmp/a_dlink.reg.c

  #$
  "/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/bin/aarch64-lms-linux"/aarch64-lms-linux-g++-10.3.0
  -D__CUDA_ARCH__=870 -E -x c++ -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__
  -D__NVCC__ --sysroot=/opt/leica-nvidia-cuda/sysroots/armv8a-lms-linux
  -march=armv8-a+crc -mbranch-protection=standard
  -I"/opt/leica-nvidia-cuda/sysroots/armv8a-lms-linux/usr/local/cuda-11.4/include"
  "-I/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/..//include"
  -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=4
  -D__CUDACC_VER_BUILD__=315 -D__CUDA_API_VER_MAJOR__=11
  -D__CUDA_API_VER_MINOR__=4 -include "cuda_runtime.h"
  "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp1.ii"

  #$ cicc --c++14 --gnu_version=100300 --orig_src_file_name
  "CMakeCUDACompilerId.cu" --allow_managed --unsigned_chars
  --unsigned_wchar_t --arm_cross_compiler -arch compute_87 -m64
  --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1
  --include_file_name "CMakeCUDACompilerId.fatbin.c" -tused
  --gen_module_id_file --module_id_file_name
  "tmp/CMakeCUDACompilerId.module_id" --gen_c_file_name
  "tmp/CMakeCUDACompilerId.cudafe1.c" --stub_file_name
  "tmp/CMakeCUDACompilerId.cudafe1.stub.c" --gen_device_file_name
  "tmp/CMakeCUDACompilerId.cudafe1.gpu" "tmp/CMakeCUDACompilerId.cpp1.ii" -o
  "tmp/CMakeCUDACompilerId.ptx"

  /opt/leica-nvidia-cuda/sysroots/armv8a-lms-linux/usr/include/bits/math-vector-64.h(30):
  error: identifier "__Float32x4_t" is undefined

  /opt/leica-nvidia-cuda/sysroots/armv8a-lms-linux/usr/include/bits/math-vector-64.h(31):
  error: identifier "__Float64x2_t" is undefined

  /opt/leica-nvidia-cuda/sysroots/armv8a-lms-linux/usr/include/bits/math-vector-64.h(40):
  error: identifier "__SVFloat32_t" is undefined

  /opt/leica-nvidia-cuda/sysroots/armv8a-lms-linux/usr/include/bits/math-vector-64.h(41):
  error: identifier "__SVFloat64_t" is undefined

  /opt/leica-nvidia-cuda/sysroots/armv8a-lms-linux/usr/include/bits/math-vector-64.h(42):
  error: identifier "__SVBool_t" is undefined

  5 errors detected in the compilation of "CMakeCUDACompilerId.cu".

  # --error 0x1 --

  Compiling the CUDA compiler identification source file
  "CMakeCUDACompilerId.cu" failed.

  Compiler:
  /opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/nvcc

  Build flags:

  Id flags:
  --keep;--keep-dir;tmp;-ccbin=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/bin/aarch64-lms-linux/aarch64-lms-linux-g++-10.3.0
  -v

  The output was:

  1

  #$ _NVVM_BRANCH_=nvvm

  #$ _SPACE_=

  #$ _CUDART_=cudart

  #$
  _HERE_=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin

  #$
  _THERE_=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin

  #$ _TARGET_SIZE_=

  #$ _TARGET_DIR_=

  #$ _TARGET_SIZE_=64

  #$
  TOP=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/..

  #$
  NVVMIR_LIBRARY_DIR=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/../nvvm/libdevice

  #$
  LD_LIBRARY_PATH=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/../lib:

  #$
  PATH=/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/../nvvm/bin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/bin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/sbin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/bin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/sbin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/bin/../x86_64-oesdk-linux/bin:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/bin/aarch64-lms-linux:/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/bin/aarch64-lms-linux-musl:/home/timoschuster/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin

  #$
  INCLUDES="-I/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/..//include"

  #$ LIBRARIES=
  "-L/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/..//lib64/stubs"
  "-L/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/..//lib64"

  #$ CUDAFE_FLAGS=

  #$ PTXAS_FLAGS=

  #$ rm tmp/a_dlink.reg.c

  #$
  "/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/bin/aarch64-lms-linux"/aarch64-lms-linux-g++-10.3.0
  -D__CUDA_ARCH__=520 -E -x c++ -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__
  -D__NVCC__
  "-I/opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/local/cuda-11.4/bin/..//include"
  -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=4
  -D__CUDACC_VER_BUILD__=315 -D__CUDA_API_VER_MAJOR__=11
  -D__CUDA_API_VER_MINOR__=4 -include "cuda_runtime.h"
  "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp1.ii"

  cc1plus: fatal error: cuda_runtime.h: No such file or directory

  compilation terminated.

  # --error 0x1 --

Call Stack (most recent call first):
  /opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/share/cmake-3.27/Modules/CMakeDetermineCompilerId.cmake:8 (CMAKE_DETERMINE_COMPILER_ID_BUILD)
  /opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/share/cmake-3.27/Modules/CMakeDetermineCompilerId.cmake:53 (__determine_compiler_id_test)
  /opt/leica-nvidia-cuda/sysroots/x86_64-oesdk-linux/usr/share/cmake-3.27/Modules/CMakeDetermineCUDACompiler.cmake:307 (CMAKE_DETERMINE_COMPILER_ID)
  CMakeLists.txt:3 (project)

-- Configuring incomplete, errors occurred!

With mickledore, this problem does not occur, and building the application (and other, much more complex ones) works fine.

madisongh commented 6 months ago

This was due to the glibc 2.38 update that came in with nanbield. I had a fix for it in cuda.bbclass for bitbake builds, but forgot to propagate that into the SDK environment. I've cherry-picked #1445 to the nanbield branch, so it should be fixed there now as well.