openppl-public / ppl.cv

ppl.cv is a high-performance image processing library of openPPL supporting various platforms.
Apache License 2.0
484 stars 108 forks source link

Cuda 12.0 Failed build #136

Open Maxfashko opened 2 months ago

Maxfashko commented 2 months ago

16 42.54 [ 86%] Building CUDA object CMakeFiles/pplcv_static.dir/src/ppl/cv/cuda/sepfilter2d.cu.o

16 42.72 /opt/ppl.cv/src/ppl/cv/cuda/norm.hpp(687): warning #173-D: floating-point value does not fit in required integral type

16 42.72 partial_mins[threadIdx_x] = 3.40282346638528859811704183484516925e+38F;

16 42.72 ^

16 42.72 detected during instantiation of "void ppl::cv::cuda::MinMaxKernel(const Tsrc *, int, int, int, int, const ppl::cv::cuda::uchar *, int, int, Tdst *) [with Tsrc=ppl::cv::cuda::uchar, Tdst=long long]" at line 207 of /opt/ppl.cv/src/ppl/cv/cuda/normalize.cu

16 42.72

16 42.72 Remark: The warnings can be suppressed with "-diag-suppress <warning-number>"

16 42.72

16 42.72 /opt/ppl.cv/src/ppl/cv/cuda/norm.hpp(832): warning #173-D: floating-point value does not fit in required integral type

16 42.72 partial_mins[threadIdx_x] = 3.40282346638528859811704183484516925e+38F;

16 42.72 ^

16 42.72 detected during instantiation of "void ppl::cv::cuda::MinMaxKernel(const Tsrc *, int, int, int, int, const ppl::cv::cuda::uchar *, int, int, Tdst *) [with Tsrc=ppl::cv::cuda::uchar, Tdst=long long]" at line 207 of /opt/ppl.cv/src/ppl/cv/cuda/normalize.cu

16 42.72

16 42.77 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(32): error: texture is not a template

16 42.77 static texture<uchar, 0x02,

16 42.77 ^

16 42.77

16 42.77 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(34): error: texture is not a template

16 42.77 static texture<uchar4, 0x02,

16 42.77 ^

16 42.77

16 42.77 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(36): error: texture is not a template

16 42.77 static texture<float, 0x02,

16 42.77 ^

16 42.77

16 42.77 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(126): error: no instance of overloaded function "tex2D" matches the argument list

16 42.77 argument types are: (<error-type>, float, float)

16 42.77 float value = tex2D(uchar_c1_ref, coordinate_x, coordinate_y);

16 42.77 ^

16 42.77

16 42.77 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(133): error: no instance of overloaded function "tex2D" matches the argument list

16 42.77 argument types are: (<error-type>, float, float)

16 42.77 float4 value = tex2D(uchar_c4_ref, coordinate_x, coordinate_y);

16 42.77 ^

16 42.77

16 42.77 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(158): error: no instance of overloaded function "tex2D" matches the argument list

16 42.77 argument types are: (<error-type>, float, float)

16 42.77 float value = tex2D(float_c1_ref, coordinate_x, coordinate_y);

16 42.77 ^

16 42.77

16 42.78 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(767): error: no instance of overloaded function "tex2D" matches the argument list

16 42.78 argument types are: (<error-type>, float, float)

16 42.78 float value = tex2D(uchar_c1_ref, float_x, float_y);

16 42.78 ^

16 42.78

16 42.78 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(774): error: no instance of overloaded function "tex2D" matches the argument list

16 42.78 argument types are: (<error-type>, float, float)

16 42.78 float4 value = tex2D(uchar_c4_ref, float_x, float_y);

16 42.78 ^

16 42.78

16 42.78 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(808): error: no instance of overloaded function "tex2D" matches the argument list

16 42.78 argument types are: (<error-type>, float, float)

16 42.78 float value = tex2D(float_c1_ref, float_x, float_y);

16 42.78 ^

16 42.78

16 42.79 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(1069): error: identifier "cudaBindTexture2D" is undefined

16 42.79 code = cudaBindTexture2D(0, uchar_c1_ref, src, desc, src_cols, src_rows,

16 42.79 ^

16 42.79

16 42.79 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(1085): error: identifier "cudaBindTexture2D" is undefined

16 42.79 code = cudaBindTexture2D(0, uchar_c4_ref, src, desc, src_cols, src_rows,

16 42.79 ^

16 42.79

16 42.79 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(1162): error: identifier "cudaBindTexture2D" is undefined

16 42.79 code = cudaBindTexture2D(0, uchar_c1_ref, src, desc, src_cols, src_rows,

16 42.79 ^

16 42.79

16 42.79 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(1179): error: identifier "cudaBindTexture2D" is undefined

16 42.79 code = cudaBindTexture2D(0, uchar_c4_ref, src, desc, src_cols, src_rows,

16 42.79 ^

16 42.79

16 42.80 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(1264): error: identifier "cudaBindTexture2D" is undefined

16 42.80 code = cudaBindTexture2D(0, float_c1_ref, src, desc, src_cols, src_rows,

16 42.80 ^

16 42.80

16 42.80 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(1341): error: identifier "cudaBindTexture2D" is undefined

16 42.80 code = cudaBindTexture2D(0, float_c1_ref, src, desc, src_cols, src_rows,

16 42.80 ^

16 42.80

16 42.81 15 errors detected in the compilation of "/opt/ppl.cv/src/ppl/cv/cuda/resize.cu".

16 42.84 make[2]: *** [CMakeFiles/pplcv_static.dir/build.make:585: CMakeFiles/pplcv_static.dir/src/ppl/cv/cuda/resize.cu.o] Error 2

16 42.84 make[2]: *** Waiting for unfinished jobs....

16 44.18 /opt/ppl.cv/src/ppl/cv/cuda/norm.hpp(687): warning #173-D: floating-point value does not fit in required integral type

16 44.18 partial_mins[threadIdx_x] = 3.40282346638528859811704183484516925e+38F;

jimurk commented 1 month ago

Some operators in ppl.cv use the texture reference APIs, which are no longer supported in CUDA 12. You can remove this code and use the existing ordinary CUDA implementation instead, since the texture-based path was only there for performance.

pieris98 commented 1 month ago

I've made this PR. As mentioned in the PR, this only builds for my setup (RTX 3090, compute capability sm_86, CUDA toolkit version 12.2), and I haven't tested it for correctness yet. To make the CMake build work in my case, I also had to hack in my sm_86 target and comment out some older SM compute capabilities that are apparently not supported by CUDA 12 and caused errors. This is not perfect, but it did the job. Here's my cuda.cmake file:

# Load the shared CUDA helper module from the hpcc dependency tree.
# HPCC_DEPS_DIR must already point at the directory that contains hpcc/.
include("${HPCC_DEPS_DIR}/hpcc/cmake/cuda-common.cmake")

# Opt-in switch: when PPLCV_USE_MSVC_STATIC_RUNTIME is truthy, invoke the hpcc
# hook for the MSVC static runtime.
# NOTE(review): hpcc_cuda_use_msvc_static_runtime() is presumably defined by
# the module included above -- confirm.
if (PPLCV_USE_MSVC_STATIC_RUNTIME)
    hpcc_cuda_use_msvc_static_runtime()
endif ()

# Build the list of "-gencode" options handed to nvcc and append it to
# CMAKE_CUDA_FLAGS.
#
# Inputs : CUDA_VERSION_MAJOR / CUDA_VERSION_MINOR (expected to be set by the
#          CUDA detection that ran before this point).
# Outputs: _NVCC_FLAGS (space-separated -gencode options) and an extended
#          CMAKE_CUDA_FLAGS.
#
# Every architecture is gated on the toolkit versions that can compile it, so
# nothing has to be commented out by hand when switching toolkits:
#   * sm_35 / sm_37 were removed in CUDA 12.0.
#   * sm_50 .. sm_72 were removed in CUDA 13.0.
#   * newer architectures only have a lower bound (first toolkit with support).
set(_NVCC_FLAGS "")

# Combine major and minor into one version string. Gates must be evaluated on
# the full version: checking the minor number on its own (e.g. "minor >= 1" for
# sm_86) wrongly rejects CUDA 12.0, whose minor number is 0.
set(_PPLCV_CUDA_VERSION "")
if(CUDA_VERSION_MAJOR MATCHES "^[0-9]+$")
    set(_PPLCV_CUDA_VERSION "${CUDA_VERSION_MAJOR}")
    if(CUDA_VERSION_MINOR MATCHES "^[0-9]+$")
        string(APPEND _PPLCV_CUDA_VERSION ".${CUDA_VERSION_MINOR}")
    endif()
endif()

set(_PPLCV_CUDA_ARCHS "")
# Without a usable version number no architecture is selected and nvcc falls
# back to its own default target.
if(NOT _PPLCV_CUDA_VERSION STREQUAL "")
    if(_PPLCV_CUDA_VERSION VERSION_LESS "12.0")
        list(APPEND _PPLCV_CUDA_ARCHS 35 37)
    endif()
    if(_PPLCV_CUDA_VERSION VERSION_LESS "13.0")
        list(APPEND _PPLCV_CUDA_ARCHS 50 53)
        if(_PPLCV_CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
            list(APPEND _PPLCV_CUDA_ARCHS 60 61 62)
        endif()
        if(_PPLCV_CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
            list(APPEND _PPLCV_CUDA_ARCHS 70)
        endif()
        if(_PPLCV_CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
            list(APPEND _PPLCV_CUDA_ARCHS 72)
        endif()
    endif()
    if(_PPLCV_CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
        list(APPEND _PPLCV_CUDA_ARCHS 75)
    endif()
    if(_PPLCV_CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
        list(APPEND _PPLCV_CUDA_ARCHS 80)
    endif()
    if(_PPLCV_CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
        list(APPEND _PPLCV_CUDA_ARCHS 86)
    endif()
    if(_PPLCV_CUDA_VERSION VERSION_GREATER_EQUAL "11.4")
        list(APPEND _PPLCV_CUDA_ARCHS 87)
    endif()
endif()

# Emit one "-gencode arch=compute_XX,code=sm_XX" pair per selected architecture.
foreach(_pplcv_arch IN LISTS _PPLCV_CUDA_ARCHS)
    string(APPEND _NVCC_FLAGS
        " -gencode arch=compute_${_pplcv_arch},code=sm_${_pplcv_arch}")
endforeach()
unset(_pplcv_arch)
unset(_PPLCV_CUDA_ARCHS)
unset(_PPLCV_CUDA_VERSION)

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${_NVCC_FLAGS}")

# --------------------------------------------------------------------------- #

# Mark the CUDA backend as enabled through the shared compile-definition list.
list(APPEND PPLCV_COMPILE_DEFINITIONS PPLCV_USE_CUDA)

# Public headers: every src/ppl/cv/cuda/*.h file is installed to
# include/ppl/cv/cuda.
file(GLOB PPLCV_CUDA_PUBLIC_HEADERS src/ppl/cv/cuda/*.h)
install(
    FILES ${PPLCV_CUDA_PUBLIC_HEADERS}
    DESTINATION include/ppl/cv/cuda
)

# Library sources: host-side .cpp files (including utility/) and .cu kernels.
# NOTE(review): the *.cpp pattern also matches *_benchmark.cpp and
# *_unittest.cpp, so those files end up in PPLCV_SRC unless they are filtered
# out elsewhere -- confirm against the including CMakeLists.
file(GLOB PPLCV_CUDA_SRC
    src/ppl/cv/cuda/*.cpp
    src/ppl/cv/cuda/utility/*.cpp
)
file(GLOB PPLCV_CUDA_CU src/ppl/cv/cuda/*.cu)
list(APPEND PPLCV_SRC ${PPLCV_CUDA_SRC})
list(APPEND PPLCV_SRC ${PPLCV_CUDA_CU})

# CUDA include paths and libraries, wrapped in BUILD_INTERFACE so they apply
# only when building from this tree.
list(APPEND PPLCV_INCLUDE_DIRECTORIES $<BUILD_INTERFACE:${CUDA_INCLUDE_DIRS}>)
list(APPEND PPLCV_LINK_LIBRARIES $<BUILD_INTERFACE:${CUDA_LIBRARIES}>)

# Glob the benchmark sources and add them to the shared benchmark list.
file(GLOB PPLCV_CUDA_BENCHMARK_SRC src/ppl/cv/cuda/*_benchmark.cpp)
list(APPEND PPLCV_BENCHMARK_SRC ${PPLCV_CUDA_BENCHMARK_SRC})

# Glob the unit-test sources and add them to the shared unittest list.
file(GLOB PPLCV_CUDA_UNITTEST_SRC src/ppl/cv/cuda/*_unittest.cpp)
list(APPEND PPLCV_UNITTEST_SRC ${PPLCV_CUDA_UNITTEST_SRC})