ginkgo-project / ginkgo

Numerical linear algebra software package
https://ginkgo-project.github.io/
BSD 3-Clause "New" or "Revised" License
384 stars 86 forks source link

Build failure with CUDA 12.4 #1564

Closed lahwaacz closed 2 months ago

lahwaacz commented 3 months ago

Building ginkgo with CUDA 12.4 currently fails:

FAILED: cuda/CMakeFiles/ginkgo_cuda.dir/base/device_matrix_data_kernels.cu.o
/opt/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGKO_COMPILING_CUDA -Dginkgo_cuda_EXPORTS -I/build/ginkgo-hpc-git/src/build/cuda/.. -I/build/ginkgo-hpc-git/src/build/include -I/build/ginkgo-hpc-git/src/ginkgo/include -I/build/ginkgo-hpc-git/src/ginkgo -isystem /opt/cuda/targets/x86_64-linux/include -isystem /opt/cuda/targets/x86_64-linux/include/nvtx3/.. -std=c++17 "--generate-code=arch=compute_50,code=[compute_50,sm_50]" "--generate-code=arch=compute_52,code=[compute_52,sm_52]" "--generate-code=arch=compute_53,code=[compute_53,sm_53]" "--generate-code=arch=compute_60,code=[compute_60,sm_60]" "--generate-code=arch=compute_61,code=[compute_61,sm_61]" "--generate-code=arch=compute_62,code=[compute_62,sm_62]" "--generate-code=arch=compute_70,code=[compute_70,sm_70]" "--generate-code=arch=compute_72,code=[compute_72,sm_72]" "--generate-code=arch=compute_75,code=[compute_75,sm_75]" "--generate-code=arch=compute_80,code=[compute_80,sm_80]" "--generate-code=arch=compute_86,code=[compute_86,sm_86]" "--generate-code=arch=compute_87,code=[compute_87,sm_87]" "--generate-code=arch=compute_89,code=[compute_89,sm_89]" "--generate-code=arch=compute_90,code=[compute_90,sm_90]" -Xcompiler=-fPIC --expt-relaxed-constexpr --expt-extended-lambda -MD -MT cuda/CMakeFiles/ginkgo_cuda.dir/base/device_matrix_data_kernels.cu.o -MF cuda/CMakeFiles/ginkgo_cuda.dir/base/device_matrix_data_kernels.cu.o.d -x cu -c /build/ginkgo-hpc-git/src/ginkgo/cuda/base/device_matrix_data_kernels.cu -o cuda/CMakeFiles/ginkgo_cuda.dir/base/device_matrix_data_kernels.cu.o
/opt/cuda/targets/x86_64-linux/include/thrust/system/cuda/detail/reduce_by_key.h(692): error: ambiguous "?" operation: second operand of type "thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::tuple_of_iterator_references<const int &, const int &>" can be converted to third operand type "cuda::std::__4::tuple<int, int>", and vice versa
                                       : key_type();
                                       ^
          detected during:
            instantiation of "void thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::__reduce_by_key::ReduceByKeyAgent<KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, EqualityOp, ReductionOp, NumRunsOutputIt, Size>::impl::consume_subsequent_tile<IS_LAST_TILE>(Size, int, Size, thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::__reduce_by_key::ReduceByKeyAgent<KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, EqualityOp, ReductionOp, NumRunsOutputIt, Size>::ScanTileState &) [with KeysInputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<const int *, const int *>>, ValuesInputIt=const float *, KeysOutputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<int *, int *>>, ValuesOutputIt=float *, EqualityOp=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::equal_to<cuda::std::__4::tuple<int, int>>, ReductionOp=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::plus<float>, NumRunsOutputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t *, Size=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t, IS_LAST_TILE=false]" at line 773
            instantiation of "void thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::__reduce_by_key::ReduceByKeyAgent<KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, EqualityOp, ReductionOp, NumRunsOutputIt, Size>::impl::consume_tile<IS_LAST_TILE>(thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::__reduce_by_key::ReduceByKeyAgent<KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, EqualityOp, ReductionOp, NumRunsOutputIt, Size>::size_type, int, thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::__reduce_by_key::ReduceByKeyAgent<KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, EqualityOp, ReductionOp, NumRunsOutputIt, Size>::size_type, thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::__reduce_by_key::ReduceByKeyAgent<KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, EqualityOp, ReductionOp, NumRunsOutputIt, Size>::ScanTileState &) [with KeysInputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<const int *, const int *>>, ValuesInputIt=const float *, KeysOutputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<int *, int *>>, ValuesOutputIt=float *, EqualityOp=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::equal_to<cuda::std::__4::tuple<int, int>>, ReductionOp=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::plus<float>, NumRunsOutputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t *, Size=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t, IS_LAST_TILE=false]" at line 811
            instantiation of "thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::__reduce_by_key::ReduceByKeyAgent<KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, EqualityOp, ReductionOp, NumRunsOutputIt, Size>::impl::impl(thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::__reduce_by_key::ReduceByKeyAgent<KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, EqualityOp, ReductionOp, NumRunsOutputIt, Size>::TempStorage &, KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, NumRunsOutputIt, EqualityOp, ReductionOp, Size, int, thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::__reduce_by_key::ReduceByKeyAgent<KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, EqualityOp, ReductionOp, NumRunsOutputIt, Size>::ScanTileState &) [with KeysInputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<const int *, const int *>>, ValuesInputIt=const float *, KeysOutputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<int *, int *>>, ValuesOutputIt=float *, EqualityOp=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::equal_to<cuda::std::__4::tuple<int, int>>, ReductionOp=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::plus<float>, NumRunsOutputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t *, Size=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t]" at line 849
            instantiation of "void thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::__reduce_by_key::ReduceByKeyAgent<KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, EqualityOp, ReductionOp, NumRunsOutputIt, Size>::entry(KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, NumRunsOutputIt, thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::__reduce_by_key::ReduceByKeyAgent<KeysInputIt, ValuesInputIt, KeysOutputIt, ValuesOutputIt, EqualityOp, ReductionOp, NumRunsOutputIt, Size>::ScanTileState, EqualityOp, ReductionOp, Size, int, char *) [with KeysInputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<const int *, const int *>>, ValuesInputIt=const float *, KeysOutputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<int *, int *>>, ValuesOutputIt=float *, EqualityOp=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::equal_to<cuda::std::__4::tuple<int, int>>, ReductionOp=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::plus<float>, NumRunsOutputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t *, Size=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t]" at line 150 of /opt/cuda/targets/x86_64-linux/include/thrust/system/cuda/detail/core/agent_launcher.h
            instantiation of "void thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::core::_kernel_agent<Agent,_0,_1,_2,_3,_4,_5,_6,_7,_8,_9>(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9) [with Agent=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::__reduce_by_key::ReduceByKeyAgent<thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<const int *, const int *>>, const float *, thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<int *, int *>>, float *, thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::equal_to<cuda::std::__4::tuple<int, int>>, thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::plus<float>, thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t *, thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t>, _0=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<const int *, const int *>>, _1=const float *, _2=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<int *, int *>>, _3=float *, _4=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t *, _5=cub::CUB_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::ReduceByKeyScanTileState<float, thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t, true>, _6=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::equal_to<cuda::std::__4::tuple<int, int>>, _7=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::plus<float>, _8=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t, _9=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::int32_t]" at line 997 of /opt/cuda/targets/x86_64-linux/include/thrust/system/cuda/detail/core/agent_launcher.h
            [ 5 instantiation contexts not shown ]
            instantiation of "thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::pair<KeyOutputIt, ValOutputIt> thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::reduce_by_key(thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::execution_policy<Derived> &, KeyInputIt, KeyInputIt, ValInputIt, KeyOutputIt, ValOutputIt, BinaryPred, BinaryOp) [with Derived=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::execute_on_stream, KeyInputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<const int *, const int *>>, ValInputIt=const float *, KeyOutputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<int *, int *>>, ValOutputIt=float *, BinaryPred=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::equal_to<cuda::std::__4::tuple<int, int>>, BinaryOp=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::plus<float>]" at line 1184
            instantiation of "thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::pair<KeyOutputIt, ValOutputIt> thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::reduce_by_key(thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::execution_policy<Derived> &, KeyInputIt, KeyInputIt, ValInputIt, KeyOutputIt, ValOutputIt, BinaryPred) [with Derived=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::execute_on_stream, KeyInputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<const int *, const int *>>, ValInputIt=const float *, KeyOutputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<int *, int *>>, ValOutputIt=float *, BinaryPred=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::equal_to<cuda::std::__4::tuple<int, int>>]" at line 1207
            instantiation of "thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::pair<KeyOutputIt, ValOutputIt> thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::reduce_by_key(thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::execution_policy<Derived> &, KeyInputIt, KeyInputIt, ValInputIt, KeyOutputIt, ValOutputIt) [with Derived=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::execute_on_stream, KeyInputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<const int *, const int *>>, ValInputIt=const float *, KeyOutputIt=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<int *, int *>>, ValOutputIt=float *]" at line 97 of /opt/cuda/targets/x86_64-linux/include/thrust/detail/reduce.inl
            instantiation of "thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::pair<OutputIterator1, OutputIterator2> thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::reduce_by_key(const thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::detail::execution_policy_base<DerivedPolicy> &, InputIterator1, InputIterator1, InputIterator2, OutputIterator1, OutputIterator2) [with DerivedPolicy=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::cuda_cub::execute_on_stream, InputIterator1=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<const int *, const int *>>, InputIterator2=const float *, OutputIterator1=thrust::THRUST_200301_500_520_530_600_610_620_700_720_750_800_860_870_890_900_NS::zip_iterator<cuda::std::__4::tuple<int *, int *>>, OutputIterator2=float *]" at line 76 of /build/ginkgo-hpc-git/src/ginkgo/common/cuda_hip/base/device_matrix_data_kernels.hpp.inc
            instantiation of "void gko::kernels::cuda::components::sum_duplicates(std::shared_ptr<const gko::CudaExecutor>, gko::size_type, gko::array<ValueType> &, gko::array<IndexType> &, gko::array<IndexType> &) [with ValueType=float, IndexType=int]" at line 84 of /build/ginkgo-hpc-git/src/ginkgo/common/cuda_hip/base/device_matrix_data_kernels.hpp.inc

The issue is likely nvcc, but please report it to NVIDIA or find a workaround.

upsj commented 3 months ago

There would be a possible workaround, but it involves a const_cast I would ideally like to avoid, so let's see what the NVIDIA folks have to say