The cudaMallocManaged function in cuda_runtime.h is defined with 1st parameter in type void**, which leads to the following compilation errors when passing arguments. (This compilation error is from nbnxm-cuda benchmark in HeCBench, a reproducer is included in the end.)
main.cu:409:3: error: no matching function for call to 'cudaMallocManaged'
409 | cudaMallocManaged(&a_xq, sizeof(Float4) * NUM_ATOMS);
| ^~~~~~~~~~~~~~~~~
/home/tsaini.chen/install/chipStar/chipStar_06052024/include/cuspv/cuda_runtime.h:778:27: note: candidate function not viable: no known conversion from 'Float4 **' (aka 'HIP_vector_type<float, 4> **') to 'void **' for 1st argument
778 | static inline cudaError_t cudaMallocManaged(void **DevPtr, size_t Size,
| ^ ~~~~~~~~~~~~~
main.cu:412:3: error: no matching function for call to 'cudaMallocManaged'
412 | cudaMallocManaged(&a_f, sizeof(Float3) * NUM_ATOMS);
| ^~~~~~~~~~~~~~~~~
/home/tsaini.chen/install/chipStar/chipStar_06052024/include/cuspv/cuda_runtime.h:778:27: note: candidate function not viable: no known conversion from 'Float3 **' (aka 'BasicVector<float> **') to 'void **' for 1st argument
778 | static inline cudaError_t cudaMallocManaged(void **DevPtr, size_t Size,
| ^ ~~~~~~~~~~~~~
main.cu:415:3: error: no matching function for call to 'cudaMallocManaged'
415 | cudaMallocManaged(&shiftVec, sizeof(Float3) * 45);
| ^~~~~~~~~~~~~~~~~
/home/tsaini.chen/install/chipStar/chipStar_06052024/include/cuspv/cuda_runtime.h:778:27: note: candidate function not viable: no known conversion from 'Float3 **' (aka 'BasicVector<float> **') to 'void **' for 1st argument
778 | static inline cudaError_t cudaMallocManaged(void **DevPtr, size_t Size,
| ^ ~~~~~~~~~~~~~
main.cu:418:3: error: no matching function for call to 'cudaMallocManaged'
418 | cudaMallocManaged(&fShift, sizeof(Float3) * 45);
| ^~~~~~~~~~~~~~~~~
/home/tsaini.chen/install/chipStar/chipStar_06052024/include/cuspv/cuda_runtime.h:778:27: note: candidate function not viable: no known conversion from 'Float3 **' (aka 'BasicVector<float> **') to 'void **' for 1st argument
778 | static inline cudaError_t cudaMallocManaged(void **DevPtr, size_t Size,
| ^ ~~~~~~~~~~~~~
main.cu:421:3: error: no matching function for call to 'cudaMallocManaged'
421 | cudaMallocManaged(&cj4, sizeof(nbnxn_cj4_t) * 56881);
| ^~~~~~~~~~~~~~~~~
/home/tsaini.chen/install/chipStar/chipStar_06052024/include/cuspv/cuda_runtime.h:778:27: note: candidate function not viable: no known conversion from 'nbnxn_cj4_t **' to 'void **' for 1st argument
778 | static inline cudaError_t cudaMallocManaged(void **DevPtr, size_t Size,
| ^ ~~~~~~~~~~~~~
main.cu:424:3: error: no matching function for call to 'cudaMallocManaged'
424 | cudaMallocManaged(&sci, sizeof(nbnxn_sci_t) * 4806);
| ^~~~~~~~~~~~~~~~~
/home/tsaini.chen/install/chipStar/chipStar_06052024/include/cuspv/cuda_runtime.h:778:27: note: candidate function not viable: no known conversion from 'nbnxn_sci_t **' (aka 'nbnxn_sci **') to 'void **' for 1st argument
778 | static inline cudaError_t cudaMallocManaged(void **DevPtr, size_t Size,
| ^ ~~~~~~~~~~~~~
main.cu:427:3: error: no matching function for call to 'cudaMallocManaged'
427 | cudaMallocManaged(&excl, sizeof(nbnxn_excl_t) * 19205);
| ^~~~~~~~~~~~~~~~~
/home/tsaini.chen/install/chipStar/chipStar_06052024/include/cuspv/cuda_runtime.h:778:27: note: candidate function not viable: no known conversion from 'nbnxn_excl_t **' to 'void **' for 1st argument
778 | static inline cudaError_t cudaMallocManaged(void **DevPtr, size_t Size,
| ^ ~~~~~~~~~~~~~
main.cu:430:3: error: no matching function for call to 'cudaMallocManaged'
430 | cudaMallocManaged(&atomTypes, sizeof(int) * NUM_ATOMS);
| ^~~~~~~~~~~~~~~~~
/home/tsaini.chen/install/chipStar/chipStar_06052024/include/cuspv/cuda_runtime.h:778:27: note: candidate function not viable: no known conversion from 'int **' to 'void **' for 1st argument
778 | static inline cudaError_t cudaMallocManaged(void **DevPtr, size_t Size,
| ^ ~~~~~~~~~~~~~
main.cu:433:3: error: no matching function for call to 'cudaMallocManaged'
433 | cudaMallocManaged(&nbfp, sizeof(Float2) * 1024);
| ^~~~~~~~~~~~~~~~~
/home/tsaini.chen/install/chipStar/chipStar_06052024/include/cuspv/cuda_runtime.h:778:27: note: candidate function not viable: no known conversion from 'Float2 **' (aka 'HIP_vector_type<float, 2> **') to 'void **' for 1st argument
778 | static inline cudaError_t cudaMallocManaged(void **DevPtr, size_t Size,
| ^ ~~~~~~~~~~~~~
The proposed solution in this PR is to use generic type T and cast the argument for the underlying hip call, which is the same as what the actual CUDA does in the NIVIDA version of cuda_runtime.h (line 629). Here is also a CUDA documentation with the function declaration (search for cudaMallocManaged).
Thank you!!
[Reproducer]
Get the HeCBench set with git clone https://github.com/zjin-lcf/HeCBench.git
Navigate to the nbnxm-cuda benchmark with cd HeCBench/src/nbnxm-cuda
Have chipStar loaded and compile with make CC=cucc
The cudaMallocManaged function in cuda_runtime.h is defined with 1st parameter in type void**, which leads to the following compilation errors when passing arguments. (This compilation error is from nbnxm-cuda benchmark in HeCBench, a reproducer is included in the end.)
The proposed solution in this PR is to use generic type T and cast the argument for the underlying hip call, which is the same as what the actual CUDA does in the NIVIDA version of cuda_runtime.h (line 629). Here is also a CUDA documentation with the function declaration (search for
cudaMallocManaged
).Thank you!!
[Reproducer]
git clone https://github.com/zjin-lcf/HeCBench.git
cd HeCBench/src/nbnxm-cuda
make CC=cucc