Closed zhenweilin closed 8 months ago
I am trying to use the cusparseAxpby API. When I place alpha and beta in device memory, the call fails with a segmentation fault. It seems that the API does not support alpha and beta on the device, which contradicts the official documentation.
// Sparse-vector / dense-vector AXPBY example using cuSPARSE:
//     y = alpha * x + beta * y
// with the scalars alpha and beta stored in DEVICE memory.
//
// cuSPARSE handles default to CUSPARSE_POINTER_MODE_HOST, in which scalar
// arguments are dereferenced on the host. Passing device pointers in that
// mode crashes (segmentation fault), so the handle must be switched to
// CUSPARSE_POINTER_MODE_DEVICE before cusparseAxpby() is called.
#include <cuda_runtime_api.h> // cudaMalloc, cudaMemcpy, cudaFree
#include <cusparse.h>         // cusparseAxpby
#include <stdio.h>            // printf
#include <stdlib.h>           // EXIT_FAILURE

// Evaluate a CUDA runtime call; on failure print the error and make the
// enclosing function return EXIT_FAILURE. Only usable inside a function
// returning int. Resources acquired earlier are not released on this path,
// which is acceptable for a short-lived sample process.
#define CHECK_CUDA(func)                                                       \
    do {                                                                       \
        cudaError_t status = (func);                                           \
        if (status != cudaSuccess) {                                           \
            printf("CUDA API failed at line %d with error: %s (%d)\n",         \
                   __LINE__, cudaGetErrorString(status), status);              \
            return EXIT_FAILURE;                                               \
        }                                                                      \
    } while (0)

// Same contract as CHECK_CUDA, for calls returning cusparseStatus_t.
#define CHECK_CUSPARSE(func)                                                   \
    do {                                                                       \
        cusparseStatus_t status = (func);                                      \
        if (status != CUSPARSE_STATUS_SUCCESS) {                               \
            printf("CUSPARSE API failed at line %d with error: %s (%d)\n",     \
                   __LINE__, cusparseGetErrorString(status), status);          \
            return EXIT_FAILURE;                                               \
        }                                                                      \
    } while (0)

// Runs the AXPBY example and verifies the result against a precomputed
// reference. Returns EXIT_SUCCESS when every API call succeeds (the
// PASSED/FAILED verdict is printed, not returned), EXIT_FAILURE as soon as
// any CUDA or cuSPARSE call reports an error.
int main(void) {
    // Host problem definition
    int   size         = 8;
    int   nnz          = 4;
    int   hX_indices[] = { 0, 3, 4, 7 };
    float hX_values[]  = { 1.0f, 2.0f, 3.0f, 4.0f };
    float hY[]         = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f };
    // Reference: alpha * x + beta * y evaluated element-wise on the host data.
    float hY_result[]  = { 5.0f, 6.0f, 9.0f, 16.0f, 21.0f, 18.0f, 21.0f, 32.0f };
    float alpha        = 2.0f;
    float beta         = 3.0f;

    // Put alpha and beta into device memory
    float *d_alpha, *d_beta;
    CHECK_CUDA( cudaMalloc((void**) &d_alpha, sizeof(float)) );
    CHECK_CUDA( cudaMalloc((void**) &d_beta,  sizeof(float)) );
    CHECK_CUDA( cudaMemcpy(d_alpha, &alpha, sizeof(float),
                           cudaMemcpyHostToDevice) );
    CHECK_CUDA( cudaMemcpy(d_beta, &beta, sizeof(float),
                           cudaMemcpyHostToDevice) );
    //--------------------------------------------------------------------------
    // Device memory management
    int   *dX_indices;
    float *dY, *dX_values;
    CHECK_CUDA( cudaMalloc((void**) &dX_indices, nnz * sizeof(int)) );
    CHECK_CUDA( cudaMalloc((void**) &dX_values,  nnz * sizeof(float)) );
    CHECK_CUDA( cudaMalloc((void**) &dY,         size * sizeof(float)) );

    CHECK_CUDA( cudaMemcpy(dX_indices, hX_indices, nnz * sizeof(int),
                           cudaMemcpyHostToDevice) );
    CHECK_CUDA( cudaMemcpy(dX_values, hX_values, nnz * sizeof(float),
                           cudaMemcpyHostToDevice) );
    CHECK_CUDA( cudaMemcpy(dY, hY, size * sizeof(float),
                           cudaMemcpyHostToDevice) );
    //--------------------------------------------------------------------------
    // CUSPARSE APIs
    cusparseHandle_t     handle = NULL;
    cusparseSpVecDescr_t vecX;
    cusparseDnVecDescr_t vecY;
    CHECK_CUSPARSE( cusparseCreate(&handle) );
    // alpha/beta live in device memory: tell cuSPARSE so, otherwise it
    // dereferences the device pointers on the host and crashes.
    CHECK_CUSPARSE( cusparseSetPointerMode(handle,
                                           CUSPARSE_POINTER_MODE_DEVICE) );
    // Create sparse vector X
    CHECK_CUSPARSE( cusparseCreateSpVec(&vecX, size, nnz, dX_indices, dX_values,
                                        CUSPARSE_INDEX_32I,
                                        CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F) );
    // Create dense vector y
    CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, size, dY, CUDA_R_32F) );

    // Execute Axpby
    CHECK_CUSPARSE( cusparseAxpby(handle, d_alpha, vecX, d_beta, vecY) );

    // Destroy matrix/vector descriptors
    CHECK_CUSPARSE( cusparseDestroySpVec(vecX) );
    CHECK_CUSPARSE( cusparseDestroyDnVec(vecY) );
    CHECK_CUSPARSE( cusparseDestroy(handle) );
    //--------------------------------------------------------------------------
    // Device result check
    CHECK_CUDA( cudaMemcpy(hY, dY, size * sizeof(float),
                           cudaMemcpyDeviceToHost) );
    int correct = 1;
    for (int i = 0; i < size; i++) {
        if (hY[i] != hY_result[i]) { // direct floating point comparison is not
            correct = 0;             // reliable in standard code
            break;
        }
    }
    if (correct)
        printf("axpby_example test PASSED\n");
    else
        printf("axpby_example test FAILED: wrong result\n");
    //--------------------------------------------------------------------------
    // Device memory deallocation
    CHECK_CUDA( cudaFree(d_alpha) );
    CHECK_CUDA( cudaFree(d_beta) );
    CHECK_CUDA( cudaFree(dX_indices) );
    CHECK_CUDA( cudaFree(dX_values) );
    CHECK_CUDA( cudaFree(dY) );
    return EXIT_SUCCESS;
}
You are missing the cusparseSetPointerMode() call. Without it, the handle stays in the default host pointer mode, so the cuSPARSE APIs don't know that the alpha and beta pointers refer to device memory.
cusparseSetPointerMode()
Thank you very much, it works.
I am trying to use the cusparseAxpby API. When I place alpha and beta in device memory, the call fails with a segmentation fault. It seems that the API does not support alpha and beta on the device, which contradicts the official documentation.