I tried to use sparse matrix-vector multiplication with the library, and I modified sample_spmv.cc as follows:
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_MINIMUM_OPENCL_VERSION BUILD_CLVERSION
#define CL_HPP_TARGET_OPENCL_VERSION BUILD_CLVERSION
#include <CL/cl.hpp>
#include "clSPARSE.h"
#include "clSPARSE-error.h"
#include <chrono>
#include <cstddef>
#include <iostream>
#include <string>
#include <typeinfo>
#include <vector>
/**
* @name csrMtx
* @info Host-side bundle of the arrays filled in when a sparse matrix is
*       converted to CSR format, together with the number of stored entries.
*       Every pointer starts out null and the entry count starts at zero;
*       the structure itself never allocates or frees the arrays.
*
* @author Mehdi
*/
struct csrMtx
{
    float* value{nullptr};   // stored matrix entries
    int* col{nullptr};       // column index recorded for each stored entry
    int* row_ptr{nullptr};   // row information recorded by the conversion routine
    int nnz{0};              // number of stored entries
};
/**
* @name denseMtxGen
* @info Fill A with the dense matrix of the 5-point finite-difference
*       discretization of the Poisson equation on a row x col grid.
*       Grid nodes are numbered row-major, so node i sits in grid column
*       i % col; the matrix is square with order n = row * col and is
*       stored row-major in A (n * n floats, provided by the caller).
*
*       Every entry of A is written: the diagonal is 4, each existing
*       grid neighbour (east, west, south, north) is -1 and everything
*       else is 0. The buffer therefore does not need to be cleared by
*       the caller.
*
* @param A   output buffer with room for (row * col)^2 floats
* @param row number of grid rows
* @param col number of grid columns
*
* @author Mehdi
*/
void denseMtxGen(float* A, const int row, const int col)
{
    // Nothing sensible can be generated for a missing buffer or an empty grid.
    if (A == nullptr || row <= 0 || col <= 0) return;

    const std::size_t cols = static_cast<std::size_t>(col);
    const std::size_t n = static_cast<std::size_t>(row) * cols;

    // Clear the whole matrix first: the caller may hand in uninitialised
    // storage, and later stages decide sparsity by comparing against zero.
    for (std::size_t k = 0; k < n * n; ++k)
    {
        A[k] = 0.0f;
    }

    for (std::size_t i = 0; i < n; ++i)
    {
        float* const matrixRow = A + i * n;
        const std::size_t gridCol = i % cols;

        matrixRow[i] = 4;
        // East/west neighbours exist only inside the same grid row; without
        // this check the last node of one grid row would be coupled to the
        // first node of the next one.
        if (gridCol + 1 < cols) matrixRow[i + 1] = -1;
        if (gridCol > 0) matrixRow[i - 1] = -1;
        // South/north neighbours are one full grid row away.
        if (i + cols < n) matrixRow[i + cols] = -1;
        if (i >= cols) matrixRow[i - cols] = -1;
    }
}
/**
* @name denseVecGen
* @info Write the constant 4 into the first `row` entries of x.
*       Nothing is written when `row` is zero or negative.
*
* @author Mehdi
*/
void denseVecGen(float* x, const int row)
{
    float* cursor = x;
    int remaining = row;
    while (remaining > 0)
    {
        *cursor = 4;
        ++cursor;
        --remaining;
    }
}
/**
* @name csrMtx
* @info Convert sparse matrix to CSR format sparse matrix
* The data stores in csrMtx structure
* @author Mehdi
*/
struct csrMtx denseToCsr(float* A_dense, const int row, const int col)
{
csrMtx myCsrMtx;
int nzero_row = 5;
int A_col = row * col;
myCsrMtx.value = new float[A_col * nzero_row];
myCsrMtx.col = new int[A_col];
myCsrMtx.row_ptr = new int[A_col];
for(int i = 0; i < A_col; ++i)
{
for(int j = 0; j < A_col; ++j)
{
if (A_dense[i * A_col + j] != 0)
{
//std::cout <<"A[" << i << " * " << j << "] = " <<A_dense[i * A_col + j] << std::endl;
myCsrMtx.value[myCsrMtx.nnz] = A_dense[i * A_col + j];
myCsrMtx.col[myCsrMtx.nnz] = j;
myCsrMtx.row_ptr[myCsrMtx.nnz] = i;
myCsrMtx.nnz++;
}
}
}
return myCsrMtx;
}
int main (int argc, char* argv[])
{
/** Step 1. Setup OpenCL environment; **/
// Init OpenCL environment;
cl_int cl_status;
// Get OpenCL platforms
std::vector<cl::Platform> platforms;
cl_status = cl::Platform::get(&platforms);
if (cl_status != CL_SUCCESS)
{
std::cout << "Problem with getting OpenCL platforms"
<< " [" << cl_status << "]" << std::endl;
return -2;
}
int platform_id = 0;
for (const auto& p : platforms)
{
std::cout << "Platform ID " << platform_id++ << " : "
<< p.getInfo<CL_PLATFORM_NAME>() << std::endl;
}
// Using first platform
platform_id = 0;
cl::Platform platform = platforms[platform_id];
// Get device from platform
std::vector<cl::Device> devices;
cl_status = platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
if (cl_status != CL_SUCCESS)
{
std::cout << "Problem with getting devices from platform"
<< " [" << platform_id << "] " << platform.getInfo<CL_PLATFORM_NAME>()
<< " error: [" << cl_status << "]" << std::endl;
}
std::cout << std::endl
<< "Getting devices from platform " << platform_id << std::endl;
cl_int device_id = 0;
for (const auto& device : devices)
{
std::cout << "Device ID " << device_id++ << " : "
<< device.getInfo<CL_DEVICE_NAME>() << std::endl;
}
// Using first device;
device_id = 0;
cl::Device device = devices[device_id];
// Create OpenCL context;
cl::Context context(device);
// Create OpenCL queue;
cl::CommandQueue queue(context, device);
/** Step 2. Setup GPU buffers **/
//we will allocate it after matrix will be loaded;
clsparseScalar alpha;
clsparseInitScalar(&alpha);
alpha.value = clCreateBuffer(context(), CL_MEM_READ_ONLY, sizeof(float),
nullptr, &cl_status);
clsparseScalar beta;
clsparseInitScalar(&beta);
beta.value = clCreateBuffer(context(), CL_MEM_READ_ONLY, sizeof(float),
nullptr, &cl_status);
cldenseVector x;
clsparseInitVector(&x);
cldenseVector y;
clsparseInitVector(&y);
clsparseCsrMatrix A;
clsparseInitCsrMatrix(&A);
/** Step 3. Init clSPARSE library **/
clsparseStatus status = clsparseSetup();
if (status != clsparseSuccess)
{
std::cout << "Problem with executing clsparseSetup()" << std::endl;
return -3;
}
// Create clsparseControl object
clsparseCreateResult createResult = clsparseCreateControl( queue( ) );
CLSPARSE_V( createResult.status, "Failed to create clsparse control" );
int denseRow = 5;
int denseCol = 5;
float* A_dense = new float[denseRow * denseCol * denseRow * denseCol];
csrMtx myCsrMtx;
denseMtxGen(A_dense, denseRow, denseCol);
myCsrMtx = denseToCsr(A_dense, denseRow, denseCol);
// Read matrix from file. Calculates the rowBlocks structures as well.
clsparseIdx_t nnz, row, col;
A.num_nonzeros = myCsrMtx.nnz;
A.num_rows = row * col;
A.num_cols = row * col;
std::cout << typeid(A.values).name() << std::endl;
// Allocate memory for CSR matrix
A.values = ::clCreateBuffer( context(), CL_MEM_READ_ONLY,
A.num_nonzeros * sizeof( float ), NULL, &cl_status );
std::cout << cl_status << std::endl;
A.col_indices = ::clCreateBuffer( context(), CL_MEM_READ_ONLY,
A.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &cl_status );
std::cout << "7" << std::endl;
A.row_pointer = ::clCreateBuffer( context(), CL_MEM_READ_ONLY,
( A.num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &cl_status );
std::cout << "7" << std::endl;
float one = 1.0f;
float zero = 0.0f;
// alpha = 1;
float* halpha = (float*) clEnqueueMapBuffer(queue(), alpha.value, CL_TRUE, CL_MAP_WRITE,
0, sizeof(float), 0, nullptr, nullptr, &cl_status);
*halpha = one;
cl_status = clEnqueueUnmapMemObject(queue(), alpha.value, halpha,
0, nullptr, nullptr);
//beta = 0;
float* hbeta = (float*) clEnqueueMapBuffer(queue(), beta.value, CL_TRUE, CL_MAP_WRITE,
0, sizeof(float), 0, nullptr, nullptr, &cl_status);
*hbeta = zero;
cl_status = clEnqueueUnmapMemObject(queue(), beta.value, hbeta,
0, nullptr, nullptr);
x.num_values = A.num_cols;
x.values = clCreateBuffer(context(), CL_MEM_READ_ONLY, x.num_values * sizeof(float),
NULL, &cl_status);
cl_status = clEnqueueFillBuffer(queue(), x.values, &one, sizeof(float),
0, x.num_values * sizeof(float), 0, nullptr, nullptr);
y.num_values = A.num_rows;
y.values = clCreateBuffer(context(), CL_MEM_READ_WRITE, y.num_values * sizeof(float),
NULL, &cl_status);
cl_status = clEnqueueFillBuffer(queue(), y.values, &zero, sizeof(float),
0, y.num_values * sizeof(float), 0, nullptr, nullptr);
auto t1 = std::chrono::high_resolution_clock::now();
/**Step 4. Call the spmv algorithm */
status = clsparseScsrmv(&alpha, &A, &x, &beta, &y, createResult.control );
if (status != clsparseSuccess)
{
std::cout << "Problem with execution SpMV algorithm."
<< " Error: " << status << std::endl;
}
auto t2 = std::chrono::high_resolution_clock::now();
/** Step 5. Close & release resources */
status = clsparseReleaseControl( createResult.control );
if (status != clsparseSuccess)
{
std::cout << "Problem with releasing control object."
<< " Error: " << status << std::endl;
}
status = clsparseTeardown();
if (status != clsparseSuccess)
{
std::cout << "Problem with closing clSPARSE library."
<< " Error: " << status << std::endl;
}
//release mem;
clsparseCsrMetaDelete( &A );
clReleaseMemObject ( A.values );
clReleaseMemObject ( A.col_indices );
clReleaseMemObject ( A.row_pointer );
clReleaseMemObject ( x.values );
clReleaseMemObject ( y.values );
clReleaseMemObject ( alpha.value );
clReleaseMemObject ( beta.value );
std::cout << "Program completed successfully." << std::endl;
std::cout << " OpenCl Execution time= "
<<std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count() * 1e-3
<< " seconds" << std::endl;
return 0;
}
When I run the code, I get a segmentation fault at the point where A.values is created. I cannot figure out why. When I used gdb to debug the code, I got the following error:
Program received signal SIGSEGV, Segmentation fault.
_int_malloc (av=0x7ffff73ab760 <main_arena>, bytes=32) at malloc.c:3702
3702 malloc.c: No such file or directory.
I tried to use sparse matrix-vector multiplication with the library, and I modified sample_spmv.cc as follows:
When I run the code, I get a segmentation fault at the point where `A.values` is created. I cannot figure out why. When I used `gdb` to debug the code, I got the following error: