clMathLibraries / clSPARSE

a software library containing Sparse functions written in OpenCL
Apache License 2.0
173 stars 60 forks source link

Segmentation fault when create cl_mem in sample_spmv #215

Open mehdiBabamehdi opened 4 years ago

mehdiBabamehdi commented 4 years ago

I tried to use sparse matrix-vector multiplication with the library, and I modified sample_spmv.cc as follows:


#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_MINIMUM_OPENCL_VERSION BUILD_CLVERSION
#define CL_HPP_TARGET_OPENCL_VERSION BUILD_CLVERSION
#include <CL/cl.hpp>

#include "clSPARSE.h"
#include "clSPARSE-error.h"

#include <algorithm>
#include <chrono>
#include <cstddef>
#include <iostream>
#include <string>
#include <typeinfo>
#include <vector>

 /**
 * @name   csrMtx
 * @info   Structure stores the data results in converting sparse matrix to CSR format
 *         
 * @author Mehdi
 */
struct csrMtx
{
    // Non-zero coefficients; the struct has no destructor, so whoever
    // allocates this array is also responsible for freeing it.
    float* value{nullptr};
    // Column index associated with each stored coefficient.
    int* col{nullptr};
    // Row bookkeeping array filled in by the conversion routine.
    int* row_ptr{nullptr};
    // Number of coefficients currently stored.
    int nnz{0};
};

/**
 * @name   denseMtxGen
 * @info   Fill a dense, row-major (row*col) x (row*col) matrix with the
 *         coefficients of a finite-difference discretization of the
 *         Poisson equation: 4 on the diagonal and -1 at the offsets
 *         +/-1 and +/-col, wherever those offsets stay inside the matrix.
 *         Every other entry is explicitly set to 0, so the caller may pass
 *         uninitialised storage (e.g. a plain `new float[]`).
 *
 * @param  A    Output buffer holding at least (row*col)^2 floats.
 * @param  row  Number of grid rows; nothing is written when <= 0.
 * @param  col  Number of grid columns; nothing is written when <= 0.
 *
 * @author Mehdi
 */
void denseMtxGen(float* A, const int row, const int col)
{
   if (A == nullptr || row <= 0 || col <= 0)
   {
      return;
   }

   // Use size_t for all index arithmetic: i * n overflows int already for
   // moderately sized grids.
   const std::size_t stride = static_cast<std::size_t>(col);
   const std::size_t n      = static_cast<std::size_t>(row) * stride;

   // The stencil below only touches up to five entries per row. Without this
   // fill the remaining entries keep whatever the allocation contained and
   // are later mistaken for non-zeros.
   std::fill_n(A, n * n, 0.0f);

   for (std::size_t i = 0; i < n; ++i)
   {
      float* const rowStart = A + i * n;

      rowStart[i] = 4;
      // NOTE(review): the +/-1 neighbours are also written at the end of a
      // grid row (i % col == col - 1), which couples the last cell of one
      // grid row to the first cell of the next. Kept as in the original;
      // confirm whether that is intended.
      if (i + 1      <  n)      rowStart[i + 1]      = -1;
      if (i          >= 1)      rowStart[i - 1]      = -1;
      if (i + stride <  n)      rowStart[i + stride] = -1;
      if (i          >= stride) rowStart[i - stride] = -1;
   }
}

/**
 * @name   denseVecGen
 * @info   Set the first `row` entries of a vector to the constant 4.
 *
 * @param  x    Destination array; must provide room for `row` floats.
 * @param  row  Number of entries to write; nothing happens when <= 0.
 *
 * @author Mehdi
 */
void denseVecGen(float* x, const int row)
{
   float* cursor = x;
   for (int remaining = row; remaining > 0; --remaining)
   {
      *cursor = 4;
      ++cursor;
   }
}

/**
 * @name   csrMtx
 * @info   Convert sparse matrix to CSR format sparse matrix
 *           The data stores in csrMtx structure
 * @author Mehdi
 */
struct csrMtx denseToCsr(float* A_dense, const int row, const int col)
{
    csrMtx myCsrMtx;
    int nzero_row    = 5;
    int A_col        = row * col;
    myCsrMtx.value   = new float[A_col * nzero_row];
    myCsrMtx.col     = new int[A_col];
    myCsrMtx.row_ptr = new int[A_col];

    for(int i = 0; i < A_col; ++i)
    {
       for(int j = 0; j < A_col; ++j)
        { 
            if (A_dense[i * A_col + j] != 0)
            {
                //std::cout <<"A[" << i << " * " << j << "] = " <<A_dense[i * A_col + j] << std::endl; 
                myCsrMtx.value[myCsrMtx.nnz]   = A_dense[i * A_col + j];
                myCsrMtx.col[myCsrMtx.nnz]     = j;
                myCsrMtx.row_ptr[myCsrMtx.nnz] = i;
                myCsrMtx.nnz++;
            }
        }
    }
    return myCsrMtx;
}

int main (int argc, char* argv[])
{
    /**  Step 1. Setup OpenCL environment; **/

    // Init OpenCL environment;
    cl_int cl_status;

    // Get OpenCL platforms
    std::vector<cl::Platform> platforms;

    cl_status = cl::Platform::get(&platforms);

    if (cl_status != CL_SUCCESS)
    {
        std::cout << "Problem with getting OpenCL platforms"
                  << " [" << cl_status << "]" << std::endl;
        return -2;
    }

    int platform_id = 0;
    for (const auto& p : platforms)
    {
        std::cout << "Platform ID " << platform_id++ << " : "
                  << p.getInfo<CL_PLATFORM_NAME>() << std::endl;

    }

    // Using first platform
    platform_id = 0;
    cl::Platform platform = platforms[platform_id];

    // Get device from platform
    std::vector<cl::Device> devices;
    cl_status = platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);

    if (cl_status != CL_SUCCESS)
    {
        std::cout << "Problem with getting devices from platform"
                  << " [" << platform_id << "] " << platform.getInfo<CL_PLATFORM_NAME>()
                  << " error: [" << cl_status << "]" << std::endl;
    }

    std::cout << std::endl
              << "Getting devices from platform " << platform_id << std::endl;
    cl_int device_id = 0;
    for (const auto& device : devices)
    {
        std::cout << "Device ID " << device_id++ << " : "
                  << device.getInfo<CL_DEVICE_NAME>() << std::endl;

    }

    // Using first device;
    device_id = 0;
    cl::Device device = devices[device_id];

    // Create OpenCL context;
    cl::Context context(device);

    // Create OpenCL queue;
    cl::CommandQueue queue(context, device);

    /** Step 2. Setup GPU buffers **/

    //we will allocate it after matrix will be loaded;
    clsparseScalar alpha;
    clsparseInitScalar(&alpha);

    alpha.value = clCreateBuffer(context(), CL_MEM_READ_ONLY, sizeof(float),
                                 nullptr, &cl_status);

    clsparseScalar beta;
    clsparseInitScalar(&beta);

    beta.value = clCreateBuffer(context(), CL_MEM_READ_ONLY, sizeof(float),
                                nullptr, &cl_status);

    cldenseVector x;
    clsparseInitVector(&x);

    cldenseVector y;
    clsparseInitVector(&y);

    clsparseCsrMatrix A;
    clsparseInitCsrMatrix(&A);

    /** Step 3. Init clSPARSE library **/

    clsparseStatus status = clsparseSetup();
    if (status != clsparseSuccess)
    {
        std::cout << "Problem with executing clsparseSetup()" << std::endl;
        return -3;
    }

    // Create clsparseControl object
    clsparseCreateResult createResult = clsparseCreateControl( queue( ) );
    CLSPARSE_V( createResult.status, "Failed to create clsparse control" );

    int denseRow = 5;
    int denseCol = 5;

    float* A_dense = new float[denseRow * denseCol * denseRow * denseCol];

    csrMtx myCsrMtx;    
    denseMtxGen(A_dense, denseRow, denseCol);
    myCsrMtx = denseToCsr(A_dense, denseRow, denseCol);

    // Read matrix from file. Calculates the rowBlocks structures as well.
    clsparseIdx_t nnz, row, col;

    A.num_nonzeros = myCsrMtx.nnz;
    A.num_rows = row * col;
    A.num_cols = row * col;

    std::cout << typeid(A.values).name() << std::endl;
    // Allocate memory for CSR matrix
    A.values = ::clCreateBuffer( context(), CL_MEM_READ_ONLY,
                                 A.num_nonzeros * sizeof( float ), NULL, &cl_status );

    std::cout << cl_status << std::endl;
    A.col_indices = ::clCreateBuffer( context(), CL_MEM_READ_ONLY,
                                     A.num_nonzeros * sizeof( clsparseIdx_t ), NULL, &cl_status );

    std::cout << "7" << std::endl;
    A.row_pointer = ::clCreateBuffer( context(), CL_MEM_READ_ONLY,
                                     ( A.num_rows + 1 ) * sizeof( clsparseIdx_t ), NULL, &cl_status );

    std::cout << "7" << std::endl;
    float one  = 1.0f;
    float zero = 0.0f;

    // alpha = 1;
    float* halpha = (float*) clEnqueueMapBuffer(queue(), alpha.value, CL_TRUE, CL_MAP_WRITE,
                                                0, sizeof(float), 0, nullptr, nullptr, &cl_status);
    *halpha = one;

    cl_status = clEnqueueUnmapMemObject(queue(), alpha.value, halpha,
                                        0, nullptr, nullptr);

    //beta = 0;
    float* hbeta = (float*) clEnqueueMapBuffer(queue(), beta.value, CL_TRUE, CL_MAP_WRITE,
                                               0, sizeof(float), 0, nullptr, nullptr, &cl_status);
    *hbeta = zero;

    cl_status = clEnqueueUnmapMemObject(queue(), beta.value, hbeta,
                                        0, nullptr, nullptr);

    x.num_values = A.num_cols;
    x.values = clCreateBuffer(context(), CL_MEM_READ_ONLY, x.num_values * sizeof(float),
                              NULL, &cl_status);

    cl_status = clEnqueueFillBuffer(queue(), x.values, &one, sizeof(float),
                                    0, x.num_values * sizeof(float), 0, nullptr, nullptr);

    y.num_values = A.num_rows;
    y.values = clCreateBuffer(context(), CL_MEM_READ_WRITE, y.num_values * sizeof(float),
                              NULL, &cl_status);

    cl_status = clEnqueueFillBuffer(queue(), y.values, &zero, sizeof(float),
                                    0, y.num_values * sizeof(float), 0, nullptr, nullptr);

    auto t1 = std::chrono::high_resolution_clock::now();
    /**Step 4. Call the spmv algorithm */
    status = clsparseScsrmv(&alpha, &A, &x, &beta, &y, createResult.control );

    if (status != clsparseSuccess)
    {
        std::cout << "Problem with execution SpMV algorithm."
                  << " Error: " << status << std::endl;
    }
    auto t2 = std::chrono::high_resolution_clock::now();

    /** Step 5. Close & release resources */
    status = clsparseReleaseControl( createResult.control );
    if (status != clsparseSuccess)
    {
        std::cout << "Problem with releasing control object."
                  << " Error: " << status << std::endl;
    }

    status = clsparseTeardown();

    if (status != clsparseSuccess)
    {
        std::cout << "Problem with closing clSPARSE library."
                  << " Error: " << status << std::endl;
    }

    //release mem;
    clsparseCsrMetaDelete( &A );
    clReleaseMemObject ( A.values );
    clReleaseMemObject ( A.col_indices );
    clReleaseMemObject ( A.row_pointer );

    clReleaseMemObject ( x.values );
    clReleaseMemObject ( y.values );

    clReleaseMemObject ( alpha.value );
    clReleaseMemObject ( beta.value );

    std::cout << "Program completed successfully." << std::endl;
    std::cout << " OpenCl Execution time= "
              <<std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count() * 1e-3 
              << " seconds" << std::endl;
    return 0;
}

When I run the code, I get a segmentation fault at the point where A.values is created, and I cannot figure out why. When I used gdb to debug the code, I got the following error:


Program received signal SIGSEGV, Segmentation fault.
_int_malloc (av=0x7ffff73ab760 <main_arena>, bytes=32) at malloc.c:3702
3702    malloc.c: No such file or directory.