eyalroz / cuda-api-wrappers

Thin, unified, C++-flavored wrappers for the CUDA APIs
BSD 3-Clause "New" or "Revised" License
790 stars 80 forks source link

Definitions of cuda::memory::managed::allocate(device, num_bytes) #514

Closed codecircuit closed 1 year ago

codecircuit commented 1 year ago

I see them in cuda/api/multi_wrapper_impls/memory.hpp but I get undefined references. Steps to reproduce:

#include <algorithm>
#include <cuda/api.hpp>
#include <iostream>
#include <memory>

/**
 * Element-wise vector addition: stores A[idx] + B[idx] into C[idx].
 *
 * Each thread handles at most one element, at the index formed from the
 * block size, its block index and its thread index along x. A thread whose
 * index is not below numElements writes nothing.
 *
 * @param A           first input array
 * @param B           second input array
 * @param C           output array, written at the same index that is read
 * @param numElements upper bound (exclusive) on the indices processed
 */
__global__ void vectorAdd(const float *A, const float *B, float *C,
                          int numElements) {
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= numElements) {
        return;  // this thread falls past the end of the data
    }
    C[idx] = A[idx] + B[idx];
}

/**
 * Minimal reproduction driver: checks that a CUDA device exists, allocates
 * three managed-memory buffers on the current device, frees them again and
 * reports success. No kernel is launched here.
 *
 * Exits with EXIT_FAILURE when no CUDA device is found; returns 0 otherwise.
 */
int main() {
    const bool have_device = cuda::device::count() != 0;
    if (!have_device) {
        std::cerr << "No CUDA devices on this system"
                  << "\n";
        exit(EXIT_FAILURE);
    }

    int element_count = 50000;
    size_t byte_count = sizeof(float) * element_count;
    std::cout << "[Vector addition of " << element_count << " elements]\n";

    auto device = cuda::device::current::get();

    // Every buffer is requested through the same (device, size) call; these
    // three calls are the ones the reported linker error points at.
    auto allocate_managed = [&]() {
        return cuda::memory::managed::allocate(device, byte_count);
    };
    auto buffer_A = allocate_managed();
    auto buffer_B = allocate_managed();
    auto buffer_C = allocate_managed();

    // Release in the same order the buffers were obtained.
    cuda::memory::managed::free(buffer_A);
    cuda::memory::managed::free(buffer_B);
    cuda::memory::managed::free(buffer_C);

    std::cout << "Test PASSED\n";
    std::cout << "SUCCESS\n";
    return 0;
}

Compile the samples with the CMake build and you get this error:

[  2%] Built target vectorAdd
[  3%] Building CUDA object examples/CMakeFiles/vectorAddMapped.dir/modified_cuda_samples/vectorAddMapped/vectorAddMapped.cu.o
[  4%] Linking CUDA executable bin/vectorAddMapped
/usr/bin/ld: CMakeFiles/vectorAddMapped.dir/modified_cuda_samples/vectorAddMapped/vectorAddMapped.cu.o: in function `main':
tmpxft_00008304_00000000-6_vectorAddMapped.cudafe1.cpp:(.text.startup+0x160): undefined reference to `cuda::memory::managed::allocate(cuda::device_t, unsigned long, cuda::memory::managed::initial_visibility_t)'
/usr/bin/ld: tmpxft_00008304_00000000-6_vectorAddMapped.cudafe1.cpp:(.text.startup+0x18f): undefined reference to `cuda::memory::managed::allocate(cuda::device_t, unsigned long, cuda::memory::managed::initial_visibility_t)'
/usr/bin/ld: tmpxft_00008304_00000000-6_vectorAddMapped.cudafe1.cpp:(.text.startup+0x1be): undefined reference to `cuda::memory::managed::allocate(cuda::device_t, unsigned long, cuda::memory::managed::initial_visibility_t)'
collect2: error: ld returned 1 exit status
make[2]: *** [examples/CMakeFiles/vectorAddMapped.dir/build.make:102: examples/bin/vectorAddMapped] Error 1
make[1]: *** [CMakeFiles/Makefile2:200: examples/CMakeFiles/vectorAddMapped.dir/all] Error 2
make: *** [Makefile:136: all] Error 2
eyalroz commented 1 year ago

I guess I'll need to release a 0.6.3.1 soon...