using dpc++ in cmake projects

hiaselhans commented 4 years ago

I tried to use DPC++ in a CMake environment, with llvm in a submodule, and configured set(CMAKE_CXX_COMPILER "${LLVM_PATH}/bin/clang++") and a few more variables.

Now, CMake seems to split the build into separate compile and link steps, and that causes some trouble: the program builds fine but fails to execute, apparently because the device code is missing.

terminate called after throwing an instance of 'cl::sycl::runtime_error'
  what():  No kernel named _ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE10FillBuffer was found -46 (CL_INVALID_KERNEL_NAME)

CMake seems to invoke:

clang++ -fsycl -o sycl.cpp.o -c sycl.cpp
clang++ -fsycl -rdynamic sycl.cpp.o -o sycl -lsycl

Manual compilation works fine using clang++ -fsycl -o sycl sycl.cpp. It seems that when compiling with -c (only run the preprocess, compile, and assemble steps) and linking in a separate step, the device code / fat object creation is missing.

I can read into CMake more and find a way if you point me to some starter. computecpp's FindComputeCPP is providing a add_sycl_to_target function for this. I couldn't get my head around how this is working in the tests yet but it's probably a good place to start?

schornakj commented 4 years ago

@hiaselhans I think I've run into this issue as well. Were you able to find a solution?

schornakj commented 4 years ago

I was able to get my really simple test project working after some experimentation. In my case, the problem was that I was already passing -fsycl and -fsycl-targets=nvptx64-nvidia-cuda-sycldevice in target_compile_options but I needed to add them to target_link_options as well.

I've included my CMakeLists.txt and source file below to provide a minimal working example. It's possible that @hiaselhans's problem was actually different than mine, but I hope this is helpful for other people who find this issue. I did this work on Ubuntu 18.04 using LLVM tag: 20200801.

The directory structure is:

my_project
  CMakeLists.txt
  src
    simple_sycl_app.cpp

CMakeLists.txt

# target_link_options() was introduced in CMake 3.13, so that is the lowest
# version this file can actually be configured with.
cmake_minimum_required(VERSION 3.13)

# NOTE: it's vastly preferable to set these variables at a higher scope than directly within CMakeLists.txt
# (for example -DCMAKE_CXX_COMPILER=... on the command line, or a toolchain file).
# They are set before project() because that call is where CMake detects the compiler.
set(CMAKE_C_COMPILER /home/joe/library_source/llvm/build/bin/clang)
set(CMAKE_CXX_COMPILER /home/joe/library_source/llvm/build/bin/clang++)

project(my_project VERSION 0.0.0 LANGUAGES CXX)

find_package(LLVM REQUIRED CONFIG)

# CMake compiles and links in two separate clang++ invocations. The SYCL flags
# must be passed to BOTH: with them only on the compile step the executable
# builds but fails at run time because the kernel cannot be found.
set(SYCL_FLAGS -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycldevice)

add_executable(simple_sycl_app src/simple_sycl_app.cpp)
# PRIVATE: nothing links against an executable, so there are no consumers to
# propagate these settings to.
target_compile_options(simple_sycl_app PRIVATE ${SYCL_FLAGS})
target_link_options(simple_sycl_app PRIVATE ${SYCL_FLAGS})
target_link_libraries(simple_sycl_app PRIVATE sycl)

install(TARGETS
    simple_sycl_app
    RUNTIME DESTINATION bin)

simple_sycl_app.cpp (identical to the example provided in the readme)

#include <CL/sycl.hpp>

class FillBuffer;

int main() {
  // Creating buffer of 4 ints to be used inside the kernel code
  cl::sycl::buffer<cl::sycl::cl_int, 1> Buffer(4);

  // Creating SYCL queue
  cl::sycl::queue Queue;

  // Size of index space for kernel
  cl::sycl::range<1> NumOfWorkItems{Buffer.get_count()};

  // Submitting command group(work) to queue
  Queue.submit([&](cl::sycl::handler &cgh) {
    // Getting write only access to the buffer on a device
    auto Accessor = Buffer.get_access<cl::sycl::access::mode::write>(cgh);
    // Executing kernel
    cgh.parallel_for<FillBuffer>(
        NumOfWorkItems, [=](cl::sycl::id<1> WIid) {
          // Fill buffer with indexes
          Accessor[WIid] = (cl::sycl::cl_int)WIid.get(0);
        });
  });mrtd/other_workspaces/yak_eloquent_ws/src/yaksycl

  // Getting read only access to the buffer on the host.
  // Implicit barrier waiting for queue to complete the work.
  const auto HostAccessor = Buffer.get_access<cl::sycl::access::mode::read>();

  // Check the results
  bool MismatchFound = false;
  for (size_t I = 0; I < Buffer.get_count(); ++I) {
    if (HostAccessor[I] != I) {
      std::cout << "The result is incorrect for element: " << I
                << " , expected: " << I << " , got: " << HostAccessor[I]
                << std::endl;
      MismatchFound = true;
    }
  }

  if (!MismatchFound) {
    std::cout << "The results are correct!" << std::endl;
  }

  return MismatchFound;
}

From the my_project directory I ran: mkdir build && cd build && cmake .. && make. Here is a subset of the verbose make output showing the Clang commands that CMake generates:

make[2]: Entering directory '/home/joe/my_project/build'
[ 25%] Building CXX object CMakeFiles/simple_sycl_app.dir/src/simple_sycl_app.cpp.o
/home/joe/library_source/llvm/build/bin/clang++    -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycldevice -o CMakeFiles/simple_sycl_app.dir/src/simple_sycl_app.cpp.o -c /home/joe/my_project/src/simple_sycl_app.cpp
[ 50%] Linking CXX executable simple_sycl_app
/home/joe/.local/lib/python3.6/site-packages/cmake/data/bin/cmake -E cmake_link_script CMakeFiles/simple_sycl_app.dir/link.txt --verbose=1
/home/joe/library_source/llvm/build/bin/clang++    -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycldevice CMakeFiles/simple_sycl_app.dir/src/simple_sycl_app.cpp.o  -o simple_sycl_app  -lsycl 
make[2]: Leaving directory '/home/joe/my_project/build'
[ 50%] Built target simple_sycl_app

Results from running the executable:

$ ./simple_sycl_app 
The results are correct!

intel / llvm

using dpc++ in cmake projects #1427