Status: Closed (bagrorg closed this issue 3 years ago).
#include <CL/sycl.hpp>

#include <cstddef>
#include <cstdint>
#include <iostream>

/// Minimal spin-lock experiment on a SYCL GPU device.
///
/// Prints the selected device name and its supported sub-group sizes, then
/// launches two work-groups of eight work-items each. The work-item with
/// local id 0 in every work-group tries to take a single global lock word
/// with an atomic fetch_or, gives up after a bounded number of attempts, and
/// releases the lock only if it actually acquired it.
///
/// @return 0 on success, 1 if the device allocation for the lock word failed.
int main() {
  using lock_atomic =
      sycl::atomic<std::uint32_t,
                   sycl::access::address_space::global_device_space>;

  constexpr std::size_t kWorkGroupSize = 8;
  constexpr std::size_t kWorkGroupCount = 2;
  constexpr int kMaxSpins = 1000;

  sycl::queue q{sycl::gpu_selector{}};

  std::cout << "Selected device: "
            << q.get_device().get_info<sycl::info::device::name>() << "\n";
  for (const auto size :
       q.get_device().get_info<sycl::info::device::sub_group_sizes>()) {
    std::cout << size << ' ';
  }
  std::cout << "\n-------------\n\n";

  // Keep the raw USM pointer so the allocation can be released at the end.
  std::uint32_t *raw_lock = sycl::malloc_device<std::uint32_t>(1, q);
  if (raw_lock == nullptr) {
    std::cerr << "Failed to allocate device memory for the lock\n";
    return 1;
  }
  sycl::device_ptr<std::uint32_t> lock = raw_lock;

  // The queue is created without the in_order property and the lock lives in
  // USM memory, so no dependency is tracked between this initialisation and
  // the kernel below. Wait explicitly so the kernel never observes an
  // uninitialised lock word.
  q.single_task([=]() { *lock = 0; }).wait();

  const sycl::nd_range<1> range{kWorkGroupSize * kWorkGroupCount,
                                kWorkGroupSize};

  q.submit([&](sycl::handler &h) {
     h.parallel_for<class slab_hash_build>(
         range,
         [=](sycl::nd_item<1> it) [[intel::reqd_sub_group_size(8)]] {
           // Only one work-item per work-group contends for the lock.
           if (it.get_local_id() != 0) {
             return;
           }

           lock_atomic lock_word(lock);
           bool acquired = false;
           int spins = 0;
           for (;;) {
             // fetch_or returns the previous value: 0 means the lock was free
             // and is now ours.
             if (lock_word.fetch_or(1) == 0) {
               acquired = true;
               break;
             }
             // Bounded spin so a leader that cannot make forward progress
             // does not hang the kernel.
             if (spins++ > kMaxSpins) {
               break;
             }
           }

           // Release only a lock we really hold; clearing it after giving up
           // would unlock it underneath the actual owner.
           if (acquired) {
             lock_word.fetch_and(0);
           }
         });
   }).wait();

  sycl::free(raw_lock, q);
  return 0;
}