Closed pjaaskel closed 11 months ago
This hangs in all of my local test environments (including PoCL-CPU). Tested with an LLVM 17 build.
Backtrace of a PoCL-CPU test case run:
Thread 11 (Thread 0x7f94e1ffb640 (LWP 171980) "__cosf"): #0 __futex_abstimed_wait_common64 (private=0, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:57 #1 __futex_abstimed_wait_common (cancel=true, private=0, abstime=0x0, clockid=0, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:87 #2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7f94f61157e8 <scheduler+168>, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at ./nptl/futex-internal.c:139 #3 0x00007f94f6293a41 in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f94f6115800 <scheduler+192>, cond=0x7f94f61157c0 <scheduler+128>) at ./nptl/pthread_cond_wait.c:503 #4 ___pthread_cond_wait (cond=0x7f94f61157c0 <scheduler+128>, mutex=0x7f94f6115800 <scheduler+192>) at ./nptl/pthread_cond_wait.c:627 #5 0x00007f94f5edd3e8 in pthread_scheduler_get_work (td=0x5645ec0f8780) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:540 #6 0x00007f94f5edd6c1 in pocl_pthread_driver_thread (p=0x5645ec0f8780) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:589 #7 0x00007f94f6294ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 #8 0x00007f94f6326a40 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 Thread 10 (Thread 0x7f94e27fc640 (LWP 171979) "__cosf"): #0 __futex_abstimed_wait_common64 (private=0, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:57 #1 __futex_abstimed_wait_common (cancel=true, private=0, abstime=0x0, clockid=0, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:87 #2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7f94f61157e8 <scheduler+168>, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, 
private=private@entry=0) at ./nptl/futex-internal.c:139 #3 0x00007f94f6293a41 in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f94f6115800 <scheduler+192>, cond=0x7f94f61157c0 <scheduler+128>) at ./nptl/pthread_cond_wait.c:503 #4 ___pthread_cond_wait (cond=0x7f94f61157c0 <scheduler+128>, mutex=0x7f94f6115800 <scheduler+192>) at ./nptl/pthread_cond_wait.c:627 #5 0x00007f94f5edd3e8 in pthread_scheduler_get_work (td=0x5645ec0f8740) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:540 #6 0x00007f94f5edd6c1 in pocl_pthread_driver_thread (p=0x5645ec0f8740) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:589 #7 0x00007f94f6294ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 #8 0x00007f94f6326a40 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 Thread 9 (Thread 0x7f94e2ffd640 (LWP 171978) "__cosf"): #0 __futex_abstimed_wait_common64 (private=0, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:57 #1 __futex_abstimed_wait_common (cancel=true, private=0, abstime=0x0, clockid=0, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:87 --Type <RET> for more, q to quit, c to continue without paging-- #2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7f94f61157e8 <scheduler+168>, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at ./nptl/futex-internal.c:139 #3 0x00007f94f6293a41 in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f94f6115800 <scheduler+192>, cond=0x7f94f61157c0 <scheduler+128>) at ./nptl/pthread_cond_wait.c:503 #4 ___pthread_cond_wait (cond=0x7f94f61157c0 <scheduler+128>, mutex=0x7f94f6115800 <scheduler+192>) at ./nptl/pthread_cond_wait.c:627 #5 0x00007f94f5edd3e8 in pthread_scheduler_get_work (td=0x5645ec0f8700) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:540 
#6 0x00007f94f5edd6c1 in pocl_pthread_driver_thread (p=0x5645ec0f8700) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:589 #7 0x00007f94f6294ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 #8 0x00007f94f6326a40 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 Thread 8 (Thread 0x7f94e37fe640 (LWP 171977) "__cosf"): #0 __futex_abstimed_wait_common64 (private=0, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:57 #1 __futex_abstimed_wait_common (cancel=true, private=0, abstime=0x0, clockid=0, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:87 #2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7f94f61157e8 <scheduler+168>, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at ./nptl/futex-internal.c:139 #3 0x00007f94f6293a41 in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f94f6115800 <scheduler+192>, cond=0x7f94f61157c0 <scheduler+128>) at ./nptl/pthread_cond_wait.c:503 #4 ___pthread_cond_wait (cond=0x7f94f61157c0 <scheduler+128>, mutex=0x7f94f6115800 <scheduler+192>) at ./nptl/pthread_cond_wait.c:627 #5 0x00007f94f5edd3e8 in pthread_scheduler_get_work (td=0x5645ec0f86c0) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:540 #6 0x00007f94f5edd6c1 in pocl_pthread_driver_thread (p=0x5645ec0f86c0) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:589 #7 0x00007f94f6294ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 #8 0x00007f94f6326a40 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 Thread 7 (Thread 0x7f94e3fff640 (LWP 171976) "__cosf"): #0 __futex_abstimed_wait_common64 (private=0, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:57 #1 __futex_abstimed_wait_common (cancel=true, 
private=0, abstime=0x0, clockid=0, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:87 #2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7f94f61157e8 <scheduler+168>, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at ./nptl/futex-internal.c:139 #3 0x00007f94f6293a41 in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f94f6115800 <scheduler+192>, cond=0x7f94f61157c0 <scheduler+128>) at ./nptl/pthread_cond_wait.c:503 #4 ___pthread_cond_wait (cond=0x7f94f61157c0 <scheduler+128>, mutex=0x7f94f6115800 <scheduler+192>) at ./nptl/pthread_cond_wait.c:627 #5 0x00007f94f5edd3e8 in pthread_scheduler_get_work (td=0x5645ec0f8680) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.--Type <RET> for more, q to quit, c to continue without paging-- c:540 #6 0x00007f94f5edd6c1 in pocl_pthread_driver_thread (p=0x5645ec0f8680) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:589 #7 0x00007f94f6294ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 #8 0x00007f94f6326a40 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 Thread 6 (Thread 0x7f94e8dfb640 (LWP 171975) "__cosf"): #0 __futex_abstimed_wait_common64 (private=0, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:57 #1 __futex_abstimed_wait_common (cancel=true, private=0, abstime=0x0, clockid=0, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:87 #2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7f94f61157e8 <scheduler+168>, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at ./nptl/futex-internal.c:139 #3 0x00007f94f6293a41 in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f94f6115800 <scheduler+192>, cond=0x7f94f61157c0 <scheduler+128>) at 
./nptl/pthread_cond_wait.c:503 #4 ___pthread_cond_wait (cond=0x7f94f61157c0 <scheduler+128>, mutex=0x7f94f6115800 <scheduler+192>) at ./nptl/pthread_cond_wait.c:627 #5 0x00007f94f5edd3e8 in pthread_scheduler_get_work (td=0x5645ec0f8640) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:540 #6 0x00007f94f5edd6c1 in pocl_pthread_driver_thread (p=0x5645ec0f8640) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:589 #7 0x00007f94f6294ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 #8 0x00007f94f6326a40 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 Thread 5 (Thread 0x7f94e95fc640 (LWP 171974) "__cosf"): #0 __futex_abstimed_wait_common64 (private=0, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:57 #1 __futex_abstimed_wait_common (cancel=true, private=0, abstime=0x0, clockid=0, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:87 #2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7f94f61157e8 <scheduler+168>, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at ./nptl/futex-internal.c:139 #3 0x00007f94f6293a41 in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f94f6115800 <scheduler+192>, cond=0x7f94f61157c0 <scheduler+128>) at ./nptl/pthread_cond_wait.c:503 #4 ___pthread_cond_wait (cond=0x7f94f61157c0 <scheduler+128>, mutex=0x7f94f6115800 <scheduler+192>) at ./nptl/pthread_cond_wait.c:627 #5 0x00007f94f5edd3e8 in pthread_scheduler_get_work (td=0x5645ec0f8600) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:540 #6 0x00007f94f5edd6c1 in pocl_pthread_driver_thread (p=0x5645ec0f8600) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:589 #7 0x00007f94f6294ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 #8 0x00007f94f6326a40 in clone3 () at 
../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 --Type <RET> for more, q to quit, c to continue without paging-- Thread 4 (Thread 0x7f94e9dfd640 (LWP 171973) "__cosf"): #0 __futex_abstimed_wait_common64 (private=0, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:57 #1 __futex_abstimed_wait_common (cancel=true, private=0, abstime=0x0, clockid=0, expected=0, futex_word=0x7f94f61157e8 <scheduler+168>) at ./nptl/futex-internal.c:87 #2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7f94f61157e8 <scheduler+168>, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at ./nptl/futex-internal.c:139 #3 0x00007f94f6293a41 in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x7f94f6115800 <scheduler+192>, cond=0x7f94f61157c0 <scheduler+128>) at ./nptl/pthread_cond_wait.c:503 #4 ___pthread_cond_wait (cond=0x7f94f61157c0 <scheduler+128>, mutex=0x7f94f6115800 <scheduler+192>) at ./nptl/pthread_cond_wait.c:627 #5 0x00007f94f5edd3e8 in pthread_scheduler_get_work (td=0x5645ec0f85c0) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:540 #6 0x00007f94f5edd6c1 in pocl_pthread_driver_thread (p=0x5645ec0f85c0) at /home/pjaaskel/src/pocl/lib/CL/devices/pthread/pthread_scheduler.c:589 #7 0x00007f94f6294ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 #8 0x00007f94f6326a40 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 Thread 3 (Thread 0x7f94ea5fe640 (LWP 171969) "__cosf-ust"): #0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38 #1 0x00007f94f653b136 in ?? 
() from /lib/x86_64-linux-gnu/liblttng-ust.so.1 #2 0x00007f94f6294ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 #3 0x00007f94f6326a40 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 Thread 2 (Thread 0x7f94eadff640 (LWP 171968) "__cosf-ust"): #0 __recvmsg_syscall (flags=0, msg=0x7f94eadfe530, fd=6) at ../sysdeps/unix/sysv/linux/recvmsg.c:27 #1 __libc_recvmsg (fd=6, msg=0x7f94eadfe530, flags=0) at ../sysdeps/unix/sysv/linux/recvmsg.c:41 #2 0x00007f94f6565eec in ?? () from /lib/x86_64-linux-gnu/liblttng-ust.so.1 #3 0x00007f94f653af42 in ?? () from /lib/x86_64-linux-gnu/liblttng-ust.so.1 #4 0x00007f94f6294ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 #5 0x00007f94f6326a40 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 Thread 1 (Thread 0x7f94f6856800 (LWP 171966) "__cosf"): #0 futex_wait (private=0, expected=2, futex_word=0x5645ec184f78) at ../sysdeps/nptl/futex-internal.h:146 #1 __GI___lll_lock_wait (futex=futex@entry=0x5645ec184f78, private=0) at ./nptl/lowlevellock.c:49 #2 0x00007f94f6298002 in lll_mutex_lock_optimized (mutex=0x5645ec184f78) at ./nptl/pthread_mutex_lock.c:48 #3 ___pthread_mutex_lock (mutex=0x5645ec184f78) at ./nptl/pthread_mutex_lock.c:93 #4 0x00007f94f6a732d3 in __gthread_mutex_lock (__mutex=0x5645ec184f78) at /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12/bits/gthr-default.h:749 #5 std::mutex::lock (this=0x5645ec184f78) at /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/std_mutex.h:100 #6 std::lock_guard<std::mutex>::lock_guard (__m=..., this=<optimized out>) at /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bi--Type <RET> for more, q to quit, c to continue without paging-- ts/std_mutex.h:229 #7 chipstar::Queue::getSyncQueuesLastEvents (this=0x5645ec1852f0) at /home/pjaaskel/src/chipStar/src/CHIPBackend.cc:1538 #8 0x00007f94f6ae7c8b in CHIPQueueOpenCL::getSyncQueuesEventHandles (this=0x80, 
this@entry=0x5645ec1852f0) at /home/pjaaskel/src/chipStar/src/backend/OpenCL/CHIPBackendOpenCL.cc:952 #9 0x00007f94f6aeddd5 in CHIPQueueOpenCL::enqueueBarrierImpl (this=0x5645ec1852f0, EventsToWaitFor=std::vector of length 0, capacity 0) at /home/pjaaskel/src/chipStar/src/backend/OpenCL/CHIPBackendOpenCL.cc:1364 #10 0x00007f94f6a6f69c in chipstar::Context::syncQueues (this=<optimized out>, TargetQueue=0x5645ec1852f0) at /home/pjaaskel/src/chipStar/src/CHIPBackend.cc:1089 #11 0x00007f94f6a73987 in chipstar::Queue::memCopy (this=0x5645ec1852f0, Dst=0x5645ec19f280, Src=0x2, Size=140277761774272) at /home/pjaaskel/src/chipStar/src/CHIPBackend.cc:1586 #12 0x00007f94f6aaf276 in hipMallocInternal (Ptr=0x7ffea8c8b218, Size=<optimized out>) at /home/pjaaskel/src/chipStar/src/CHIPBindings.cc:2418 #13 hipMalloc (Ptr=0x7ffea8c8b218, Size=<optimized out>) at /home/pjaaskel/src/chipStar/src/CHIPBindings.cc:2429 #14 0x00005645ea8173b3 in ____C_A_T_C_H____T_E_S_T____17 () at /home/pjaaskel/src/chipStar/HIP/tests/catch/unit/deviceLib/SinglePrecisionIntrinsics/__cosf.cc:2 #15 0x00005645ea82d1e9 in Catch::TestCase::invoke (this=<optimized out>) at /home/pjaaskel/src/chipStar/HIP/tests/catch/external/Catch2/catch.hpp:14100 #16 Catch::RunContext::invokeActiveTestCase (this=0x7ffea8c8b760) at /home/pjaaskel/src/chipStar/HIP/tests/catch/external/Catch2/catch.hpp:12959 #17 Catch::RunContext::runCurrentTest (this=this@entry=0x7ffea8c8b760, redirectedCout="", redirectedCerr="") at /home/pjaaskel/src/chipStar/HIP/tests/catch/external/Catch2/catch.hpp:12932 #18 0x00005645ea82c8f0 in Catch::RunContext::runTest (this=this@entry=0x7ffea8c8b760, testCase=...) 
at /home/pjaaskel/src/chipStar/HIP/tests/catch/external/Catch2/catch.hpp:12693 #19 0x00005645ea83187a in Catch::(anonymous namespace)::TestGroup::execute (this=0x7ffea8c8b750) at /home/pjaaskel/src/chipStar/HIP/tests/catch/external/Catch2/catch.hpp:13287 #20 Catch::Session::runInternal (this=this@entry=0x7ffea8c8b9f0) at /home/pjaaskel/src/chipStar/HIP/tests/catch/external/Catch2/catch.hpp:13493 #21 0x00005645ea830b04 in Catch::Session::run (this=this@entry=0x7ffea8c8b9f0) at /home/pjaaskel/src/chipStar/HIP/tests/catch/external/Catch2/catch.hpp:13449 #22 0x00005645ea845f52 in Catch::Session::run<char> (this=0x7ffea8c8b9f0, argc=<optimized out>, argv=<optimized out>) at /home/pjaaskel/src/chipStar/HIP/tests/catch/external/Catch2/catch.hpp:13171 #23 main (argc=2, argv=0x7ffea8c8bca8) at /home/pjaaskel/src/chipStar/HIP/tests/catch/external/Catch2/catch.hpp:17448
The hang disappeared after wiping the build directory and re-running CMake with the new defaults.
Hangs with all my local test envs (including PoCL-CPU). Tested with an LLVM 17 build.
Backtrace of a PoCL-CPU test case run: