rayon-rs / rayon

Rayon: A data parallelism library for Rust
Apache License 2.0
10.8k stars 492 forks source link

ThreadPool install hangs and eats 100% CPU on ARM #820

Closed jpalus closed 3 years ago

jpalus commented 3 years ago

This is related to rust-lang/rust#53670.

When run under certain specific conditions, the following code hangs, consuming 100% of two CPU cores:

// Minimal reproducer: build a two-thread Rayon pool and run one closure in it.
fn main() {
    // Explicitly sized pool with two worker threads.
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(2)
        .build()
        .unwrap();
    // Run the closure inside the pool; per this report, under the affected
    // conditions this call hangs.
    pool.install(move || {
    println!("Hello, world!");
    }); 
}

Conditions:

Regarding the optimization level, it appears to be sufficient to optimize only crossbeam-epoch to trigger the issue.

gdb backtrace from one of the active threads:

#0  0x005fb4f4 in core::sync::atomic::atomic_compare_exchange_weak (dst=0xf7100f40, old=0, new=4145024920, success=<optimized out>, failure=core::sync::atomic::Ordering::Relaxed)
    at library/core/src/sync/atomic.rs:2340
#1  0x005fb298 in core::sync::atomic::AtomicUsize::compare_exchange_weak (self=<optimized out>, current=0, new=4145024920, success=core::sync::atomic::Ordering::Relaxed, failure=<optimized out>)
    at library/core/src/sync/atomic.rs:1570
#2  0x005fbc14 in crossbeam_epoch::atomic::Atomic<T>::compare_and_set_weak (self=0xf7100f40, current=..., new=..., ord=<optimized out>)
    at build/reproducer/vendor/crossbeam-epoch/src/atomic.rs:526
#3  0x005fc584 in crossbeam_epoch::sync::list::List<T,C>::insert (self=0xf7100f40, container=..., guard=0x643530 <crossbeam_epoch::guard::unprotected::UNPROTECTED>)
    at build/reproducer/vendor/crossbeam-epoch/src/sync/list.rs:186
#4  0x005fa774 in crossbeam_epoch::internal::Local::register (collector=0x664040 <_ZN79_$LT$crossbeam_epoch..default..COLLECTOR$u20$as$u20$core..ops..deref..Deref$GT$5deref11__stability4LAZY17h166c69d70f36dbabE.llvm.1090946855762194550>)
    at build/reproducer/vendor/crossbeam-epoch/src/internal.rs:402
#5  0x005fc2f0 in crossbeam_epoch::collector::Collector::register (self=0xf7100f40) at build/reproducer/vendor/crossbeam-epoch/src/collector.rs:45
#6  0x005fde34 in crossbeam_epoch::default::HANDLE::__init () at build/reproducer/vendor/crossbeam-epoch/src/default.rs:18
#7  0x005fdce0 in core::ops::function::FnOnce::call_once () at library/core/src/ops/function.rs:227
#8  0x005fdc4c in std::thread::local::lazy::LazyKeyInner<T>::initialize (self=0xf76af8b8, init=<optimized out>) at library/std/src/thread/local.rs:304
#9  0x005fd71c in std::thread::local::fast::Key<T>::try_initialize (self=<optimized out>, init=<optimized out>) at library/std/src/thread/local.rs:473
#10 0x005fd798 in std::thread::local::fast::Key<T>::get (self=0xf76af8b8, init=<optimized out>) at library/std/src/thread/local.rs:456
#11 0x005fc3c0 in crossbeam_epoch::default::HANDLE::__getit () at library/std/src/thread/local.rs:183
#12 0x005b7290 in std::thread::local::LocalKey<T>::try_with (self=0x661964, f=...) at library/std/src/thread/local.rs:271
#13 0x005bf404 in crossbeam_epoch::default::with_handle (f=...) at build/reproducer/vendor/crossbeam-epoch/src/default.rs:43
#14 0x005bf6cc in crossbeam_epoch::default::is_pinned () at build/reproducer/vendor/crossbeam-epoch/src/default.rs:30
#15 0x005b2324 in crossbeam_deque::deque::Stealer<T>::steal (self=0x242fcf0) at build/reproducer/vendor/crossbeam-deque/src/deque.rs:619
#16 0x005acff8 in rayon_core::registry::WorkerThread::steal::{{closure}} (victim_index=1) at build/reproducer/vendor/rayon-core/src/registry.rs:779
#17 0x005b08b8 in core::iter::traits::iterator::Iterator::find_map::check::{{closure}} (x=1) at library/core/src/iter/traits/iterator.rs:2257
#18 0x005d3e68 in core::iter::adapters::filter_try_fold::{{closure}} (acc=(), item=1) at library/core/src/iter/adapters/mod.rs:1078
#19 0x005d2ee0 in core::ops::function::impls::<impl core::ops::function::FnMut<A> for &mut F>::call_mut (self=0xf76ae290, args=...) at library/core/src/ops/function.rs:269
#20 0x005b0dec in core::iter::traits::iterator::Iterator::try_fold (self=0xf76ae474, init=(), f=0xf76ae3b8) at library/core/src/iter/traits/iterator.rs:1888
#21 0x005bf0c8 in <core::iter::adapters::chain::Chain<A,B> as core::iter::traits::iterator::Iterator>::try_fold (self=0xf76ae470, acc=(), f=...) at library/core/src/iter/adapters/chain.rs:105
#22 0x005d136c in <core::iter::adapters::Filter<I,P> as core::iter::traits::iterator::Iterator>::try_fold (self=0xf76ae470, init=(), fold=...) at library/core/src/iter/adapters/mod.rs:1127
#23 0x005d38f4 in core::iter::traits::iterator::Iterator::find_map (self=0xf76ae470, f=...) at library/core/src/iter/traits/iterator.rs:2263
#24 0x005ace8c in rayon_core::registry::WorkerThread::steal (self=0xf76ae680) at build/reproducer/vendor/rayon-core/src/registry.rs:774
#25 0x005acc30 in rayon_core::registry::WorkerThread::wait_until_cold::{{closure}} () at build/reproducer/vendor/rayon-core/src/registry.rs:726
#26 0x005a162c in core::option::Option<T>::or_else (self=..., f=...) at library/core/src/option.rs:752
#27 0x005ac9d0 in rayon_core::registry::WorkerThread::wait_until_cold (self=0xf76ae680, latch=0x242fcb8) at build/reproducer/vendor/rayon-core/src/registry.rs:724
#28 0x005ac8f4 in rayon_core::registry::WorkerThread::wait_until (self=0xf76ae680, latch=0x242fcb8) at build/reproducer/vendor/rayon-core/src/registry.rs:704
#29 0x005ad400 in rayon_core::registry::main_loop (worker=..., registry=..., index=0) at build/reproducer/vendor/rayon-core/src/registry.rs:837
#30 0x005aac80 in rayon_core::registry::ThreadBuilder::run (self=...) at build/reproducer/vendor/rayon-core/src/registry.rs:56
#31 0x005ab094 in <rayon_core::registry::DefaultSpawn as rayon_core::registry::ThreadSpawn>::spawn::{{closure}} () at build/reproducer/vendor/rayon-core/src/registry.rs:101
#32 0x005b5b24 in std::sys_common::backtrace::__rust_begin_short_backtrace (f=...) at library/std/src/sys_common/backtrace.rs:137
#33 0x005c1d38 in std::thread::Builder::spawn_unchecked::{{closure}}::{{closure}} () at library/std/src/thread/mod.rs:464
#34 0x005d1450 in <std::panic::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once (self=..., _args=()) at library/std/src/panic.rs:308
#35 0x005ae9dc in std::panicking::try::do_call (data=0xf76aeac8 "\000") at library/std/src/panicking.rs:381
#36 0x005b0f94 in __rust_try ()
#37 0x005ae424 in std::panicking::try (f=...) at library/std/src/panicking.rs:345
#38 0x005d18e8 in std::panic::catch_unwind (f=...) at library/std/src/panic.rs:382
#39 0x005c193c in std::thread::Builder::spawn_unchecked::{{closure}} () at library/std/src/thread/mod.rs:463
#40 0x005c2744 in core::ops::function::FnOnce::call_once{{vtable-shim}} () at library/core/src/ops/function.rs:227
#41 0x00627834 in std::sys::unix::thread::Thread::new::thread_start ()

rayon version: 1.5.0

cuviper commented 3 years ago

Can you also report this to crossbeam? Rayon is only using safe interfaces to the deque here.

cuviper commented 3 years ago

Since the crossbeam folks found rust-lang/rust#60605, I don't think this is something we can address here.