tikv / rust-prometheus

Prometheus instrumentation library for Rust applications
Apache License 2.0

High CPU usage #389

Closed mhristache closed 3 years ago

mhristache commented 3 years ago

Describe the bug
I have a multi-threaded TCP server that receives binary data, parses it, and creates some Prometheus stats.

One of the threads is hogging a core (stuck at 100% CPU usage). I did a perf recording and it looks like most of the CPU time is spent in the Prometheus lib (see below).

The strange part is that the clients are not sending any segments, yet the CPU usage is still at 100%.

Also, stracing the thread shows nothing, so it looks stuck.

Any clue what might be wrong here?

    41.55%    41.10%           783  ebmd     ebmd               [.] prometheus::vec::MetricVec<T>::with_label_values
            |
            |--19.16%--0x1
            |          prometheus::vec::MetricVec<T>::with_label_values
            |
            |--16.93%--0
            |          |
            |          |--15.83%--prometheus::vec::MetricVec<T>::with_label_values
            |          |
            |           --0.70%--0
            |                     prometheus::vec::MetricVec<T>::with_label_values
            |
            |--4.71%--prometheus::vec::MetricVec<T>::with_label_values
            |
             --0.74%--ebmd::server::handle_ebm_stream_connection

    35.46%     0.00%             0  ebmd     [unknown]          [.] 0000000000000000
            |
            ---0
               |
               |--16.28%--prometheus::vec::MetricVec<T>::with_label_values
               |
               |--12.35%--hashbrown::map::make_hash
               |
               |--6.13%--<std::collections::hash::map::DefaultHasher as core::hash::Hasher>::write
               |
                --0.70%--0
                          prometheus::vec::MetricVec<T>::with_label_values

    20.24%    19.20%           344  ebmd     ebmd               [.] ebmd::server::handle_ebm_stream_connection
            |
             --19.80%--ebmd::server::handle_ebm_stream_connection

    19.16%     0.00%             0  ebmd     [unknown]          [.] 0x0000000000000001
            |
            ---0x1
               prometheus::vec::MetricVec<T>::with_label_values

    13.54%    12.75%           233  ebmd     ebmd               [.] hashbrown::map::make_hash
            |
            |--12.75%--0
            |          |
            |          |--11.56%--hashbrown::map::make_hash
            |          |
            |           --0.79%--<std::collections::hash::map::DefaultHasher as core::hash::Hasher>::write
            |
             --0.79%--hashbrown::map::make_hash

    10.79%    10.35%           194  ebmd     ebmd               [.] prometheus::atomic64::AtomicU64::inc_by_with_ordering
            |
             --10.64%--prometheus::atomic64::AtomicU64::inc_by_with_ordering

    10.67%    10.67%           216  ebmd     ebmd               [.] ebmd::parser::Parser::parse
            |
            ---ebmd::parser::Parser::parse

     6.47%     5.68%           115  ebmd     ebmd               [.] <std::collections::hash::map::DefaultHasher as core::hash::Hasher>::write
            |
            |--5.68%--0
            |          |
            |           --5.33%--<std::collections::hash::map::DefaultHasher as core::hash::Hasher>::write
            |
             --0.79%--<std::collections::hash::map::DefaultHasher as core::hash::Hasher>::write

Looking at the low-level instructions, most of the CPU time seems to be spent on LOCK-prefixed instructions:

Percent│125:   mov       %rdi,0x10(%rsp)
       │       mov       (%r12),%rax
       │       test      $0x8,%al
       │     ↓ jne       258
  1.34 │       mov       %rax,%rcx
       │       add       $0x10,%rcx
       │     ↓ jb        258
 22.85 │       lock      cmpxchg   %rcx,(%r12)
       │     ↓ jne       258
  2.16 │14f:   mov       %r12,(%rsp)
       │       lea       0x18(%r13),%rdi
       │       lea       0x10(%rsp),%rsi
       │       mov       %rdi,0x8(%rsp)
  1.34 │     → callq     hashbrown::map::make_hash
  0.78 │       mov       0x28(%r13),%rdx
  0.22 │       mov       0x30(%r13),%r11
  0.11 │       mov       %rdx,%rsi
  0.45 │       and       %rax,%rsi
  0.45 │       shr       $0x39,%rax
       │       lea       0x10(%rsi),%rdi
  0.34 │       and       %rdx,%rdi
  0.78 │       movdqu    (%r11,%rsi,1),%xmm1
  1.01 │       movd      %eax,%xmm0
       │       punpcklbw %xmm0,%xmm0
  0.11 │       pshuflw   $0xe0,%xmm0,%xmm0
  0.89 │       pshufd    $0x0,%xmm0,%xmm0
  0.56 │       movdqa    %xmm0,%xmm2
  0.68 │       pcmpeqb   %xmm1,%xmm2
       │       pmovmskb  %xmm2,%eax
  0.11 │       mov       $0x10,%r9d
  0.34 │       test      %ax,%ax
       │     ↓ je        216
  0.57 │1ae:   bsf       %ax,%bp
  0.11 │       movzwl    %bp,%ebx
  0.56 │       add       %rsi,%rbx
  0.22 │       mov       0x10(%rsp),%r10
  0.45 │       and       %rdx,%rbx
       │       mov       %rbx,%rbp
  0.45 │       shl       $0x4,%rbp
       │       mov       %r11,%rcx
  0.38 │       sub       %rbp,%rcx
  2.04 │       cmp       -0x10(%rcx),%r10
       │     ↓ jne       281
       │       neg       %rbx
       │1da:   shl       $0x4,%rbx
       │       mov       -0x8(%r11,%rbx,1),%rbp
 17.91 │       lock      addq      $0x1,0x0(%rbp)
       │     ↓ jle       5ee
       │       test      %rbp,%rbp
       │     ↓ je        2f9
  0.78 │       mov       $0xfffffffffffffff0,%rax
 23.41 │       lock      xadd      %rax,(%r12)
  0.11 │       and       $0xfffffffffffffff2,%rax
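
For context, LOCK-prefixed instructions like these are what atomic operations compile to: a labelled-child lookup of this kind typically involves hashing the label values, taking a read lock on a shared map, and cloning a reference-counted handle, on top of the atomic add of the counter itself. A simplified sketch of that access pattern (not the library's actual code; the names below are made up for illustration):

use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, RwLock};

// Simplified stand-in for a metric vec's per-label children, keyed by a hash
// of the label values.
struct Children {
    map: RwLock<HashMap<u64, Arc<AtomicU64>>>,
}

impl Children {
    // Every lookup takes a read lock and clones an Arc handle; the lock word,
    // the Arc strong count, and the counter itself are all touched with
    // atomic (LOCK-prefixed) instructions, and the strong count is
    // decremented again when the handle is dropped.
    fn lookup(&self, label_hash: u64) -> Option<Arc<AtomicU64>> {
        self.map.read().unwrap().get(&label_hash).cloned()
    }
}

fn bump(children: &Children, label_hash: u64, by: u64) {
    if let Some(counter) = children.lookup(label_hash) {
        counter.fetch_add(by, Ordering::Relaxed);
    }
}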

Maybe a deadlock or an infinite loop somewhere?

To Reproduce
Not sure, this is the first time I've seen it.

Additional context
I am using quite a large number of labels (6), which can result in ~10k unique child metrics.

breezewish commented 3 years ago

Currently I have no clue. Is it easy for you to reproduce this problem after your server restarts?

mhristache commented 3 years ago

No, I've only seen it once and I have no idea how to reproduce it. Is there anything you want me to check if it happens again?

Thank you

mhristache commented 3 years ago

The issue popped up again on another system, and I can now confirm it is caused by a deadlock somewhere in the Prometheus lib. I managed to get a backtrace:

#0  alloc::sync::{{impl}}::drop<prometheus::value::Value<prometheus::atomic64::AtomicU64>> (self=0x7fb515f77a38) at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/alloc/src/sync.rs:1419
#1  core::ptr::drop_in_place<alloc::sync::Arc<prometheus::value::Value<prometheus::atomic64::AtomicU64>>> () at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/ptr/mod.rs:175
#2  core::ptr::drop_in_place<prometheus::counter::GenericCounter<prometheus::atomic64::AtomicU64>> () at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/ptr/mod.rs:175
#3  ebmd::server::handle_ebm_stream_connection (stream=..., spec=..., tx=..., l=..., log_raw_events=false, iias=...) at src/server.rs:124
#4  0x0000560080c2e21c in ebmd::server::start_ebm_stream_instance::{{closure}} () at src/server.rs:73
#5  std::sys_common::backtrace::__rust_begin_short_backtrace<closure-1,()> (f=...) at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:137
#6  0x0000560080bf1abc in std::thread::{{impl}}::spawn_unchecked::{{closure}}::{{closure}}<closure-1,()> () at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/thread/mod.rs:464
#7  std::panic::{{impl}}::call_once<(),closure-0> (self=..., _args=<optimized out>) at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panic.rs:308
#8  std::panicking::try::do_call<std::panic::AssertUnwindSafe<closure-0>,()> (data=<optimized out>) at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:381
#9  std::panicking::try<(),std::panic::AssertUnwindSafe<closure-0>> (f=...) at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:345
#10 std::panic::catch_unwind<std::panic::AssertUnwindSafe<closure-0>,()> (f=...) at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panic.rs:382
#11 std::thread::{{impl}}::spawn_unchecked::{{closure}}<closure-1,()> () at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/thread/mod.rs:463
#12 core::ops::function::FnOnce::call_once<closure-0,()> () at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/ops/function.rs:227
#13 0x0000560080e3275a in alloc::boxed::{{impl}}::call_once<(),FnOnce<()>> () at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/alloc/src/boxed.rs:1042
#14 alloc::boxed::{{impl}}::call_once<(),alloc::boxed::Box<FnOnce<()>>> () at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/alloc/src/boxed.rs:1042
#15 std::sys::unix::thread::{{impl}}::new::thread_start () at library/std/src/sys/unix/thread.rs:87
#16 0x00007fb5179914f9 in start_thread () from target:/lib64/libpthread.so.0
#17 0x00007fb517fdbfbf in clone () from target:/lib64/libc.so.6

The way I use the lib is to define the metrics with register_int_counter_vec! inside lazy_static! and bump them inside the threads. I create one thread per connection, but I usually only have ~8 threads. Each thread updates the same labels.

So, I do something like this:

Globally:

use lazy_static::lazy_static;
use prometheus::{register_int_counter_vec, IntCounterVec};

lazy_static! {
    // One child counter per unique label value, created lazily on first use.
    static ref RECORDS: IntCounterVec = register_int_counter_vec!(
        "some_metric",
        "some text",
        &["instance_id"]
    )
    .unwrap();
}

In the thread:

RECORDS
    .with_label_values(&[&iid[..]])
    .inc_by(records.len() as u64);
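
A side note on this pattern (not part of the original report): with_label_values does the label hashing and child lookup on every call, so in a hot loop it is common to resolve the child counter once per connection and reuse the returned handle. A minimal sketch under that assumption; handle_connection, read_batch, and iid are hypothetical stand-ins for the reporter's code:

use prometheus::IntCounterVec;

// Hypothetical per-connection handler; `records_vec` would be the RECORDS
// static from above.
fn handle_connection(records_vec: &IntCounterVec, iid: &str) {
    // Resolve the labelled child once per connection; this is where the
    // label hashing, lock, and handle clone happen.
    let records_counter = records_vec.with_label_values(&[iid]);

    loop {
        let records = read_batch(); // placeholder for reading one batch from the socket
        if records.is_empty() {
            break;
        }
        // Per batch, only a cheap atomic add remains.
        records_counter.inc_by(records.len() as u64);
    }
}

fn read_batch() -> Vec<u8> {
    Vec::new() // stub so the sketch stands alone
}

With the handle cached, the per-batch cost is reduced to the atomic increment that the profile attributes to AtomicU64::inc_by_with_ordering.
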
mhristache commented 3 years ago

I realized my issue was not related to rust-prometheus after all. The high CPU usage attributed to rust-prometheus was just a symptom of another issue in my code that caused an infinite loop. Sorry for the wrong report.