compio-rs / compio

A thread-per-core Rust runtime with IOCP/io_uring/polling.

net bench panic #24

Closed · neetdai closed this issue 1 year ago

neetdai commented 1 year ago
tcp/tokio               time:   [917.80 µs 943.99 µs 972.57 µs]
Found 3 outliers among 100 measurements (3.00%)
  2 (2.00%) high mild
  1 (1.00%) high severe
Benchmarking tcp/compio: Warming up for 3.0000 sthread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: Os { code: 52, kind: Uncategorized, message: "You were not connected because a duplicate name exists on the network. If joining a domain, go to System in Control Panel to change the computer name and try again. If joining a workgroup, choose another workgroup name." }', benches\net.rs:48:65
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

thread 'main' panicked at 'cannot access a Thread Local Storage value during or after destruction: AccessError', /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be\library\std\src\thread\local.rs:246:26  
thread 'main' panicked at 'aborting the process', C:\Users\neet\.cargo\registry\src\mirrors.ustc.edu.cn-61ef6e0cd06fb9b8\async-task-4.4.0\src\utils.rs:17:5
stack backtrace:
   0:     0x7ff656ef210c - std::sys_common::backtrace::_print::impl$0::fmt
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\sys_common\backtrace.rs:44
   1:     0x7ff656f15d1b - core::fmt::rt::Argument::fmt
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\core\src\fmt\rt.rs:138
   2:     0x7ff656f15d1b - core::fmt::write
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\core\src\fmt\mod.rs:1094
   3:     0x7ff656eec4bf - std::io::Write::write_fmt<std::sys::windows::stdio::Stderr>
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\io\mod.rs:1714
   4:     0x7ff656ef1ebb - std::sys_common::backtrace::_print
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\sys_common\backtrace.rs:47
   5:     0x7ff656ef1ebb - std::sys_common::backtrace::print
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\sys_common\backtrace.rs:34
   6:     0x7ff656ef498a - std::panicking::default_hook::closure$1
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\panicking.rs:269
   7:     0x7ff656ef45df - std::panicking::default_hook
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\panicking.rs:288
   8:     0x7ff656ef503e - std::panicking::rust_panic_with_hook
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\panicking.rs:705
   9:     0x7ff656d37779 - crossbeam_queue::seg_queue::SegQueue<T>::pop::h202a01f2630962d0
  10:     0x7ff656d37749 - crossbeam_queue::seg_queue::SegQueue<T>::pop::h202a01f2630962d0
  11:     0x7ff656f203f8 - core::panicking::assert_failed::hf7de5ebfb34e0a52
  12:     0x7ff656d3785e - async_task::utils::abort::hb66b0a56c5f5467a
  13:     0x7ff656d37889 - <async_task::utils::abort_on_panic::Bomb as core::ops::drop::Drop>::drop::h165c1df495236742
  14:     0x7ff656cc1978 - async_task::raw::RawTask<F,T,S,M>::drop_future::h0767c0314ff48410
  15:     0x7ff942db1030 - <unknown>
  16:     0x7ff942db4f5e - is_exception_typeof
  17:     0x7ff942dbf500 - _C_specific_handler
  18:     0x7ff942db416c - is_exception_typeof
  19:     0x7ff942dbfe41 - _CxxFrameHandler3
  20:     0x7ff9534d245f - _chkstk
  21:     0x7ff953460939 - RtlUnwindEx
  22:     0x7ff942dbf9ce - _C_specific_handler
  23:     0x7ff942db2e25 - is_exception_typeof
  24:     0x7ff942db3244 - is_exception_typeof
  25:     0x7ff942db4268 - is_exception_typeof
  26:     0x7ff942dbfe41 - _CxxFrameHandler3
  27:     0x7ff9534d23df - _chkstk
  28:     0x7ff9534814b4 - RtlRaiseException
  29:     0x7ff953481207 - RtlRaiseException
  30:     0x7ff950c3cf19 - RaiseException
  31:     0x7ff942db6720 - CxxThrowException
  32:     0x7ff656f0521e - panic_unwind::real_imp::panic
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\panic_unwind\src\seh.rs:322
  33:     0x7ff656f0521e - panic_unwind::__rust_start_panic
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\panic_unwind\src\lib.rs:103
  34:     0x7ff656ef53e9 - std::panicking::rust_panic
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\panicking.rs:755
  35:     0x7ff656ef5202 - std::panicking::rust_panic_with_hook
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\panicking.rs:727
  36:     0x7ff656ef4f2d - std::panicking::begin_panic_handler::closure$0
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\panicking.rs:597
  37:     0x7ff656ef2d59 - std::sys_common::backtrace::__rust_end_short_backtrace<std::panicking::begin_panic_handler::closure_env$0,never$>
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\sys_common\backtrace.rs:151
  38:     0x7ff656ef4c30 - std::panicking::begin_panic_handler
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\panicking.rs:593
  39:     0x7ff656f2e3e5 - core::panicking::panic_fmt
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\core\src\panicking.rs:67
  40:     0x7ff656f2e8f3 - core::result::unwrap_failed
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\core\src\result.rs:1651
  41:     0x7ff656cc1952 - async_task::raw::RawTask<F,T,S,M>::drop_future::h0767c0314ff48410
  42:     0x7ff656d0d96c - <alloc::collections::vec_deque::VecDeque<T,A> as core::ops::drop::Drop>::drop::h87e63025e70a03b7
  43:     0x7ff656d1923e - <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once::h1680530e2c5b0aeb
  44:     0x7ff656d1b335 - std::sys::common::thread_local::fast_local::Key<T>::try_initialize::hef15d406f4d95345
  45:     0x7ff656f02240 - std::sys::windows::thread_local_dtor::run_keyless_dtors
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\sys\windows\thread_local_dtor.rs:28
  46:     0x7ff656f02470 - std::sys::windows::thread_local_key::on_tls_callback
                               at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\sys\windows\thread_local_key.rs:246
  47:     0x7ff953449a1d - RtlActivateActivationContextUnsafeFast
  48:     0x7ff953449aff - RtlActivateActivationContextUnsafeFast
  49:     0x7ff95348dda5 - LdrShutdownProcess
  50:     0x7ff95348da8d - RtlExitUserProcess
  51:     0x7ff951fce82b - FatalExit
  52:     0x7ff950b305bc - exit
  53:     0x7ff950b3045f - exit
  54:     0x7ff656f1d093 - __scrt_common_main_seh
                               at D:\a\_work\1\s\src\vctools\crt\vcstartup\src\startup\exe_common.inl:295
  55:     0x7ff951fc7614 - BaseThreadInitThunk
  56:     0x7ff9534826b1 - RtlUserThreadStart
error: bench failed, to rerun pass `--bench net`

Caused by:
  process didn't exit successfully: `E:\rust\compio\target\release\deps\net-bf4587aad27d5461.exe --bench` (exit code: 255)
Berrysoft commented 1 year ago

Could not reproduce. Could you provide more information? Which operation caused this error? bind, connect, or accept?

Berrysoft commented 1 year ago

I guess it's connect that caused this problem. A client needs to bind to a localhost port before it can connect to a remote address. Your firewall may forbid binding that many ports in a short time, or many services on your machine may already be holding ports. I don't think it's a bug.
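For illustration: connect() implicitly binds the client socket to a fresh ephemeral localhost port, so a benchmark loop consumes one such port per iteration. A minimal std-only sketch of that implicit bind (separate from the bench itself):

use std::net::{TcpListener, TcpStream};

fn main() -> std::io::Result<()> {
    // Port 0 asks the OS for any free port for the listener.
    let listener = TcpListener::bind("127.0.0.1:0")?;
    let addr = listener.local_addr()?;
    // connect() implicitly binds the client side to its own ephemeral port.
    let stream = TcpStream::connect(addr)?;
    println!("client side bound to {}", stream.local_addr()?);
    Ok(())
}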

neetdai commented 1 year ago

After setting RUST_BACKTRACE=1:

Gnuplot not found, using plotters backend
Benchmarking tcp/compio: Warming up for 3.0000 sthread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: Os { code: 52, kind: Uncategorized, message: "You were not connected because a duplicate name exists on the network. If joining a domain, go to System in Control Panel to change the computer name and try again. If joining a workgroup, choose another workgroup name." }', benches\net.rs:49:65
stack backtrace:
   0: std::panicking::begin_panic_handler
             at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\std\src\panicking.rs:593
   1: core::panicking::panic_fmt
             at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\core\src\panicking.rs:67
   2: core::result::unwrap_failed
             at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library\core\src\result.rs:1651
   3: async_task::raw::RawTask<F,T,S,M>::run
   4: compio::task::runtime::Runtime::block_on
   5: compio::task::block_on
   6: criterion::bencher::AsyncBencher<A,M>::iter
   7: <criterion::routine::Function<M,F,T> as criterion::routine::Routine<M,T>>::warm_up
   8: criterion::routine::Routine::sample
   9: criterion::analysis::common
  10: criterion::benchmark_group::BenchmarkGroup<M>::bench_function
  11: <std::panicking::begin_panic::PanicPayload<A> as core::panic::BoxMeUp>::take_box
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.

thread 'main' panicked at 'cannot access a Thread Local Storage value during or after destruction: AccessError', /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be\library\std\src\thread\local.rs:246:26
stack backtrace:
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
thread 'main' panicked at 'aborting the process', C:\Users\neet\.cargo\registry\src\mirrors.ustc.edu.cn-61ef6e0cd06fb9b8\async-task-4.4.0\src\utils.rs:17:5
stack backtrace:
error: bench failed, to rerun pass `--bench net`

Caused by:
  process didn't exit successfully: `E:\rust\compio\target\release\deps\net-bf4587aad27d5461.exe --bench` (exit code: 0xc0000409, STATUS_STACK_BUFFER_OVERRUN)

Running only the tcp/compio bench twice reproduces it. This doesn't look like a problem caused by too many clients. Running the tcp/tokio bench twice in the same way produces:

Benchmarking tcp/tokio: Warming up for 3.0000 sthread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: Os { code: 10048, kind: AddrInUse, message: "Only one usage of each socket address (protocol/network address/port) is normally permitted." }', benches\net.rs:33:66
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

error: bench failed, to rerun pass `--bench net`
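A plausible workaround for the AddrInUse failure (a suggestion, not something tried in this thread): bind the listener to port 0 so the OS assigns a free ephemeral port instead of re-binding a hard-coded one that may still be in TIME_WAIT. A sketch of the first lines of the bench closure, using only the tokio API the bench already uses:

// Hypothetical variant of the bench body with no hard-coded port.
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
let addr = listener.local_addr().unwrap(); // the OS-assigned port
let tx = tokio::net::TcpStream::connect(addr);
let rx = listener.accept();
let (mut tx, (mut rx, _)) = tokio::try_join!(tx, rx).unwrap();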
Berrysoft commented 1 year ago

Running only the tcp/compio bench twice reproduces it.

What do you mean by twice? Did you change the code, or run cargo bench twice?

Running the tcp/tokio bench twice in the same way produces:

Do you mean that tokio also causes such a panic?

Berrysoft commented 1 year ago

I can reproduce this, but not reliably. I still suspect it's because too many clients or servers are created.
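If connection churn is the suspect, criterion can be told to create fewer connections per run. A sketch using stock criterion group settings inside fn tcp (the values are arbitrary; the commented-out udp bench later in this thread applies the same mitigation on Linux):

use std::time::Duration;

let mut group = c.benchmark_group("tcp");
// Fewer samples and shorter windows mean fewer listener/connection pairs per run.
group
    .sample_size(16)
    .warm_up_time(Duration::from_millis(500))
    .measurement_time(Duration::from_secs(1));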

neetdai commented 1 year ago

What do you mean by twice? Did you change the code, or run cargo bench twice?

use criterion::{async_executor::AsyncExecutor, criterion_group, criterion_main, Criterion};

// criterion_group!(net, tcp, udp);
criterion_group!(net, tcp);
criterion_main!(net);

struct CompioRuntime;

impl AsyncExecutor for CompioRuntime {
    fn block_on<T>(&self, future: impl std::future::Future<Output = T>) -> T {
        compio::task::block_on(future)
    }
}

fn tcp(c: &mut Criterion) {
    const PACKET_LEN: usize = 1048576;
    static PACKET: &[u8] = &[1u8; PACKET_LEN];

    let mut group = c.benchmark_group("tcp");

    // group.bench_function("tokio", |b| {
    //     let runtime = tokio::runtime::Builder::new_current_thread()
    //         .enable_all()
    //         .build()
    //         .unwrap();
    //     b.to_async(&runtime).iter(|| async {
    //         use tokio::io::{AsyncReadExt, AsyncWriteExt};

    //         let listener = tokio::net::TcpListener::bind("127.0.0.1:9999").await.unwrap();
    //         let addr = listener.local_addr().unwrap();
    //         let tx = tokio::net::TcpStream::connect(addr);
    //         let rx = listener.accept();
    //         let (mut tx, (mut rx, _)) = tokio::try_join!(tx, rx).unwrap();
    //         tx.write_all(PACKET).await.unwrap();
    //         let mut buffer = Vec::with_capacity(PACKET_LEN);
    //         while buffer.len() < PACKET_LEN {
    //             rx.read_buf(&mut buffer).await.unwrap();
    //         }
    //         buffer
    //     })
    // });

    group.bench_function("compio", |b| {
        b.to_async(CompioRuntime).iter(|| async {
            let listener = compio::net::TcpListener::bind("127.0.0.1:9998").unwrap();
            let addr = listener.local_addr().unwrap();
            let tx = compio::net::TcpStream::connect(addr);
            let rx = listener.accept();
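            // The panic at benches\net.rs:49:65 reported above corresponds to this unwrap.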
            let (tx, (rx, _)) = futures_util::try_join!(tx, rx).unwrap();
            tx.send_all(PACKET).await.0.unwrap();
            let buffer = Vec::with_capacity(PACKET_LEN);
            let (recv, buffer) = rx.recv_exact(buffer).await;
            recv.unwrap();
            buffer
        })
    });

    group.finish();
}

// fn udp(c: &mut Criterion) {
//     const PACKET_LEN: usize = 1024;
//     static PACKET: &[u8] = &[1u8; PACKET_LEN];

//     let mut group = c.benchmark_group("udp");

//     // The socket may be dropped by firewall when the number is too large.
//     #[cfg(target_os = "linux")]
//     group
//         .sample_size(16)
//         .measurement_time(std::time::Duration::from_millis(2))
//         .warm_up_time(std::time::Duration::from_millis(2));

//     group.bench_function("tokio", |b| {
//         let runtime = tokio::runtime::Builder::new_current_thread()
//             .enable_all()
//             .build()
//             .unwrap();
//         b.to_async(&runtime).iter(|| async {
//             let rx = tokio::net::UdpSocket::bind("127.0.0.1:0").await.unwrap();
//             let addr_rx = rx.local_addr().unwrap();
//             let tx = tokio::net::UdpSocket::bind("127.0.0.1:0").await.unwrap();
//             let addr_tx = tx.local_addr().unwrap();

//             rx.connect(addr_tx).await.unwrap();
//             tx.connect(addr_rx).await.unwrap();

//             {
//                 let mut pos = 0;
//                 while pos < PACKET_LEN {
//                     let res = tx.send(&PACKET[pos..]).await;
//                     pos += res.unwrap();
//                 }
//             }
//             {
//                 let mut buffer = vec![0; PACKET_LEN];
//                 let mut pos = 0;
//                 while pos < PACKET_LEN {
//                     let res = rx.recv(&mut buffer[pos..]).await;
//                     pos += res.unwrap();
//                 }
//                 buffer
//             }
//         })
//     });

//     group.bench_function("compio", |b| {
//         b.to_async(CompioRuntime).iter(|| async {
//             let rx = compio::net::UdpSocket::bind("127.0.0.1:0").unwrap();
//             let addr_rx = rx.local_addr().unwrap();
//             let tx = compio::net::UdpSocket::bind("127.0.0.1:0").unwrap();
//             let addr_tx = tx.local_addr().unwrap();

//             rx.connect(addr_tx).unwrap();
//             tx.connect(addr_rx).unwrap();

//             {
//                 let mut pos = 0;
//                 while pos < PACKET_LEN {
//                     let (res, _) = tx.send(&PACKET[pos..]).await;
//                     pos += res.unwrap();
//                 }
//             }
//             {
//                 let mut buffer = Vec::with_capacity(PACKET_LEN);
//                 let mut res;
//                 while buffer.len() < PACKET_LEN {
//                     (res, buffer) = rx.recv(buffer).await;
//                     res.unwrap();
//                 }
//                 buffer
//             }
//         })
//     });

//     group.finish();
// }
cargo bench --bench net

Run the bench, wait until it completes, then run it again.

rustc 1.72.0 (MSVC), Windows 10

Berrysoft commented 1 year ago

I benched your code on Windows 11 but still can't reproduce the problem.

By the way, I've fixed some bugs on master. Please make sure you are using the latest code.

neetdai commented 1 year ago

The problem still continues after upgrading to the latest code. I guess connect isn't the cause.

   3: async_task::raw::RawTask<F,T,S,M>::run
   4: compio::task::runtime::Runtime::block_on
   5: compio::task::block_on

thread 'main' panicked at 'cannot access a Thread Local Storage value during or after destruction: AccessError', /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be\library\std\src\thread\local.rs:246:26
Berrysoft commented 1 year ago

The log you pasted is not the root cause, but just a chain reaction after the first panic.

Besides the benchmark, did you find any other panics? If not, I think it's not a big problem... maybe :)
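For reference, the AccessError in that chain can be reproduced in isolation: it fires whenever one thread-local's destructor touches another thread-local whose value has already been dropped. A minimal sketch; TLS destructor order is platform-dependent, so this is not guaranteed to panic everywhere:

use std::cell::RefCell;

thread_local! {
    static LOG: RefCell<Vec<u8>> = RefCell::new(Vec::new());
    static GUARD: RefCell<Option<Dropper>> = RefCell::new(None);
}

struct Dropper;

impl Drop for Dropper {
    fn drop(&mut self) {
        // If LOG was destroyed before GUARD, this `with` panics with
        // "cannot access a Thread Local Storage value during or after
        // destruction: AccessError" -- the same message seen above.
        LOG.with(|l| l.borrow_mut().push(1));
    }
}

fn main() {
    std::thread::spawn(|| {
        LOG.with(|l| l.borrow_mut().push(0)); // initialize LOG first
        GUARD.with(|g| *g.borrow_mut() = Some(Dropper));
    })
    .join()
    .ok();
}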

Berrysoft commented 1 year ago

Any updates?

Berrysoft commented 1 year ago

Feel free to open new issues if you find any other panics.