VladTheJunior opened 7 months ago
Could you please take a profile of your program to see where it's looping?
How can I do that?
Is it enough?
7.79% tokio-runtime-w test_agent [.] memcpy
7.77% tokio-runtime-w test_agent [.] <russh::channels::io::tx::ChannelTx<S> as tokio::io::async_write::AsyncWrite>::poll_write
6.62% tokio-runtime-w test_agent [.] tokio::runtime::scheduler::multi_thread::worker::Context::run
5.77% tokio-runtime-w test_agent [.] tokio::runtime::scheduler::multi_thread::worker::<impl tokio::runtime::scheduler::multi_thread::ha
5.11% tokio-runtime-w test_agent [.] tokio::sync::batch_semaphore::Semaphore::add_permits_locked
4.53% tokio-runtime-w test_agent [.] tokio::sync::mutex::Mutex<T>::lock_owned::{{closure}}
4.52% tokio-runtime-w test_agent [.] <tokio::sync::batch_semaphore::Acquire as core::future::future::Future>::poll
3.76% tokio-runtime-w test_agent [.] __libc_free
3.42% tokio-runtime-w test_agent [.] tokio::runtime::context::with_scheduler
3.16% tokio-runtime-w test_agent [.] __unlock
2.95% tokio-runtime-w test_agent [.] <tokio::io::util::copy::Copy<R,W> as core::future::future::Future>::poll
2.79% tokio-runtime-w test_agent [.] tokio::sync::batch_semaphore::Semaphore::release
2.78% tokio-runtime-w test_agent [.] __lock
2.77% tokio-runtime-w test_agent [.] tokio::runtime::task::state::State::transition_to_running
2.46% tokio-runtime-w test_agent [.] tokio::runtime::scheduler::multi_thread::worker::Context::run_task
2.45% tokio-runtime-w test_agent [.] tokio::runtime::task::state::State::transition_to_idle
2.14% tokio-runtime-w test_agent [.] tokio::runtime::task::waker::wake_by_ref
2.04% tokio-runtime-w test_agent [.] tokio::runtime::task::state::State::ref_dec
1.81% tokio-runtime-w test_agent [.] agent::ssh::SshSession::execute::{{closure}}
1.80% tokio-runtime-w test_agent [.] sccp
1.77% tokio-runtime-w test_agent [.] __libc_malloc_impl
1.70% tokio-runtime-w test_agent [.] tokio::runtime::task::harness::Harness<T,S>::poll
1.59% tokio-runtime-w test_agent [.] tokio::runtime::context::scoped::Scoped<T>::with
1.53% tokio-runtime-w test_agent [.] alloc_slot
1.51% tokio-runtime-w test_agent [.] enframe
1.42% tokio-runtime-w test_agent [.] get_meta
1.30% tokio-runtime-w test_agent [.] tokio::runtime::task::core::Core<T,S>::poll
There is also some additional context for this problem. The root cause is probably the target device. I am trying to execute a command that sends ~200KB of data, but for some unknown reason the device receives only 40KB; after that I get 100% CPU usage on my side and a stuck process on the device running the command (sh -c /cat > /some/file). However, after I restarted the device the problem was gone, so I can't reproduce the bug right now. Still, the 100% CPU behavior is bad and could break my app again at any time (it had happened rarely a few times before, but I could only detect it yesterday, since I have thousands of devices where this command is executed). It should produce an error instead, or at least react to a timeout.
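One way to at least get timeout behavior on the caller's side is to wrap the send in tokio::time::timeout, so a wedged transfer surfaces as an error instead of hanging forever. This is only a minimal sketch and does not address the CPU spin itself; it assumes the client channel type is russh::Channel<russh::client::Msg> as in the examples below, and the helper name, error message, and time limit are made up for illustration:

use std::time::Duration;
use russh::client::Msg;
use russh::Channel;
use tokio::time::timeout;

// Hypothetical helper: send `payload` over an already-opened channel,
// but give up after `limit` instead of waiting forever.
async fn send_with_timeout(
    channel: &Channel<Msg>,
    payload: &[u8],
    limit: Duration,
) -> anyhow::Result<()> {
    match timeout(limit, channel.data(payload)).await {
        Ok(result) => Ok(result?), // send completed (or failed) within the limit
        Err(_) => anyhow::bail!("channel.data() did not finish within {limit:?}"),
    }
}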
I'm facing the same issue - 100% CPU usage (single core) when awaiting russh::Channel::data.
In comparison, ssh user@10.0.0.123 "cat > /dev/null" < /dev/urandom consumes barely any CPU.
Minimal example:
use async_trait::async_trait;
use russh::client;
use russh::client::Config;
use russh_keys::key;
use std::sync::Arc;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let addrs = ("10.0.0.123", 22);
let mut session = client::connect(Arc::new(Config::default()), addrs, Client).await?;
let auth_ok = session.authenticate_password("user", "pass").await?;
if !auth_ok {
anyhow::bail!("auth failed");
}
let channel = session.channel_open_session().await?;
channel.exec(true, "cat > /dev/null").await?;
let infinite_data = tokio::io::repeat(b'A');
println!("sending data");
channel.data(infinite_data).await?;
unreachable!();
}
struct Client;
#[async_trait]
impl client::Handler for Client {
type Error = russh::Error;
async fn check_server_key(
&mut self,
_server_public_key: &key::PublicKey,
) -> Result<bool, Self::Error> {
Ok(true)
}
}
(I'm using russh 0.44.0-beta.2.)
Flamegraph: flamegraph.svg.gz (russh is highlighted)
I generated the flamegraph using cargo flamegraph --release. I had to let it run for a few minutes to make the data method more apparent in the perf data.
I also turned on debuginfo in the release profile:
# Cargo.toml
[profile.release]
debug = true
tokio::io::repeat is really not the best choice for benchmarking as it emits bytes one by one.
Here's a more realistic example:
$ head -c 1048576000 /dev/urandom | time ssh eugene@localhost "cat > /dev/null"
ssh eugene@localhost "cat > /dev/null" 5.02s user 0.68s system 91% cpu 6.243 total
$ time cargo run --release --example test3
Finished release [optimized] target(s) in 0.15s
Running `target/release/examples/test3`
sent 1048576000 bytes in 6.553448791 seconds
cargo run --release --example test3 10.98s user 1.20s system 162% cpu 7.496 total
For me, debug = true incurs an additional 25% performance penalty.
Code:
use async_trait::async_trait;
use russh::client;
use russh::client::Config;
use russh_keys::key;
use std::sync::Arc;
use tokio::time::Instant;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let addrs = ("localhost", 22);
let mut session = client::connect(Arc::new(Config::default()), addrs, Client).await?;
let auth_ok = session.authenticate_password("eugene", "").await?;
if !auth_ok {
anyhow::bail!("auth failed");
}
let channel = session.channel_open_session().await?;
channel.exec(true, "cat > /dev/null").await?;
let mut bytes_sent = 0;
let block = [0; 1024 * 1024];
let t = Instant::now();
for _ in 0..1000 {
channel.data(&block[..]).await?;
bytes_sent += block.len();
}
let elapsed = t.elapsed().as_secs_f64(); // keep fractional seconds for the MB/s calculation
println!("sent {} bytes in {} seconds", bytes_sent, elapsed);
println!(
"{} MB per second",
(bytes_sent as f64 / elapsed / 1_000_000.0) as u64
);
Ok(())
}
struct Client;
#[async_trait]
impl client::Handler for Client {
type Error = russh::Error;
async fn check_server_key(
&mut self,
_server_public_key: &key::PublicKey,
) -> Result<bool, Self::Error> {
Ok(true)
}
}
(Also, since most of the CPU time is spent in encryption, cipher selection can account for large performance differences, e.g. about 50% between ChaCha20-Poly1305 and AES.)
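If you want to pin the cipher so that runs are comparable, the rough shape would be the sketch below. It assumes russh exposes the negotiation preferences via Config::preferred and cipher name constants as in recent versions; the exact field types and constant names may differ in 0.44.0-beta.2, so treat this purely as illustrative:

use std::sync::Arc;
use russh::client::Config;

// Sketch only: `preferred.cipher` and `russh::cipher::CHACHA20_POLY1305` are
// assumed to exist under these names; check the russh version you are actually
// benchmarking and adjust accordingly.
fn config_with_fixed_cipher() -> Arc<Config> {
    let mut config = Config::default();
    config.preferred.cipher = &[russh::cipher::CHACHA20_POLY1305];
    Arc::new(config)
}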
I have the same problem when the connection is lost while sending a large amount of data.
> tokio::io::repeat is really not the best choice for benchmarking as it emits bytes one by one.

I replaced tokio::io::repeat in my original example with just channel.data(vec![0_u8; 67108864].as_slice()).await?, and the behavior was exactly the same - 100% single-core CPU usage when sending the data, so it doesn't matter.
> Here's a more realistic example:

OpenSSH:
> $ head -c 1048576000 /dev/urandom | time ssh eugene@localhost "cat > /dev/null"
> ssh eugene@localhost "cat > /dev/null" 5.02s user 0.68s system 91% cpu 6.243 total
- 1GB per 5s of real CPU time

russh:
> $ time cargo run --release --example test3
> Finished release [optimized] target(s) in 0.15s
> Running `target/release/examples/test3`
> sent 1048576000 bytes in 6.553448791 seconds
> cargo run --release --example test3 10.98s user 1.20s system 162% cpu 7.496 total
- 1GB per 10s of real CPU time, i.e. about half as fast
> For me, debug = true incurs an additional 25% performance penalty.
Sending data over a slow network takes orders of magnitude more time than any encryption, even in debug mode. The expected behavior is that the SSH implementation sleeps and does nothing while the data are being transmitted over the wire.
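As a generic illustration of that expected pattern in tokio (not russh code): when a writer hits backpressure, the task should be parked until the socket is writable again, so it consumes no CPU while the kernel send buffer drains. For example, with a plain TcpStream:

use tokio::io::AsyncWriteExt;
use tokio::net::TcpStream;

// write_all only makes progress as the kernel accepts bytes; when the send
// buffer is full the task is suspended (no polling, no CPU) until the reactor
// reports the socket writable again.
async fn send_all(stream: &mut TcpStream, data: &[u8]) -> std::io::Result<()> {
    stream.write_all(data).await
}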
$ head -c 67108864 /dev/zero | time ssh user@10.0.0.123 "cat > /dev/null"
ssh user@10.0.0.123 "cat > /dev/null" 0.26s user 0.27s system 1% cpu 33.915 total
russh (debug):
$ time cargo run --example ssh_copy
Finished dev [unoptimized + debuginfo] target(s) in 0.13s
Running `target/debug/examples/ssh_copy`
sending data
cargo run --example ssh_copy 29.09s user 0.16s system 117% cpu 24.881 total
russh (release):
$ time cargo run --example ssh_copy --release
Finished release [optimized] target(s) in 0.13s
Running `target/release/examples/ssh_copy`
sending data
cargo run --example ssh_copy --release 24.98s user 0.18s system 99% cpu 25.276 total
OpenSSH is using 1% CPU, while russh is using 117% CPU in debug mode and 99% CPU in release mode.
The behavior of OpenSSH is as expected - roughly 1% of the time it's actually doing anything, and 99% of the time it just sleeps while the data are being sent over the wire. russh, on the other hand, is wasting CPU 100% of the time. This means that there must be some sort of busy loop in the implementation of the russh::Channel::data future.
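For context, this is the classic shape of such a busy loop in async Rust (a generic illustration, not russh's actual code): a future that wakes its own waker and returns Poll::Pending whenever it can't make progress gets re-polled immediately, so the task spins instead of sleeping until the I/O source is actually ready.

use std::future::Future;
use std::pin::Pin;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::task::{Context, Poll};

// Illustration only (not russh code): a future that "waits" for a flag by
// waking itself on every poll, which makes the executor re-poll it in a tight
// loop and pegs one core at 100%.
struct BusyWait {
    done: Arc<AtomicBool>,
}

impl Future for BusyWait {
    type Output = ();

    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
        if self.done.load(Ordering::Acquire) {
            Poll::Ready(())
        } else {
            // Self-wake + Pending means "poll me again immediately": a spin
            // loop dressed up as a future. The fix is to hand the waker to the
            // real event source (socket readiness, tokio::sync::Notify, ...)
            // and wake only when progress is actually possible.
            cx.waker().wake_by_ref();
            Poll::Pending
        }
    }
}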
ssh_copy example
Recently I got strange behavior in my app: it can get stuck at 100% CPU usage forever, and only a restart of the process fixes it. After some investigation, I found that it happens here:
Debug log:
log.txt