microsoft / terminal

The new Windows Terminal and the original Windows console host, all in the same place!
MIT License
95.09k stars 8.24k forks source link

The input speed of ConPTY / ConHost is very slow. #13594

Open lonnywong opened 2 years ago

lonnywong commented 2 years ago

Windows Terminal version

1.12.10982.0

Windows build number

10.0.19043.0

Other Software

https://github.com/UserExistsError/conpty v0.1.0

https://github.com/trzsz/trzsz-go v0.1.9

Steps to reproduce

  1. Install trzsz-go, download trzsz_0.1.9_windows_x86_64.zip and unzip, you will get trzsz.exe, trz.exe and tsz.exe.

  2. open cmd

  3. execute trzsz cmd

  4. execute trz, a dialog will pop up.

  5. choose a file in the dialog, the file will be uploaded to the current directory.

Expected Behavior

The trzsz create a ConPTY process ( cmd ), and send the file content as keystrokes.

The trz receive the file content and save to the current directory.

The upload speed should be 2MB/s at least. Same as the download speed.

Actual Behavior

The upload speed is only about 128KB/s. Looks like the keystrokes are speed limited.

lonnywong commented 2 years ago

I thought it was an issue of https://github.com/PowerShell/Win32-OpenSSH/issues/1944 .

Actually, it should be an issue of ssh, ConPTY and Windows stdin.

lonnywong commented 2 years ago

There is a minimal example.

  1. Make a test directory on Windows. Save 2 source files: test_input.c and test_client.go.

  2. test_input.c

    
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/time.h>
    #include <unistd.h>
    #include <windows.h>

int ReadTestData() { unsigned int size = 0; if (scanf("%u", &size) != 1) { puts("invalid size"); return -11; }

char buf[100]; unsigned int last_count = 0, read_count = 0; struct timeval begin_time, last_time, current_time; gettimeofday(&begin_time, NULL); last_time = begin_time;

while (read_count < size) { ssize_t n = read(0, buf, sizeof(buf)); if (n <= 0) { puts("read error"); return -12; } read_count += n;

gettimeofday(&current_time, NULL);
if (current_time.tv_sec - last_time.tv_sec > 1) {
  long t = (current_time.tv_sec - last_time.tv_sec) * 1000 +
           (current_time.tv_usec - last_time.tv_usec) / 1000;
  printf("read %u bytes in %ldms, speed: %.2fKB/s\n",
         read_count - last_count, t,
         (read_count - last_count) / 1024.0 * 1000 / t);
  last_time = current_time;
  last_count = read_count;
}

}

gettimeofday(&current_time, NULL); long t = (current_time.tv_sec - begin_time.tv_sec) 1000 + (current_time.tv_usec - begin_time.tv_usec) / 1000; printf("read %u bytes in %ldms, speed: %.2fKB/s\n", read_count, t, read_count / 1024.0 1000 / t);

return 0; }

int main() { HANDLE hStdin = GetStdHandle(STD_INPUT_HANDLE); if (hStdin == INVALID_HANDLE_VALUE) { puts("GetStdHandle failed"); return -1; } DWORD fdwSaveOldMode; if (!GetConsoleMode(hStdin, &fdwSaveOldMode)) { puts("GetConsoleMode failed"); return -2; } if (!SetConsoleMode(hStdin, 0)) { puts("SetConsoleMode failed"); return -3; }

int ret = ReadTestData();

SetConsoleMode(hStdin, fdwSaveOldMode);

return ret; }

// gcc -o test_input test_input.c


3. `test_client.go`
```go
package main

import (
        "context"
        "io"
        "log"
        "os"
        "strconv"
        "syscall"

        "github.com/UserExistsError/conpty"
        "golang.org/x/sys/windows"
)

func writeTestData(cpty *conpty.ConPty) {
        const size int = 100
        const count int = 0x10000

        var data = make([]byte, size)
        for i := 0; i < size; i++ {
                data[i] = 'A'
        }
        data[size-2] = '\r'
        data[size-1] = '\n'

        cpty.Write([]byte(strconv.Itoa(size*count) + "\r\n"))

        for i := 0; i < count; i++ {
                cpty.Write(data)
        }

        // send more data to make sure exit
        for i := 0; i < 1000; i++ {
                cpty.Write(data)
        }
}

func main() {
        commandLine := "test_input"

        var outMode uint32
        outHandle, err := syscall.GetStdHandle(syscall.STD_OUTPUT_HANDLE)
        if err != nil {
                log.Fatalf("Failed to GetStdHandle:  %v", err)
        }
        if err := windows.GetConsoleMode(windows.Handle(outHandle), &outMode); err != nil {
                log.Fatalf("Failed to GetConsoleMode:  %v", err)
        }
        if err := windows.SetConsoleMode(windows.Handle(outHandle),
                windows.ENABLE_PROCESSED_OUTPUT|windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING|windows.DISABLE_NEWLINE_AUTO
_RETURN); err != nil {
                log.Fatalf("Failed to SetConsoleMode:  %v", err)
        }
        defer windows.SetConsoleMode(windows.Handle(outHandle), outMode)

        cpty, err := conpty.Start(commandLine)
        if err != nil {
                log.Fatalf("Failed to spawn a pty for [%s]:  %v", commandLine, err)
        }
        defer cpty.Close()

        go func() {
                go io.Copy(os.Stdout, cpty)
                writeTestData(cpty)
        }()

        exitCode, err := cpty.Wait(context.Background())
        if err != nil {
                log.Fatalf("Error: %v", err)
        }
        log.Printf("ExitCode: %d", exitCode)
}
  1. Run the test on Windows.

    gcc -o test_input test_input.c
    go mod init example.com/m/v2
    go get github.com/UserExistsError/conpty
    go run test_client.go
  2. You should get some output like:

    read 8600 bytes in 1870ms, speed: 4.49KB/s
    read 6800 bytes in 1952ms, speed: 3.40KB/s
    read 8800 bytes in 2042ms, speed: 4.21KB/s
    read 12700 bytes in 1972ms, speed: 6.29KB/s
    read 7400 bytes in 2005ms, speed: 3.60KB/s
    read 18300 bytes in 1976ms, speed: 9.04KB/s
    read 373300 bytes in 2000ms, speed: 182.28KB/s
    read 369900 bytes in 2000ms, speed: 180.62KB/s
    read 370700 bytes in 2000ms, speed: 181.01KB/s
    read 368600 bytes in 2000ms, speed: 179.98KB/s
    read 372200 bytes in 2000ms, speed: 181.74KB/s
    read 369300 bytes in 2000ms, speed: 180.32KB/s
    read 373200 bytes in 2000ms, speed: 182.23KB/s
    read 368600 bytes in 2000ms, speed: 179.98KB/s
    read 373700 bytes in 2000ms, speed: 182.47KB/s
    read 371200 bytes in 2000ms, speed: 181.25KB/s
    read 366100 bytes in 2000ms, speed: 178.76KB/s
    read 370600 bytes in 2000ms, speed: 180.96KB/s
    read 370400 bytes in 2000ms, speed: 180.86KB/s
    read 371600 bytes in 2000ms, speed: 181.45KB/s
    read 365700 bytes in 2000ms, speed: 178.56KB/s
    read 370000 bytes in 2000ms, speed: 180.66KB/s
    read 371400 bytes in 2000ms, speed: 181.35KB/s
    r2022/09/16 22:09:22 ExitCode: 0
lonnywong commented 2 years ago

If change read to ReadConsoleInput, It's a little faster.

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>
#include <windows.h>

int ReadTestData(HANDLE hStdin) {
  unsigned int size = 0;
  if (scanf("%u", &size) != 1) {
    puts("invalid size");
    return -11;
  }

  DWORD cNumRead, i;
  INPUT_RECORD irInBuf[128];

  unsigned int last_count = 0, read_count = 0;
  struct timeval begin_time, last_time, current_time;
  gettimeofday(&begin_time, NULL);
  last_time = begin_time;

  while (read_count < size) {
    if (!ReadConsoleInput(hStdin,     // input buffer handle
                          irInBuf,    // buffer to read into
                          128,        // size of read buffer
                          &cNumRead)) // number of records read
    {
      puts("read error");
      return -12;
    }
    for (i = 0; i < cNumRead; i++) {
      if (irInBuf[i].EventType == KEY_EVENT &&
          irInBuf[i].Event.KeyEvent.bKeyDown) {
        read_count++;
      }
    }

    gettimeofday(&current_time, NULL);
    if (current_time.tv_sec - last_time.tv_sec > 1) {
      long t = (current_time.tv_sec - last_time.tv_sec) * 1000 +
               (current_time.tv_usec - last_time.tv_usec) / 1000;
      printf("read %u bytes in %ldms, speed: %.2fKB/s\n",
             read_count - last_count, t,
             (read_count - last_count) / 1024.0 * 1000 / t);
      last_time = current_time;
      last_count = read_count;
    }
  }

  gettimeofday(&current_time, NULL);
  long t = (current_time.tv_sec - begin_time.tv_sec) * 1000 +
           (current_time.tv_usec - begin_time.tv_usec) / 1000;
  printf("read %u bytes in %ldms, speed: %.2fKB/s\n", read_count, t,
         read_count / 1024.0 * 1000 / t);

  return 0;
}

int main() {
  HANDLE hStdin = GetStdHandle(STD_INPUT_HANDLE);
  if (hStdin == INVALID_HANDLE_VALUE) {
    puts("GetStdHandle failed");
    return -1;
  }
  DWORD fdwSaveOldMode;
  if (!GetConsoleMode(hStdin, &fdwSaveOldMode)) {
    puts("GetConsoleMode failed");
    return -2;
  }
  if (!SetConsoleMode(hStdin, 0)) {
    puts("SetConsoleMode failed");
    return -3;
  }

  int ret = ReadTestData(hStdin);

  SetConsoleMode(hStdin, fdwSaveOldMode);

  return ret;
}

// gcc -o test_input test_input.c

read 21825 bytes in 1499ms, spe ed: 14.22KB/s read 5184 bytes in 2007ms, speed: 2.52KB/s read 2368 bytes in 1988ms, speed: 1.16KB/s read 3711 bytes in 2000ms, speed: 1.81KB/s read 8000 bytes in 2066ms, speed: 3.78KB/s read 4736 bytes in 1935ms, speed: 2.39KB/s read 652672 bytes in 1999ms, speed: 318.85KB/s read 650177 bytes in 2000ms, speed: 317.47KB/s read 673088 bytes in 2000ms, speed: 328.66KB/s read 663040 bytes in 2000ms, speed: 323.75KB/s read 683264 bytes in 2000ms, speed: 333.62KB/s read 667263 bytes in 2000ms, speed: 325.81KB/s read 670208 bytes in 2000ms, speed: 327.25KB/s read 660993 bytes in 2000ms, speed: 322.75KB/s read 658111 bytes in 2000ms, speed: 321.34KB/s read 6553600 bytes in 31092ms, speed: 205.84KB/s 2022/09/16 22:51:06 ExitCode: 0

zadjii-msft commented 1 year ago

Dunno how this fell out of the triage queue. That's weird.

Thanks for the report! Someone will need to dig in and do some traces. ConPTY's input was definitely originally designed for user-driven input, not necessarily bulk input at the level that something like rz might need. I'm sure there's some unexpected hot paths or dumb waits in there.

If someone wanted to be ambitious, I'd take a look at:

That's where the input comes in to conpty, attempts to be parsed into keys, and gets dumped to the input buffer.

lonnywong commented 1 year ago

@zadjii-msft Thanks for your help.

I wrote my own ssh client https://trzsz.github.io/ssh to avoid the issue. It works if the server is Linux. But the issue still exists if the server is Windows.

data flow upload speed
files -> trzsz -> ConPTY -> ssh -> sshd ( on Linux ) -> trz -> files slow
files -> tssh -> sshd ( on Linux ) -> trz -> files fast
files -> tssh -> sshd ( on Windows ) -> trz -> files slow

Is there other process between sshd and trz on Windows?

lhecker commented 10 months ago

While testing this again I found that my recent performance improvements to our input handling have improved throughput by 2x already (~600kB/s). In particular, I think it was 5b44476048c3bf9d0838b468e4fc0bcade9596da. I bet we could make this another 100x faster though (assuming trzsz is not a bottleneck and doesn't use INPUT_RECORDs).

lonnywong commented 10 months ago

While testing this again I found that my recent performance improvements to our input handling have improved throughput by 2x already (~600kB/s). In particular, I think it was 5b44476. I bet we could make this another 100x faster though (assuming trzsz is not a bottleneck and doesn't use INPUT_RECORDs).

@lhecker Thanks. I'm new to conpty and conhost. One more question, how to specify which conhost.exe to use for conpty.dll?

lonnywong commented 10 months ago

I know how to build conpty.dll and OpenConsole.exe. Is there a way to connect them? Or can I set the absolute path of conhost.exe in the conpty.dll source code?

DHowett commented 10 months ago

If you put OpenConsole.exe in the same directory as conpty.dll, it will use it.