r-lib / processx

Execute and Control Subprocesses from R
https://processx.r-lib.org/
Other
234 stars 43 forks source link

Unable to read more than 78643 chars from stdout pipe #369

Open chwpearse opened 1 year ago

chwpearse commented 1 year ago

When running a process I seem to be unable to read more than 78643 chars.

If I use read_output_lines() I get no result and when I use read_output() it only reads the first 78,643 chars. I think read_output_lines() is not reaching the \n at the end of the line so it doesn't return anything.

After reading with read_output() I cannot read the rest of the line until I send new input. I have tried to troubleshoot with the R debugger but as the read happens through the dll file I can't see what's happening.

The below creates and runs a cmd file to write 100,000 chars, but processx only reads 78,643. It then does the same for 50,000 chars and it reads all 50,000 and prints a summary at the end.

This is on Windows 10 & 11

library(tidyverse)
library(processx)
# NOTE: no `rm(list = ls())` here -- a reproducible example should not wipe
# the workspace of whoever runs it; every variable used below is (re)assigned.

# Set file names to save to, and if + how long to wait
print_file <- 'to_print.txt'
print_file_2 <- 'to_print_2.txt'
cmd_file <- 'do.cmd'
cmd_file_2 <- 'do_2.cmd'
do_wait <- TRUE
wait_secs <- 2

# Write a text file holding a single line of 100,000 'a' characters terminated
# by a newline; this is what the cmd file will print. Make a second file with
# a 50,000 character line.
'a' %>%
  rep(100000) %>%
  paste0(collapse = '') %>%
  paste0(., '\n') %>%
  write_file(file = print_file)
'a' %>%
  rep(50000) %>%
  paste0(collapse = '') %>%
  paste0(., '\n') %>%
  write_file(file = print_file_2)

# Write a cmd file that prompts for input, then prints the contents of the
# text file created above, then prompts for input again
paste0('@echo off \n:prompt\n set /p id="" \ntype ', print_file, '\nset /p id=""') %>%
  write_file(cmd_file)
paste0('@echo off \n:prompt\n set /p id="" \ntype ', print_file_2, '\nset /p id=""') %>%
  write_file(cmd_file_2)

# Kill any process left over from a previous run, then start the cmd file with
# stdin, stdout and stderr all connected through pipes
if(exists('my_proc')){my_proc$kill()}
my_proc <- process$new(cmd_file, stdin = '|', stdout = '|', stderr = '|')

# Feed in \n to begin printing. Try read_output_lines() first, then
# read_output(). Repeat both to try and catch any remaining chars not read.
my_proc$write_input('\n')
if(do_wait){Sys.sleep(wait_secs)}

a_output_lines_1_1 <- my_proc$read_output_lines()
b_output_1_1 <- my_proc$read_output()
c_output_lines_1_2 <- my_proc$read_output_lines()
d_output_1_2 <- my_proc$read_output()

# The above only reads 78,643 chars, which is less than the 100,000 that should be printed
# The remaining 21,357 (100,000 - 78,643) can be read if you feed in additional input
my_proc$write_input('-')
if(do_wait){Sys.sleep(wait_secs)}

e_output_lines_2_1 <- my_proc$read_output_lines()
f_output_2_1 <- my_proc$read_output()
g_output_lines_2_2 <- my_proc$read_output_lines()
h_output_2_2 <- my_proc$read_output()

# Collect all the output vars and their lengths into a table (one row per var)
results <- ls() %>% str_subset('output') %>%
  map(function(var_name) {
    captured <- get(var_name)
    list(
      var = var_name,
      # Number of chars in the first element read, or 0 when nothing was read
      length = captured %>% nchar %>% append(0) %>% pluck(1),
      # Inspect the captured text rather than the variable's name; any()
      # collapses empty or multi-element reads to a single TRUE/FALSE so the
      # row always has exactly one value per column
      has_linebreak = any(str_detect(captured, '\n')),
      # Distinct characters seen in the captured text
      chars = captured %>% str_extract_all(pattern = '.') %>% unlist %>%
        unique() %>% paste0(collapse = '')
    )
  }) %>%
  bind_rows()
# Clear output vars for next run
rm(list = ls() %>% str_subset('output'))

################
##### Part 2
##### Same as above but with 50,000 chars printed
################

# Kill the process from part 1 (if any) and start the second cmd file, which
# prints the 50,000 character line, again with all three streams piped
if(exists('my_proc')){my_proc$kill()}
#> [1] FALSE
my_proc <- process$new(cmd_file_2, stdin = '|', stdout = '|', stderr = '|')

# Feed in \n to begin printing. Try read_output_lines() first, then
# read_output(). Repeat both to try and catch any remaining chars not read.
my_proc$write_input('\n')
if(do_wait){Sys.sleep(wait_secs)}

a_output_lines_1_1 <- my_proc$read_output_lines()
b_output_1_1 <- my_proc$read_output()
c_output_lines_1_2 <- my_proc$read_output_lines()
d_output_1_2 <- my_proc$read_output()

# All 50,000 chars are read on the first try; send further input and read
# again anyway so both runs perform the same sequence of reads
my_proc$write_input('-')
if(do_wait){Sys.sleep(wait_secs)}

e_output_lines_2_1 <- my_proc$read_output_lines()
f_output_2_1 <- my_proc$read_output()
g_output_lines_2_2 <- my_proc$read_output_lines()
h_output_2_2 <- my_proc$read_output()

# Collect all the output vars of the second run and their lengths into a table
results_2 <- ls() %>% str_subset('output') %>%
  map(function(var_name) {
    captured <- get(var_name)
    list(
      var = var_name,
      # Number of chars in the first element read, or 0 when nothing was read
      length = captured %>% nchar %>% append(0) %>% pluck(1),
      # Inspect the captured text rather than the variable's name; any()
      # collapses empty or multi-element reads to a single TRUE/FALSE so the
      # row always has exactly one value per column
      has_linebreak = any(str_detect(captured, '\n')),
      # Distinct characters seen in the captured text
      chars = captured %>% str_extract_all(pattern = '.') %>% unlist %>%
        unique() %>% paste0(collapse = '')
    )
  }) %>%
  bind_rows()

results
#> # A tibble: 8 × 4
#>   var                length has_linebreak chars
#>   <chr>               <dbl> <lgl>         <chr>
#> 1 a_output_lines_1_1      0 FALSE         ""   
#> 2 b_output_1_1        78643 FALSE         "a"  
#> 3 c_output_lines_1_2      0 FALSE         ""   
#> 4 d_output_1_2            0 FALSE         ""   
#> 5 e_output_lines_2_1  21357 FALSE         "a"  
#> 6 f_output_2_1            0 FALSE         ""   
#> 7 g_output_lines_2_2      0 FALSE         ""   
#> 8 h_output_2_2            0 FALSE         ""
results %>% summarise(tot_length = sum(length))
#> # A tibble: 1 × 1
#>   tot_length
#>        <dbl>
#> 1     100000

results_2
#> # A tibble: 8 × 4
#>   var                length has_linebreak chars
#>   <chr>               <dbl> <lgl>         <chr>
#> 1 a_output_lines_1_1  50000 FALSE         "a"  
#> 2 b_output_1_1            0 FALSE         ""   
#> 3 c_output_lines_1_2      0 FALSE         ""   
#> 4 d_output_1_2            0 FALSE         ""   
#> 5 e_output_lines_2_1      0 FALSE         ""   
#> 6 f_output_2_1            0 FALSE         ""   
#> 7 g_output_lines_2_2      0 FALSE         ""   
#> 8 h_output_2_2            0 FALSE         ""
results_2 %>% summarise(tot_length = sum(length))
#> # A tibble: 1 × 1
#>   tot_length
#>        <dbl>
#> 1      50000

Created on 2023-05-27 with reprex v2.0.2

Standard output and standard error ``` sh -- nothing to show -- ```
gaborcsardi commented 1 year ago

Three tips:

  1. if you know that you can have long lines, you need to use read_output() instead of read_output_lines().
  2. read_output() can always return less output than requested.
  3. you need to call $poll_io() before reading, even if you call it with no waiting time: $poll_io(0).
chwpearse commented 1 year ago

Ah thanks for that. For anyone who hits this problem in the future here's what I did:

#' Read one (possibly very long) line from a process's standard output
#'
#' Polls `proc`, then tries `proc$read_output_lines(1)`. While that yields no
#' line, the raw chunks returned by `proc$read_output()` are collected and the
#' process is polled again, until a line is returned or `timeout` has elapsed.
#' A single `read_output()` call may return only part of the pending output,
#' which is why the chunks have to be gathered in a loop.
#'
#' @param proc Process object providing `poll_io()`, `read_output()` and
#'   `read_output_lines()` (e.g. a processx process started with
#'   `stdout = '|'`).
#' @param wait Milliseconds passed to each `proc$poll_io()` call.
#' @param timeout Maximum total time to keep trying, in milliseconds.
#' @return A character vector of length one: all collected chunks
#'   concatenated, followed by the line returned by `read_output_lines()`
#'   (if one arrived before the timeout).
proc_read_long_singleline <- function(proc, wait = 100, timeout = 5000) {
  # Wall-clock deadline: poll_io() may return before `wait` has passed, and
  # `wait` may be 0, so counting iterations does not measure elapsed time.
  deadline <- Sys.time() + timeout / 1000
  chunks <- list()

  # Poll the process that was passed in (not a global) before the first read
  proc$poll_io(wait)
  line_tail <- proc$read_output_lines(1)

  while (length(line_tail) == 0 && Sys.time() <= deadline) {
    chunks[[length(chunks) + 1L]] <- proc$read_output()
    proc$poll_io(wait)
    line_tail <- proc$read_output_lines(1)
  }

  # paste0() treats a zero-length `line_tail` as "", so a timed-out read
  # still returns whatever chunks were collected
  paste0(paste0(unlist(chunks), collapse = ""), line_tail)
}

# Stop any process left over from an earlier run before starting a new one
if (exists('my_proc')) {
  my_proc$kill()
}

# Start the cmd file with all three streams piped and trigger its output
my_proc <- process$new(cmd_file, stdin = '|', stdout = '|', stderr = '|')
my_proc$write_input('\n')

# Read the whole long line with the helper
read_chars <- proc_read_long_singleline(my_proc)
gaborcsardi commented 1 year ago

I think this is documented, but I'll keep this issue open until we improve the documentation.