ocsigen / lwt

OCaml promises and concurrent I/O
https://ocsigen.org/lwt
MIT License
714 stars 176 forks source link

Segmentation Fault: in process#close (docker / musl / alpine --only) #889

Open smondet opened 3 years ago

smondet commented 3 years ago

I'm seeing a binary built inside a Docker Alpine container segfault in the #close method of a Lwt_process.process. (It works everywhere else I've tried.)

bash-5.1$ cat /etc/alpine-release 
3.13.6
bash-5.1$ ldd ./flextesa 
    /lib/ld-musl-x86_64.so.1 (0x7f9318509000)
    libgmp.so.10 => /usr/lib/libgmp.so.10 (0x7f93158f8000)
    libffi.so.7 => /usr/lib/libffi.so.7 (0x7f93158ed000)
    libev.so.4 => /usr/lib/libev.so.4 (0x7f93158df000)
    libc.musl-x86_64.so.1 => /lib/ld-musl-x86_64.so.1 (0x7f9318509000)

With strace:

...
poll([{fd=7, events=POLLOUT}], 1, 0)    = 1 ([{fd=7, revents=POLLOUT}])
futex(0x7f50eab0f904, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x563254aec1c4, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x7f50eab0f904, FUTEX_WAKE_PRIVATE, 1) = 1
getpid()                                = 636
epoll_pwait(3, [{EPOLLIN, {u32=4, u64=4294967300}}], 64, 912, NULL, 8) = 1
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1 RT_2], [], 8) = 0
read(4, "\1\0\0\0\0\0\0\0", 8)          = 8
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, ~[], [], 8)   = 0
fork()                                  = 638
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
close(5)                                = 0
close(6)                                = 0
wait4(638, 0x7ffe680c894c, WNOHANG, 0x7ffe680c89e0) = 0
getpid()                                = 636
epoll_pwait(3, [], 64, 0, NULL, 8)      = 0
getpid()                                = 636
epoll_pwait(3, [], 64, 0, NULL, 8)      = 0
write(1, "proc\n", 5proc
)                   = 5
write(1, "wait\n", 5wait
)                   = 5
write(1, "close\n", 6close
)                  = 6
getpid()                                = 636
epoll_pwait(3, 0x5632552b3380, 64, 910, NULL, 8) = -1 EINTR (Interrupted system call)
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=638, si_uid=1000, si_status=1, si_utime=0, si_stime=0} ---
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1 RT_2], [CHLD], 8) = 0
write(4, "\1\0\0\0\0\0\0\0", 8)         = 8
rt_sigprocmask(SIG_SETMASK, [CHLD], NULL, 8) = 0
rt_sigreturn({mask=[]})                 = -1 EINTR (Interrupted system call)
--- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0x28} ---
rt_sigaction(SIGSEGV, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x56325388fb08}, NULL, 8) = 0
rt_sigreturn({mask=[]})                 = 139985525524096
--- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0x28} ---
+++ killed by SIGSEGV (core dumped) +++
Segmentation fault

a gdb backtrace:

#0  0x00007f41503d2ed5 in strerror_l () from /lib/ld-musl-x86_64.so.1
#1  0x00007f415040035d in perror () from /lib/ld-musl-x86_64.so.1
#2  0x0000558f4c1fcf80 in ?? ()
#3  0x0000558f4c1fcf80 in ?? ()
#4  0x0000000000000002 in ?? ()
#5  0x00007f414ff33ce8 in ?? ()
#6  0x0000558f4c1f2020 in ?? ()
#7  0x00007f415033a7ba in ?? () from /usr/lib/libev.so.4
#8  0x00007f415033dcf1 in ev_run () from /usr/lib/libev.so.4
#9  0x0000558f4abbb108 in ev_loop (flags=2, loop=0x558f4c1fcf80) at /usr/include/ev.h:841
#10 lwt_libev_loop (val_loop=<optimized out>, val_block=<optimized out>) at lwt_libev_stubs.c:102
#11 0x0000558f4a743016 in camlLwt_engine__fun_2085 () at src/unix/lwt_engine.ml:182
#12 0x0000558f4a7460ca in camlLwt_main__run_loop_135 () at src/unix/lwt_main.ml:41
#13 0x0000558f4a74630a in camlLwt_main__run_176 () at src/unix/lwt_main.ml:118
#14 0x0000558f4a0ce579 in camlFlextesa__Internal_pervasives__run_application_5220 () at src/lib/internal_pervasives.ml:353
#15 0x0000558f4a652dd8 in camlCmdliner_term__fun_182 () at cmdliner_term.ml:25
#16 0x0000558f4a655d37 in camlCmdliner__run_414 () at cmdliner.ml:117
#17 0x0000558f4a656048 in camlCmdliner__term_eval_440 () at cmdliner.ml:147
#18 0x0000558f4a656c3b in camlCmdliner__eval_choice_inner_1638 () at cmdliner.ml:265
#19 0x0000558f49d33ffc in camlMain__entry () at src/app/main.ml:151
#20 0x0000558f49d274b9 in caml_program ()
#21 0x0000558f4abf0f80 in caml_start_program ()
#22 0x0000558f4abce27c in caml_startup_common (argv=0x7fff79148368, pooling=<optimized out>, pooling@entry=0) at startup_nat.c:158
#23 0x0000558f4abce2fb in caml_startup_exn (argv=<optimized out>) at startup_nat.c:163
#24 caml_startup (argv=<optimized out>) at startup_nat.c:168
#25 caml_main (argv=<optimized out>) at startup_nat.c:175
#26 0x0000558f49d2208c in main (argc=<optimized out>, argv=<optimized out>) at main.c:41
(gdb) 
MisterDA commented 3 years ago

Alpine only? Segfault? This reads like a stack overflow to me ;) Alpine (musl really, iirc) has a default thread stack size of 128 KB, which is far smaller than on other distros/libcs (glibc typically defaults to 8 MB). Note that the topic is in vogue!

https://ariadne.space/2021/06/25/understanding-thread-stack-sizes-and-how-alpine-is-different/ https://utcc.utoronto.ca/~cks/space/blog/programming/CStackSizeInvisible (and the related HN threads…)