Closed hanwen closed 7 months ago
=== RUN TestFlockExclusive
12:24:24.437247 callFusermount: executing ["/usr/bin/fusermount3" "/tmp/TestFlockExclusive3375078584/001/mnt" "-o" "subtype=pathfs.pathInode,max_read=131072"]
12:24:24.442108 rx 2: INIT n0 {7.38 Ra 131072 ASYNC_READ,POSIX_LOCKS,ATOMIC_O_TRUNC,EXPORT_SUPPORT,BIG_WRITES,DONT_MASK,SPLICE_WRITE,SPLICE_MOVE,SPLICE_READ,FLOCK_LOCKS,IOCTL_DIR,AUTO_INVAL_DATA,READDIRPLUS,READDIRPLUS_AUTO,ASYNC_DIO,WRITEBACK_CACHE,NO_OPEN_SUPPORT,PARALLEL_DIROPS,HANDLE_KILLPRIV,POSIX_ACL,ABORT_ERROR,MAX_PAGES,CACHE_SYMLINKS,NO_OPENDIR_SUPPORT,EXPLICIT_INVAL_DATA,0x70000000} "\x0f\x00\x00\x00\x00\x00\x00\x00"... 48b
12:24:24.442153 tx 2: OK, {7.28 Ra 131072 ASYNC_READ,BIG_WRITES,AUTO_INVAL_DATA,READDIRPLUS,NO_OPEN_SUPPORT,PARALLEL_DIROPS,MAX_PAGES 0/0 Wr 131072 Tg 0 MaxPages 32}
12:24:24.442466 rx 4: LOOKUP n1 [".go-fuse-epoll-hack"] 20b
12:24:24.442540 tx 4: OK, {n18446744073709551615 g0 tE=0s tA=0s {M0100644 SZ=0 L=1 0:0 B0*0 i0:18446744073709551615 A 0.000000 M 0.000000 C 0.000000}}
12:24:24.442641 rx 6: OPEN n18446744073709551615 {LARGEFILE}
12:24:24.442821 tx 6: OK, {Fh 18446744073709551615 }
12:24:24.442903 rx 8: POLL n18446744073709551615
12:24:24.442929 tx 8: 38=function not implemented
12:24:24.442991 rx 10: FLUSH n18446744073709551615 {Fh 18446744073709551615}
12:24:24.443013 tx 10: OK
12:24:24.443223 rx 12: RELEASE n18446744073709551615 {Fh 18446744073709551615 LARGEFILE L0}
12:24:24.443245 tx 12: OK
12:24:24.443282 rx 14: LOOKUP n1 ["hello.txt"] 10b
12:24:24.443345 tx 14: OK, {n3 g2 tE=0.1s tA=0.1s {M0100700 SZ=3 L=1 1000:1000 B8*4096 i0:11337 A 1695551064.442542 M 1695551064.442542 C 1695551064.442542}}
12:24:24.443429 rx 16: OPEN n3 {WRONLY,LARGEFILE}
12:24:24.443507 tx 16: OK, {Fh 2 }
12:24:24.443701 rx 18: OPEN n3 {WRONLY,LARGEFILE}
12:24:24.443803 tx 18: OK, {Fh 3 }
(hung)
it hangs on executing the external flock command. The flock is not the in ps -ef
output. The mount doesn't respond to anything, even though there are many goroutines blocked on reading from the fuse device.
Further investigation:
cmd.ExtraFiles = []*os.File{f}
fixes the hang./usr/bin/flock
) is irrelevant. A nonexistent file also does the job.strace dump:
[pid 87039] clone(child_stack=NULL, flags=CLONE_VM|CLONE_VFORK|SIGCHLD <unfinished ...>
strace: Process 87046 attached
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGHUP, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] rt_sigaction(SIGINT, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87046] rt_sigaction(SIGQUIT, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGILL, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] rt_sigaction(SIGTRAP, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87046] rt_sigaction(SIGABRT, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGBUS, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] rt_sigaction(SIGFPE, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87046] rt_sigaction(SIGUSR1, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGSEGV, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGUSR2, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGPIPE, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGALRM, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGTERM, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGSTKFLT, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGCHLD, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGURG, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGXCPU, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGXFSZ, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGVTALRM, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGPROF, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGWINCH, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGIO, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGPWR, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGSYS, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_3, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGRT_4, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_5, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_6, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGRT_7, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_8, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_9, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGRT_10, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_11, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_12, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGRT_13, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_14, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_15, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGRT_16, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_17, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_18, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGRT_19, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_20, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_21, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGRT_22, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_23, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_24, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGRT_25, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_26, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_27, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGRT_28, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_29, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_30, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigaction resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] rt_sigaction(SIGRT_31, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigaction(SIGRT_32, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x46d0a0}, NULL, 8) = 0
[pid 87046] rt_sigprocmask(SIG_SETMASK, [], <unfinished ...>
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] <... rt_sigprocmask resumed>NULL, 8) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87046] dup3(11, 0, 0) = 0
[pid 87046] dup3(13, 1, 0) = 1
[pid 87046] dup3(13, 2, 0) = 2
[pid 87037] <... nanosleep resumed>NULL) = 0
[pid 87046] dup3(9, 3, 0 <unfinished ...>
[pid 87037] nanosleep({tv_sec=0, tv_nsec=20000}, <unfinished ...>
[pid 87044] <... read resumed>"@\0\0\0\31\0\0\0\24\0\0\0\0\0\0\0\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 131200) = 64
...
[pid 87037] nanosleep({tv_sec=0, tv_nsec=640000}, NULL) = 0
[pid 87037] epoll_pwait(4, [{events=EPOLLOUT, data={u32=3789921368, u64=139800680438872}}], 128, 0, NULL, 0) = 1
[pid 87037] getpid() = 87036
[pid 87037] tgkill(87036, 87039, SIGURG) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=1280000}, NULL) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=2560000}, NULL) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=5120000}, NULL) = 0
[pid 87037] epoll_pwait(4, [], 128, 0, NULL, 0) = 0
[pid 87037] nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
..
(epoll/nanosleep repeats.)
successful run:
GOMAXPROCS=2 ...
tick 2023-11-08 15:19:32.447870732 +0100 CET m=+0.808127074
tick 2023-11-08 15:19:32.547355177 +0100 CET m=+0.907611530
tick 2023-11-08 15:19:32.647782274 +0100 CET m=+1.008038615
15:19:32.647959 cmd /does/not/exist --exclusive --nonblock 3
15:19:32.648588 rx 20: FLUSH n3 {Fh 2}
15:19:32.648627 tx 20: OK
15:19:32.649139 rx 22: FLUSH n3 {Fh 3}
15:19:32.649185 tx 22: OK
15:19:32.649262 rx 24: FLUSH n3 {Fh 3}
15:19:32.649285 tx 24: OK
15:19:32.649494 start err fork/exec /does/not/exist: no such file or directory
strace of successful run:
[pid 87474] dup3(9, 3, 0 <unfinished ...>
[pid 87467] <... read resumed>"@\0\0\0\31\0\0\0\24\0\0\0\0\0\0\0\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 131200) = 64
[pid 87467] write(2, "15:22:54.675266 rx 20: FLUSH n3 "..., 4015:22:54.675266 rx 20: FLUSH n3 {Fh 2}
<unfinished ...>
looks like
1) dup3() results in a FUSE call for FLUSH opcode. 2) in the hanging case, there is no OS thread anymore to run any of the FUSE readers.
in the hanging case, we can see:
[pid 87044] <... read resumed>"@\0\0\0\31\0\0\0\24\0\0\0\0\0\0\0\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 131200) = 64
...
oct(\31) is 25 , ie. the FLUSH opcode.
ie, the read syscall is executed, but we never see rx FLUSH
.
FLUSH is normally due to a close() call, not dup(), however, these dup3() calls are overwriting CLOSE_ON_EXEC fds?
Removing the flock in the caller retains the hang.
Here is what happens:
f1, .. := open()
// fd=3
f2, .. := open()
// fd=9
// setup subprocess:
fork()
if child {
// Inherit fd 9 as fd 3.
dup3(9, 3, 0x0);
}
dup3(9, 3, 0x0)
closes the existing fd 3 and then moves fd 9 to the freed up fd 3. The existing fd 3 is FUSE backed, so closing it calls back into us with a FLUSH call. However, the golang runtime doesn't consider that dup3()
can block (AFAICT). So we have 3 threads with work to do:
With GOMAXPROCS=1, we only have one thread which is busy in 1. & 2.
The dup
results in a close, which causes the kernel to fill a buffer for the FLUSH opcode for the fuse device. From the syscall perspective, the kernel completes the read() calls (hence it appears in strace output), but the golang runtime will not schedule the corresponding goroutine to run, because there are no slots available.
ideas:
The latter seems theoretically possible, but not sure if it is practically possible. This would be a change in the linux VFS layer (not the FUSE layer).
I didn't get where the number 3 comes from
On Wed, 8 Nov 2023, 18:32 Han-Wen Nienhuys, @.***> wrote:
Closed #489 https://github.com/hanwen/go-fuse/issues/489 as completed via 27a473d https://github.com/hanwen/go-fuse/commit/27a473d50345f124054dd9cbca44562f68ce15da .
— Reply to this email directly, view it on GitHub https://github.com/hanwen/go-fuse/issues/489#event-10903893105, or unsubscribe https://github.com/notifications/unsubscribe-auth/AACGA73M3RA3Y7LQAXLWAO3YDO64HAVCNFSM6AAAAAA5E2YKJOVHI2DSMVQWIX3LMV45UABCJFZXG5LFIV3GK3TUJZXXI2LGNFRWC5DJN5XDWMJQHEYDGOBZGMYTANI . You are receiving this because you are subscribed to this thread.Message ID: @.***>
Ah, now I do. Fds 0, 1, 2, 3 get dup'ed over. 0, 1, 2 are always dup'ed over when forking, and 3 is from cmd.ExtraFiles
. And 3 is possibly a FUSE-backed fd.
How about this instead:
diff --git a/fuse/test/file_lock_test.go b/fuse/test/file_lock_test.go
index 398f21c..7cbbe62 100644
--- a/fuse/test/file_lock_test.go
+++ b/fuse/test/file_lock_test.go
@@ -70,9 +70,9 @@ func runExternalFlock(flockPath, fname string) ([]byte, error) {
// in order to test the lock property we must use cmd.ExtraFiles (instead of passing the actual file)
// if we were to pass the file then this flock command would fail to place the lock (returning a
// 'file busy' error) as it is already opened and locked at this point (see above)
- cmd := exec.Command(flockPath, "--exclusive", "--nonblock", "3")
+ cmd := exec.Command(flockPath, "--exclusive", "--nonblock", "1")
cmd.Env = append(cmd.Env, "LC_ALL=C") // in case the user's shell language is different
- cmd.ExtraFiles = []*os.File{f}
+ cmd.Stdout = f
return cmd.CombinedOutput()
}
that would probably work too, but also needs a comment. Also, if the test runner does tricks with redirecting stdout, it's possible that fd 1 is actually available and could get assigned to for an open.
GOMAXPROCS=1 ./all.bash
hangs: