Closed h0hmj closed 2 years ago
rebuild curve-fuse with latest commit, with patch #1399 still stuck
(gdb) thread apply all bt
Thread 3 (Thread 0x7f86057fa700 (LWP 740)):
#0 __libc_read (nbytes=135168, buf=0x7f86601d0010, fd=10) at ../sysdeps/unix/sysv/linux/read.c:26
#1 __libc_read (fd=10, buf=0x7f86601d0010, nbytes=135168) at ../sysdeps/unix/sysv/linux/read.c:24
#2 0x00007f866a63566d in ?? () from /lib/x86_64-linux-gnu/libfuse3.so.3
#3 0x00007f866a63001f in ?? () from /lib/x86_64-linux-gnu/libfuse3.so.3
#4 0x00007f866a569fa3 in start_thread (arg=<optimized out>) at pthread_create.c:486
#5 0x00007f8669bcd4cf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 2 (Thread 0x7f8604ff9700 (LWP 739)):
#0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
#1 0x000055e9ec1f0c41 in bthread::futex_wait_private (addr1=0x7f8604ff7c30, expected=0, timeout=0x0) at external/com_github_apache_brpc/src/bthread/sys_futex.h:37
#2 0x000055e9ec21c6f4 in bthread::wait_pthread (pw=..., ptimeout=0x0) at external/com_github_apache_brpc/src/bthread/butex.cpp:138
#3 0x000055e9ec21d53b in bthread::butex_wait_from_pthread (g=0x0, b=0x7f8664011080, expected_value=1, abstime=0x0) at external/com_github_apache_brpc/src/bthread/butex.cpp:585
#4 0x000055e9ec21d6eb in bthread::butex_wait (arg=0x7f8664011080, expected_value=1, abstime=0x0) at external/com_github_apache_brpc/src/bthread/butex.cpp:618
#5 0x000055e9ec20f1d7 in bthread_id_join (id=...) at external/com_github_apache_brpc/src/bthread/id.cpp:526
#6 0x000055e9ebf85b79 in brpc::Join (id=...) at external/com_github_apache_brpc/src/brpc/controller.cpp:527
#7 0x000055e9ebf7fc4e in brpc::Channel::CallMethod (this=0x7f8604ff8120, method=0x55e9ee906a88, controller_base=0x7f8604ff8240, request=0x7f8604ff7f00, response=0x7f8604ff7fa0, done=0x0) at external/com_github_apache_brpc/src/brpc/channel.cpp:543
#8 0x000055e9eba20532 in curvefs::mds::MdsService_Stub::MountFs (this=0x7f8604ff7ee0, controller=0x7f8604ff8240, request=0x7f8604ff7f00, response=0x7f8604ff7fa0, done=0x0) at bazel-out/k8-dbg/bin/curvefs/proto/mds.pb.cc:6809
#9 0x000055e9eb929019 in curvefs::client::rpcclient::MDSBaseClient::MountFs (this=0x55e9ee81acf0, fsName="pvc-8dcfda2b-3028-4abe-a445-73dd5d8f69eb", mountPt="e4023b00be57:9000:/mnt/test3", response=0x7f8604ff7fa0, cntl=0x7f8604ff8240,
channel=0x7f8604ff8120) at curvefs/src/client/rpcclient/base_client.cpp:39
#10 0x000055e9eb95d8e4 in curvefs::client::rpcclient::MdsClientImpl::<lambda(int, uint64_t, brpc::Channel*, brpc::Controller*)>::operator()(int, uint64_t, brpc::Channel *, brpc::Controller *) const (__closure=0x7f8664001c90, addrindex=0,
rpctimeoutMS=500, channel=0x7f8604ff8120, cntl=0x7f8604ff8240) at curvefs/src/client/rpcclient/mds_client.cpp:64
#11 0x000055e9eb962b0d in std::_Function_handler<int(int, long unsigned int, brpc::Channel*, brpc::Controller*), curvefs::client::rpcclient::MdsClientImpl::MountFs(const string&, const string&, curvefs::mds::FsInfo*)::<lambda(int, uint64_t, brpc::Channel*, brpc::Controller*)> >::_M_invoke(const std::_Any_data &, int &&, unsigned long &&, brpc::Channel *&&, brpc::Controller *&&) (__functor=..., __args#0=@0x7f8604ff80a4: 0, __args#1=@0x7f8604ff8098: 500, __args#2=@0x7f8604ff8090: 0x7f8604ff8120,
__args#3=@0x7f8604ff8088: 0x7f8604ff8240) at /usr/include/c++/8/bits/std_function.h:282
#12 0x000055e9ebde99ed in std::function<int (int, unsigned long, brpc::Channel*, brpc::Controller*)>::operator()(int, unsigned long, brpc::Channel*, brpc::Controller*) const (this=0x7f8604ff8570, __args#0=0, __args#1=500, __args#2=0x7f8604ff8120,
__args#3=0x7f8604ff8240) at /usr/include/c++/8/bits/std_function.h:687
#13 0x000055e9ebdd845c in curve::client::RPCExcutorRetryPolicy::ExcuteTask(int, unsigned long, std::function<int (int, unsigned long, brpc::Channel*, brpc::Controller*)>) (this=0x55e9ee915430, mdsindex=0, rpcTimeOutMS=500, task=...)
at src/client/mds_client.cpp:209
#14 0x000055e9ebdd7f78 in curve::client::RPCExcutorRetryPolicy::DoRPCTask(std::function<int (int, unsigned long, brpc::Channel*, brpc::Controller*)>, unsigned long) (this=0x55e9ee915430, rpctask=..., maxRetryTimeMS=16000) at src/client/mds_client.cpp:73
#15 0x000055e9eb95dc4c in curvefs::client::rpcclient::MdsClientImpl::MountFs (this=0x55e9ee915420, fsName="pvc-8dcfda2b-3028-4abe-a445-73dd5d8f69eb", mountPt="e4023b00be57:9000:/mnt/test3", fsInfo=0x55e9ee81aae0)
at curvefs/src/client/rpcclient/mds_client.cpp:83
#16 0x000055e9eb85ae3b in curvefs::client::FuseClient::FuseOpInit (this=0x55e9ee8f2a40, userdata=0x7ffe0757ea90, conn=0x55e9eea6e9b4) at curvefs/src/client/fuse_client.cpp:183
#17 0x000055e9eb87c8c4 in curvefs::client::FuseS3Client::FuseOpInit (this=0x55e9ee8f2a40, userdata=0x7ffe0757ea90, conn=0x55e9eea6e9b4) at curvefs/src/client/fuse_s3_client.cpp:87
#18 0x000055e9eb8276a5 in FuseOpInit (userdata=0x7ffe0757ea90, conn=0x55e9eea6e9b4) at curvefs/src/client/curve_fuse_op.cpp:214
#19 0x00007f866a6331a2 in ?? () from /lib/x86_64-linux-gnu/libfuse3.so.3
#20 0x00007f866a634626 in ?? () from /lib/x86_64-linux-gnu/libfuse3.so.3
#21 0x00007f866a630091 in ?? () from /lib/x86_64-linux-gnu/libfuse3.so.3
#22 0x00007f866a569fa3 in start_thread (arg=<optimized out>) at pthread_create.c:486
#23 0x00007f8669bcd4cf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 1 (Thread 0x7f8668d55c80 (LWP 738)):
#0 futex_abstimed_wait_cancelable (private=0, abstime=0x0, expected=0, futex_word=0x7ffe0757e9a0) at ../sysdeps/unix/sysv/linux/futex-internal.h:205
#1 do_futex_wait (sem=sem@entry=0x7ffe0757e9a0, abstime=0x0) at sem_waitcommon.c:111
#2 0x00007f866a572988 in __new_sem_wait_slow (sem=0x7ffe0757e9a0, abstime=0x0) at sem_waitcommon.c:181
#3 0x00007f866a6302b8 in fuse_session_loop_mt () from /lib/x86_64-linux-gnu/libfuse3.so.3
#4 0x000055e9eb8263b8 in main (argc=12, argv=0x7ffe0757eba8) at curvefs/src/client/main.c:151
Describe the bug (描述bug)
running curve-fuse in container will stuck
To Reproduce (复现方法)
docker run --name xxx --entrypoint=/bin/bash --privileged -it -d harbor.cloud.netease.com/curve/curvefs:csi-test
For help, type "help". Type "apropos word" to search for commands related to "word". Attaching to process 638 [New LWP 639] [New LWP 640] [New LWP 641] [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". futex_abstimed_wait_cancelable (private=0, abstime=0x0, expected=0, futex_word=0x7ffc8feb8e50) at ../sysdeps/unix/sysv/linux/futex-internal.h:205 205 ../sysdeps/unix/sysv/linux/futex-internal.h: No such file or directory. (gdb) bt
0 futex_abstimed_wait_cancelable (private=0, abstime=0x0, expected=0, futex_word=0x7ffc8feb8e50) at ../sysdeps/unix/sysv/linux/futex-internal.h:205
1 do_futex_wait (sem=sem@entry=0x7ffc8feb8e50, abstime=0x0) at sem_waitcommon.c:111
2 0x00007fe5b5574988 in __new_sem_wait_slow (sem=0x7ffc8feb8e50, abstime=0x0) at sem_waitcommon.c:181
3 0x00007fe5b56322b8 in fuse_session_loop_mt () from /lib/x86_64-linux-gnu/libfuse3.so.3
4 0x0000556051c49c38 in main (argc=12, argv=0x7ffc8feb9058) at curvefs/src/client/main.c:151
(gdb) thread apply 640 bt warning: Unknown thread 640 (gdb) thread apply 2 bt
Thread 2 (Thread 0x7fe557fff700 (LWP 639)):
0 futex_wait (private=, expected=3, futex_word=0x5560551d8484) at ../sysdeps/unix/sysv/linux/futex-internal.h:61
1 futex_wait_simple (private=, expected=3, futex_word=0x5560551d8484) at ../sysdeps/nptl/futex-internal.h:135
2 __condvar_quiesce_and_switch_g1 (private=, g1index=, wseq=, cond=0x5560551d8470) at pthread_cond_common.c:416
3 __pthread_cond_signal (cond=0x5560551d8470) at pthread_cond_signal.c:78
4 0x00007fe5b4d693d9 in std::condition_variable::notify_one() () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
5 0x0000556052825749 in google::AsyncLogger::Write (this=0x5560551d83f0, force_flush=true, timestamp=1652152132,
6 0x000055605281e3b0 in google::LogDestination::MaybeLogToLogfile (logfile_severity=0, timestamp=1652152132,
7 0x000055605281e44b in google::LogDestination::LogToAllLogfiles (severity=3, timestamp=1652152132,
8 0x0000556052818dc9 in google::LogMessage::SendToLog (this=0x7fe557ffe780) at external/com_github_google_glog/src/logging.cc:1516
9 0x0000556052818915 in google::LogMessage::Flush (this=0x7fe557ffe780) at external/com_github_google_glog/src/logging.cc:1434
10 0x000055605281b888 in google::LogMessageFatal::~LogMessageFatal (this=0x7fe557ffe780, __in_chrg=) at external/com_github_google_glog/src/logging.cc:2190
11 0x0000556051c4af7d in FuseOpInit (userdata=0x7ffc8feb8f40, conn=0x5560554d8d54) at curvefs/src/client/curve_fuse_op.cpp:216
12 0x00007fe5b56351a2 in ?? () from /lib/x86_64-linux-gnu/libfuse3.so.3
13 0x00007fe5b5636626 in ?? () from /lib/x86_64-linux-gnu/libfuse3.so.3
14 0x00007fe5b5632091 in ?? () from /lib/x86_64-linux-gnu/libfuse3.so.3
15 0x00007fe5b556bfa3 in start_thread (arg=) at pthread_create.c:486
16 0x00007fe5b4bcf4cf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) thread apply 3 bt
Thread 3 (Thread 0x7fe55cff9700 (LWP 640)):
0 __libc_read (nbytes=135168, buf=0x7fe5981d0010, fd=10) at ../sysdeps/unix/sysv/linux/read.c:26
1 __libc_read (fd=10, buf=0x7fe5981d0010, nbytes=135168) at ../sysdeps/unix/sysv/linux/read.c:24
2 0x00007fe5b563766d in ?? () from /lib/x86_64-linux-gnu/libfuse3.so.3
3 0x00007fe5b563201f in ?? () from /lib/x86_64-linux-gnu/libfuse3.so.3
4 0x00007fe5b556bfa3 in start_thread (arg=) at pthread_create.c:486
5 0x00007fe5b4bcf4cf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) thread apply 1 bt
Thread 1 (Thread 0x7fe5b3d57c80 (LWP 638)):
0 futex_abstimed_wait_cancelable (private=0, abstime=0x0, expected=0, futex_word=0x7ffc8feb8e50) at ../sysdeps/unix/sysv/linux/futex-internal.h:205
1 do_futex_wait (sem=sem@entry=0x7ffc8feb8e50, abstime=0x0) at sem_waitcommon.c:111
2 0x00007fe5b5574988 in __new_sem_wait_slow (sem=0x7ffc8feb8e50, abstime=0x0) at sem_waitcommon.c:181
3 0x00007fe5b56322b8 in fuse_session_loop_mt () from /lib/x86_64-linux-gnu/libfuse3.so.3
4 0x0000556051c49c38 in main (argc=12, argv=0x7ffc8feb9058) at curvefs/src/client/main.c:151
(gdb) thread apply 4 bt
Thread 4 (Thread 0x7fe55d7fa700 (LWP 641)):
0 futex_wait (private=0, expected=, futex_word=0x5560551d8490) at ../sysdeps/unix/sysv/linux/futex-internal.h:61
1 futex_wait_simple (private=0, expected=, futex_word=0x5560551d8490) at ../sysdeps/nptl/futex-internal.h:135
2 __condvar_acquire_lock (private=0, cond=0x5560551d8470) at pthread_cond_common.c:280
3 __condvar_cancel_waiting (cond=0x5560551d8470, seq=22, g=, private=0) at pthread_cond_wait.c:86
4 0x00007fe5b5572497 in __pthread_cond_wait_common (abstime=0x7fe55d7f97b0, mutex=0x5560551d8448, cond=0x5560551d8470) at pthread_cond_wait.c:554
5 __pthread_cond_timedwait (cond=0x5560551d8470, mutex=0x5560551d8448, abstime=0x7fe55d7f97b0) at pthread_cond_wait.c:667
6 0x0000556051c8b8ee in gthread_cond_timedwait (cond=0x5560551d8470, mutex=0x5560551d8448, abs_timeout=0x7fe55d7f97b0) at /usr/include/x86_64-linux-gnu/c++/8/bits/gthr-default.h:871
7 0x0000556051c934ab in std::condition_variable::wait_until_impl<std::chrono::duration<long, std::ratio<1l, 1000000000l> > > (this=0x5560551d8470, lock=..., __atime=...) at /usr/include/c++/8/condition_variable:178
8 0x0000556051c91325 in std::condition_variable::wait_until<std::chrono::duration<long, std::ratio<1l, 1000000000l> > > (this=0x5560551d8470, lock=..., atime=...) at /usr/include/c++/8/condition_variable:106
9 0x00005560528263fc in std::condition_variable::wait_for<long, std::ratio<1l, 1l> > (this=0x5560551d8470, lock=..., rtime=...) at /usr/include/c++/8/condition_variable:143
10 0x00005560528258e1 in google::AsyncLogger::RunThread (this=0x5560551d83f0) at external/com_github_google_glog/src/logging_async.cc:161
11 0x0000556052826dae in std::__invoke_impl<void, void (google::AsyncLogger::)(), google::AsyncLogger> (__f=@0x5560551d8540: (void (google::AsyncLogger::)(class google::AsyncLogger const)) 0x5560528257d8 <google::AsyncLogger::RunThread()>,
12 0x0000556052826257 in std::invoke<void (google::AsyncLogger::)(), google::AsyncLogger> (fn=@0x5560551d8540: (void (google::AsyncLogger::)(class google::AsyncLogger const)) 0x5560528257d8 <google::AsyncLogger::RunThread()>,
13 0x0000556052827e9d in std::thread::_Invoker<std::tuple<void (google::AsyncLogger::)(), google::AsyncLogger> >::_M_invoke<0ul, 1ul> (this=0x5560551d8538) at /usr/include/c++/8/thread:244
14 0x0000556052827e58 in std::thread::_Invoker<std::tuple<void (google::AsyncLogger::)(), google::AsyncLogger> >::operator() (this=0x5560551d8538) at /usr/include/c++/8/thread:253
15 0x0000556052827e3c in std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (google::AsyncLogger::)(), google::AsyncLogger> > >::_M_run (this=0x5560551d8530) at /usr/include/c++/8/thread:196
16 0x00007fe5b4d6eb2f in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
17 0x00007fe5b556bfa3 in start_thread (arg=) at pthread_create.c:486
18 0x00007fe5b4bcf4cf in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95