chriskohlhoff / asio

Asio C++ Library
http://think-async.com/Asio
4.97k stars 1.22k forks source link

Hang in the Strand impl during shutdown. #1538

Open Wayonb opened 1 month ago

Wayonb commented 1 month ago

Hi,

I am seeing an issue where a task dispatched to a strand does not complete during shutdown. The problem is that running_in_this_thread always returns false. I think the strand believes the task is running on another thread? Environment: Boost 1.86.0, GCC 12/13, Ubuntu.

  // If we are already in the strand then the handler can run immediately.
  if (running_in_this_thread(impl))
  {
    fenced_block b(fenced_block::full);
    static_cast<Handler&&>(handler)();
    return;
  }
bool strand_service::running_in_this_thread(
    const implementation_type& impl) const
{
  return call_stack<strand_impl>::contains(impl) != 0;
}
(gdb) list
91    // Determine whether the specified owner is on the stack. Returns address of
92    // key if present, 0 otherwise.
93    static Value* contains(Key* k)
94    {
95      context* elem = top_;
96      while (elem)
97      {
98        if (elem->key_ == k)
99          return elem->value_;
100       elem = elem->next_;
(gdb) info locals
elem = 0x0

(gdb) info args
k = 0x5555563d0cd0

(gdb) print *k
$22 = {<boost::asio::detail::scheduler_operation> = {next_ = 0x0, func_ = 0x7fffbdaf1bf0 <boost::asio::detail::strand_service::do_complete(void*, boost::asio::detail::scheduler_operation*, boost::system::error_code const&, unsigned long)>, task_result_ = 0}, mutex_ = {<boost::asio::detail::noncopyable> = {<No data fields>}, mutex_ = {__data = {__lock = 0, __count = 0, __owner = 0, __nusers = 0, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = "" = {0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000', 0 '\000'}, __align = 0}}, locked_ = true, waiting_queue_ = {<boost::asio::detail::noncopyable> = {<No data fields>}, front_ = 0x0, back_ = 0x0}, ready_queue_ = {<boost::asio::detail::noncopyable> = {<No data fields>}, front_ = 0x7ffdd8000dd0, back_ = 0x7ffdd8000dd0}}

(gdb) print top_
$23 = {<boost::asio::detail::keyword_tss_ptr<boost::asio::detail::call_stack<boost::asio::detail::strand_service::strand_impl>::context>> = {<boost::asio::detail::noncopyable> = {<No data fields>}, static value_ = 0x0}, <No data fields>}
Wayonb commented 1 month ago

If I go up several frames (to frame 8, strand_service::do_complete), I can see where the strand impl is pushed onto the call stack. What am I missing?

180     // Indicate that this strand is executing on the current thread.
181     call_stack<strand_impl>::context ctx(impl);
(gdb) bt
#0  boost::asio::detail::call_stack<boost::asio::detail::strand_service::strand_impl, unsigned char>::contains (k=0x5555563d0cd0) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/detail/call_stack.hpp:96
#1  0x00005555557392bd in boost::asio::detail::strand_service::running_in_this_thread (this=0x5555563d02c0, impl=@0x7fff44bfe9d8: 0x5555563d0cd0) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/detail/impl/strand_service.ipp:92
#2  0x00005555559f522b in boost::asio::detail::strand_service::dispatch<boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int> >(boost::asio::detail::strand_service::implementation_type &, boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int> &) (this=0x5555563d02c0, impl=@0x7fff44bfe9d8: 0x5555563d0cd0, handler=...) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/detail/impl/strand_service.hpp:40
#3  0x00005555559f30fe in boost::asio::io_context::strand::dispatch<boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int>, std::allocator<void> >(boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int> &&, const std::allocator<void> &) const (this=0x7fff44bfe9d0, f=..., a=...) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/io_context_strand.hpp:177
#4  0x00005555559fcd2b in boost::asio::detail::handler_work_base<boost::asio::io_context::strand, boost::asio::io_context::basic_executor_type<std::allocator<void>, 0>, boost::asio::io_context, boost::asio::executor, void>::dispatch<boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int>, boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int> >(boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, 
boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int> &, boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int> &) (this=0x7fff44bfe9d0, function=..., handler=...) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/detail/handler_work.hpp:169
#5  0x00005555559fb3af in boost::asio::detail::handler_work<boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int>, boost::asio::io_context::basic_executor_type<std::allocator<void>, 0>, void>::complete<boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int> >(boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, 
boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int> &, boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int> &) (this=0x7fff44bfe9d0, function=..., handler=...) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/detail/handler_work.hpp:437
#6  0x00005555559f92b1 in boost::asio::detail::completion_handler<boost::asio::detail::binder2<boost::asio::ssl::detail::io_op<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor>, boost::asio::ssl::detail::write_op<boost::asio::const_buffer>, boost::asio::detail::write_op<boost::asio::ssl::stream<boost::asio::basic_stream_socket<boost::asio::ip::tcp, boost::asio::executor> >, boost::asio::mutable_buffer, const boost::asio::mutable_buffer*, boost::asio::detail::transfer_all_t, boost::asio::executor_binder<catapult::utils::WrappedWithOwnerDecorator<catapult::ionet::(anonymous namespace)::SocketGuard::closeImpl()::<lambda(const auto:36&, auto:37)> >, boost::asio::io_context::strand> > >, boost::system::error_code, long unsigned int>, boost::asio::io_context::basic_executor_type<std::allocator<void>, 0> >::do_complete(void *, boost::asio::detail::operation *, const boost::system::error_code &, std::size_t) (owner=0x5555560be300, base=0x7fff88002010) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/detail/completion_handler.hpp:74
#7  0x00007fffbdaec868 in boost::asio::detail::scheduler_operation::complete (this=0x7fff88002010, owner=0x5555560be300, ec=..., bytes_transferred=0) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/detail/scheduler_operation.hpp:40
#8  0x00007fffbdaf1c96 in boost::asio::detail::strand_service::do_complete (owner=0x5555560be300, base=0x5555563d0cd0, ec=...) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/detail/impl/strand_service.ipp:193
#9  0x000055555573548c in boost::asio::detail::scheduler_operation::complete (this=0x5555563d0cd0, owner=0x5555560be300, ec=..., bytes_transferred=0) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/detail/scheduler_operation.hpp:40
#10 0x000055555573873b in boost::asio::detail::scheduler::do_run_one (this=0x5555560be300, lock=..., this_thread=..., ec=...) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/detail/impl/scheduler.ipp:493
#11 0x000055555573810e in boost::asio::detail::scheduler::run (this=0x5555560be300, ec=...) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/detail/impl/scheduler.ipp:210
#12 0x0000555555a34cf7 in boost::asio::io_context::run (this=0x5555564b0830) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/impl/io_context.ipp:64
#13 0x0000555555a324a4 in catapult::thread::(anonymous namespace)::DefaultIoThreadPool::ioWorkerFunction (this=0x5555564b07e0) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/src/catapult/thread/IoThreadPool.cpp:124
#14 0x0000555555a310d6 in operator() (__closure=0x555556452858) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/src/catapult/thread/IoThreadPool.cpp:98
#15 0x0000555555a33e42 in std::__invoke_impl<void, catapult::thread::(anonymous namespace)::DefaultIoThreadPool::start()::<lambda()> >(std::__invoke_other, struct {...} &&) (__f=...) at /usr/include/c++/13/bits/invoke.h:61
#16 0x0000555555a33df7 in std::__invoke<catapult::thread::(anonymous namespace)::DefaultIoThreadPool::start()::<lambda()> >(struct {...} &&) (__fn=...) at /usr/include/c++/13/bits/invoke.h:96
#17 0x0000555555a33da4 in std::thread::_Invoker<std::tuple<catapult::thread::(anonymous namespace)::DefaultIoThreadPool::start()::<lambda()> > >::_M_invoke<0>(std::_Index_tuple<0>) (this=0x555556452858) at /usr/include/c++/13/bits/std_thread.h:292
#18 0x0000555555a33d78 in std::thread::_Invoker<std::tuple<catapult::thread::(anonymous namespace)::DefaultIoThreadPool::start()::<lambda()> > >::operator()(void) (this=0x555556452858) at /usr/include/c++/13/bits/std_thread.h:299
warning: RTTI symbol not found for class 'std::thread::_State_impl<std::thread::_Invoker<std::tuple<catapult::thread::(anonymous namespace)::DefaultIoThreadPool::start()::{lambda()#1}> > >'
#19 0x0000555555a33d5c in std::thread::_State_impl<std::thread::_Invoker<std::tuple<catapult::thread::(anonymous namespace)::DefaultIoThreadPool::start()::<lambda()> > > >::_M_run(void) (this=0x555556452850) at /usr/include/c++/13/bits/std_thread.h:244
#20 0x00007ffff54eabb4 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
#21 0x00007ffff509ca94 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
#22 0x00007ffff5129c3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
(gdb) frame 8
#8  0x00007fffbdaf1c96 in boost::asio::detail::strand_service::do_complete (owner=0x5555560be300, base=0x5555563d0cd0, ec=...) at /home/wayonb/work/code/symbol/symbol_codecov/client/catapult/_deps/boost/include/boost/asio/detail/impl/strand_service.ipp:193
193       o->complete(owner, ec, 0);
(gdb) list
188     // Run all ready handlers. No lock is required since the ready queue is
189     // accessed only within the strand.
190     while (operation* o = impl->ready_queue_.front())
191     {
192       impl->ready_queue_.pop();
193       o->complete(owner, ec, 0);
194     }
195   }
196 }
197 
(gdb) list -10
183     // Ensure the next handler, if any, is scheduled on block exit.
184     on_do_complete_exit on_exit;
185     on_exit.owner_ = static_cast<io_context_impl*>(owner);
186     on_exit.impl_ = impl;
187 
188     // Run all ready handlers. No lock is required since the ready queue is
189     // accessed only within the strand.
190     while (operation* o = impl->ready_queue_.front())
191     {
192       impl->ready_queue_.pop();

(gdb) list -15
173 void strand_service::do_complete(void* owner, operation* base,
174     const boost::system::error_code& ec, std::size_t /*bytes_transferred*/)
175 {
176   if (owner)
177   {
178     strand_impl* impl = static_cast<strand_impl*>(base);
179 
180     // Indicate that this strand is executing on the current thread.
181     call_stack<strand_impl>::context ctx(impl);
182