Closed BewareMyPower closed 9 months ago
Use a single Boost application code to reproduce:
#include <stdio.h>
#include <boost/asio.hpp>
#include <future>
namespace asio = boost::asio;
using asio::io_context;
using asio::ip::tcp;
using boost::system::error_code;
constexpr int kPort = 11111;
static void server_task();
static void client_task();
int main() {
auto server = std::async(std::launch::async, &server_task);
auto client = std::async(std::launch::async, &client_task);
client.get();
server.get();
return 0;
}
static void server_task() {
io_context io;
auto loop = std::async(std::launch::async, [&io] {
io_context::work work{io};
error_code ec;
io.run(ec);
printf("event loop exits with %s\n", ec.message().c_str());
});
tcp::acceptor acceptor{io, tcp::endpoint{tcp::v4(), kPort}};
tcp::socket socket{io};
acceptor.accept(socket);
error_code ec;
size_t total_bytes = 0;
while (true) {
char buf[1024];
auto len = socket.read_some(asio::buffer(buf), ec);
if (ec) {
printf("read complete: %s\n", ec.message().c_str());
break;
}
buf[len] = '\0';
total_bytes += len;
}
printf("Received %zu bytes\n", total_bytes);
socket.close(ec);
acceptor.close(ec);
io.stop();
loop.get();
}
static void client_task() {
io_context io;
tcp::resolver resolver{io};
tcp::resolver::query query{"localhost", std::to_string(kPort)};
tcp::socket socket{io};
asio::connect(socket, resolver.resolve("localhost", std::to_string(kPort)));
std::vector<char> buffer(1048576, 'a');
std::atomic_bool done{false};
asio::write(socket, asio::const_buffer{buffer.data(), buffer.size()});
}
Sometimes it crashed.
* thread #2, stop reason = EXC_BAD_ACCESS (code=1, address=0x88)
frame #0: 0x000000010001b134 tcp_e2e`boost::asio::detail::kqueue_reactor::deregister_descriptor(this=0x0000000000000000, descriptor=13, descriptor_data=0x000000016fe86db8, closing=false) at kqueue_reactor.ipp:350:16
347 EVFILT_READ, EV_DELETE, 0, 0, 0);
348 BOOST_ASIO_KQUEUE_EV_SET(&events[1], descriptor,
349 EVFILT_WRITE, EV_DELETE, 0, 0, 0);
-> 350 ::kevent(kqueue_fd_, events, descriptor_data->num_kevents_, 0, 0, 0);
351 }
352
353 op_queue<operation> ops;
Target 0: (tcp_e2e) stopped.
(lldb) p kqueue_fd_
error: Couldn't apply expression side effects : Couldn't dematerialize a result variable: couldn't read its memory
(lldb) p this
(boost::asio::detail::kqueue_reactor *) $1 = nullptr
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=1, address=0x88)
frame #0: 0x000000010001b134 tcp_e2e`boost::asio::detail::kqueue_reactor::deregister_descriptor(this=0x0000000000000000, descriptor=13, descriptor_data=0x000000016fe86db8, closing=false) at kqueue_reactor.ipp:350:16
frame #1: 0x000000010001c624 tcp_e2e`boost::asio::detail::reactive_socket_service_base::close(this=0x0000600002100c28, impl=0x000000016fe86db0, ec=0x000000016fe86940) at reactive_socket_service_base.ipp:110:14
* frame #2: 0x00000001000065d0 tcp_e2e`boost::asio::basic_socket<boost::asio::ip::tcp, boost::asio::any_io_executor>::close(this=0x000000016fe86da8, ec=0x000000016fe86940) at basic_socket.hpp:547:25
frame #3: 0x0000000100004be4 tcp_e2e`server_task() at tcp_e2e.cc:51:10
frame #4: 0x0000000100024b2c tcp_e2e`decltype(__f=0x0000600003304090)()>()()) std::__1::__invoke[abi:v15006]<void (*)()>(void (*&&)()) at invoke.h:394:23
frame #5: 0x0000000100024b04 tcp_e2e`void std::__1::__async_func<void (*)()>::__execute<>(this=0x0000600003304090, (null)=__tuple_indices<> @ 0x000000016fe86eaf) at future:2169:16
frame #6: 0x0000000100024adc tcp_e2e`std::__1::__async_func<void (*)()>::operator(this=0x0000600003304090)() at future:2162:16
frame #7: 0x00000001000249e4 tcp_e2e`std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >::__execute(this=0x0000600003304000) at future:1008:9
frame #8: 0x000000010002527c tcp_e2e`decltype(__f=0x0000600000204108, __a0=0x0000600000204118)()> >*>().*std::declval<void (std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >::*)()>()()) std::__1::__invoke[abi:v15006]<void (std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >::*)(), std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >*, void>(void (std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >::*&&)(), std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >*&&) at invoke.h:359:23
frame #9: 0x00000001000251fc tcp_e2e`void std::__1::__thread_execute[abi:v15006]<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, void (std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >::*)(), std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >*, 2ul>(__t=size=3, (null)=__tuple_indices<2UL> @ 0x000000016fe86f7f)()> >::*)(), std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >*>&, std::__1::__tuple_indices<2ul>) at thread:290:5
frame #10: 0x0000000100024ea8 tcp_e2e`void* std::__1::__thread_proxy[abi:v15006]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, void (std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >::*)(), std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >*> >(__vp=0x0000600000204100) at thread:301:5
frame #11: 0x00000001a3983fa8 libsystem_pthread.dylib`_pthread_start + 148
I switched from Boost.Asio to Asio 1.28.1 and the error still happened.
FetchContent_Declare(
asio
GIT_REPOSITORY https://github.com/chriskohlhoff/asio.git
GIT_TAG 1f8d154829b902dbc45a651587c6c6df948358e8 # asio 1.28.1
SOURCE_DIR ${CMAKE_SOURCE_DIR}/thirdparty
)
FetchContent_MakeAvailable(asio)
@@ -1,12 +1,11 @@
#include <stdio.h>
-#include <boost/asio.hpp>
+#include <asio.hpp>
#include <future>
-namespace asio = boost::asio;
+using asio::error_code;
using asio::io_context;
using asio::ip::tcp;
-using boost::system::error_code;
constexpr int kPort = 11111;
Backtrace:
(lldb) bt
* thread #2, stop reason = EXC_BAD_ACCESS (code=1, address=0x88)
* frame #0: 0x0000000100013f78 tcp_e2e`asio::detail::kqueue_reactor::deregister_descriptor(this=0x0000000000000000, descriptor=13, descriptor_data=0x000000016fe86db8, closing=false) at kqueue_reactor.ipp:349:16
frame #1: 0x0000000100015480 tcp_e2e`asio::detail::reactive_socket_service_base::close(this=0x0000600001700028, impl=0x000000016fe86db0, ec=0x000000016fe86948) at reactive_socket_service_base.ipp:109:14
frame #2: 0x00000001000024b4 tcp_e2e`asio::basic_socket<asio::ip::tcp, asio::any_io_executor>::close(this=0x000000016fe86da8, ec=0x000000016fe86948) at basic_socket.hpp:546:25
frame #3: 0x0000000100001528 tcp_e2e`server_task() at tcp_e2e.cc:50:10
frame #4: 0x000000010001dadc tcp_e2e`decltype(__f=0x0000600003300090)()>()()) std::__1::__invoke[abi:v15006]<void (*)()>(void (*&&)()) at invoke.h:394:23
frame #5: 0x000000010001dab4 tcp_e2e`void std::__1::__async_func<void (*)()>::__execute<>(this=0x0000600003300090, (null)=__tuple_indices<> @ 0x000000016fe86eaf) at future:2169:16
frame #6: 0x000000010001da8c tcp_e2e`std::__1::__async_func<void (*)()>::operator(this=0x0000600003300090)() at future:2162:16
frame #7: 0x000000010001d994 tcp_e2e`std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >::__execute(this=0x0000600003300000) at future:1008:9
frame #8: 0x000000010001e22c tcp_e2e`decltype(__f=0x0000600000201308, __a0=0x0000600000201318)()> >*>().*std::declval<void (std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >::*)()>()()) std::__1::__invoke[abi:v15006]<void (std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >::*)(), std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >*, void>(void (std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >::*&&)(), std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >*&&) at invoke.h:359:23
frame #9: 0x000000010001e1ac tcp_e2e`void std::__1::__thread_execute[abi:v15006]<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, void (std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >::*)(), std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >*, 2ul>(__t=size=3, (null)=__tuple_indices<2UL> @ 0x000000016fe86f7f)()> >::*)(), std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >*>&, std::__1::__tuple_indices<2ul>) at thread:290:5
frame #10: 0x000000010001de58 tcp_e2e`void* std::__1::__thread_proxy[abi:v15006]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, void (std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >::*)(), std::__1::__async_assoc_state<void, std::__1::__async_func<void (*)()> >*> >(__vp=0x0000600000201300) at thread:301:5
frame #11: 0x00000001a3983fa8 libsystem_pthread.dylib`_pthread_start + 148
Close this since the crash is caused by the bad code. The root cause is here:
buf[len] = '\0';
Buffer overflow happens when len >= sizeof(buf)
. And then the content of &socket.impl_.service_
could be modified by the operation above. It can be verified by the following debugging.
frame #0: 0x00000001000013e8 tcp_e2e`server_task() at tcp_e2e.cc:34:12
31
32 tcp::acceptor acceptor{io, tcp::endpoint{tcp::v4(), kPort}};
33 tcp::socket socket{io};
-> 34 acceptor.accept(socket);
35
36 error_code ec;
37 size_t total_bytes = 0;
Target 0: (tcp_e2e) stopped.
(lldb) watchpoint set var &socket.impl_
Watchpoint created: Watchpoint 1: addr = 0x16fe86da8 size = 8 state = enabled type = w
declare @ 'asio-demo/src/tcp_e2e.cc:33'
watchpoint spec = '&socket.impl_'
new value: 0x0000600001700040
(lldb) c
Process 10216 resuming
Watchpoint 1 hit:
old value: 0x0000600001700040
new value: 0x0000600001700000
Process 10216 stopped
* thread #2, stop reason = watchpoint 1
frame #0: 0x00000001000014ec tcp_e2e`server_task() at tcp_e2e.cc:46:20
43 break;
44 }
45 buf[len] = '\0';
-> 46 total_bytes += len;
47 }
48 printf("Received %zu bytes\n", total_bytes);
49
Target 0: (tcp_e2e) stopped.
I'm using Boost asio on macOS m1.
Here is the server side that reads the data synchronously and then breaks the loop if the remote side closes the connection.
I wrote the following Python script to test.
It sends many bytes, otherwise the issue won't be triggered.
There is a little chance (but it's easy to reproduce) that the server side might crash.
As you can see, the cause is that
reactor_
points to a destroyed object indetail/impl/reactive_socket_service_base.ipp
.If I removed the
socket.close()
andacceptor.close()
, there will be a more race case inreactive_socket_service_base::destroy
.