chriskohlhoff / asio

Asio C++ Library
http://think-async.com/Asio
4.97k stars 1.22k forks source link

ssl stream crashed when connection is closing #1522

Open 0luan opened 2 months ago

0luan commented 2 months ago

I have a TLS client based on ASIO. The simplified code is roughly as follows. (Note: all socket operations are protected by a mutex, and all ASIO asynchronous callbacks are executed on the same separate thread.)

std::mutex socket_op_lock;
ssl::stream<tcp::socket> ssl_socket;

Connect() {
    ssl_socket.next_layer().async_connect(on_connect);
}

on_connect(const asio::error_code& error) {
    std::lock_guard<std::mutex> socket_op_lock;
    if (error) {
        CloseSocket();
    }
    else {
        ssl_socket.async_handshake(on_handshake);
    }
}

on_handshake(const asio::error_code& error) {
    std::lock_guard<std::mutex> socket_op_lock;
    if (error) {
        CloseSocket();
    }
    else {
        Receive();
    }
}

Receive() {
    // called internal, already locked
    async_read(ssl_socket, ..., on_read);
}

on_read() {
    std::lock_guard<std::mutex> socket_op_lock;
    if (error) {
        CloseSocket();
    }
    else {
        Receive();
    }
}

Disconnect() {
    std::lock_guard<std::mutex> socket_op_lock;
    CloseSocket();
}

CloseSocket() {
    // called internal, already locked
    ssl_socket.next_layer().shutdown();
    ssl_socket.next_layer().close();
}

The problem occurs while the connection is being closed. Because the ssl::stream shutdown might block, I close the underlying TCP connection directly. However, I occasionally see the following crash on Android/iOS:

0  MyApp  asio::detail::kqueue_reactor::start_op(int, int, asio::detail::kqueue_reactor::descriptor_state*&, asio::detail::reactor_op*, bool, bool) + 108
1  MyApp  asio::detail::kqueue_reactor::start_op(int, int, asio::detail::kqueue_reactor::descriptor_state*&, asio::detail::reactor_op*, bool, bool) + 100
2  MyApp  void asio::detail::reactive_socket_service_base::async_receive<asio::mutable_buffers_1, asio::ssl::detail::io_op<asio::basic_stream_socket<asio::ip::tcp>, asio::ssl::detail::handshake_op, std::__1::__bind<void (MyApp::MyTcpClient::*)(std::__1::error_code const&), std::__1::shared_ptr<MyApp::MyTcpClient>, std::__1::placeholders::__ph<1> const&> > >(asio::detail::reactive_socket_service_base::base_implementation_type&, asio::mutable_buffers_1 const&, int, asio::ssl::detail::io_op<asio::basic_stream_socket<asio::ip::tcp>, asio::ssl::detail::handshake_op, std::__1::__bind<void (MyApp::MyTcpClient::*)(std::__1::error_code const&), std::__1::shared_ptr<MyApp::MyTcpClient>, std::__1::placeholders::__ph<1> const&> >&) + 384
3  MyApp  asio::ssl::detail::io_op<asio::basic_stream_socket<asio::ip::tcp>, asio::ssl::detail::handshake_op, std::__1::__bind<void (MyApp::MyTcpClient::*)(std::__1::error_code const&), std::__1::shared_ptr<MyApp::MyTcpClient>, std::__1::placeholders::__ph<1> const&> >::operator()(std::__1::error_code, unsigned long, int) + 900
4  MyApp  asio::detail::reactive_socket_send_op<asio::const_buffers_1, asio::detail::write_op<asio::basic_stream_socket<asio::ip::tcp>, asio::mutable_buffer, asio::mutable_buffer const*, asio::detail::transfer_all_t, asio::ssl::detail::io_op<asio::basic_stream_socket<asio::ip::tcp>, asio::ssl::detail::handshake_op, std::__1::__bind<void (MyApp::MyTcpClient::*)(std::__1::error_code const&), std::__1::shared_ptr<MyApp::MyTcpClient>, std::__1::placeholders::__ph<1> const&> > > >::do_complete(void*, asio::detail::scheduler_operation*, std::__1::error_code const&, unsigned long) + 228
5  MyApp  asio::detail::scheduler::do_run_one(asio::detail::conditionally_enabled_mutex::scoped_lock&, asio::detail::scheduler_thread_info&, std::__1::error_code const&) + 692
6  MyApp  asio::detail::scheduler::run(std::__1::error_code&) + 188
7  MyApp  MyApp::NetworkIoThread::CustomLoop() + 44
8  MyApp  MyApp::OMMsgThread::Run() + 96
9  libsystem_pthread.dylib  __pthread_start + 136

So I guessed that, after the underlying TCP connection is closed, some operations in ssl::stream might still be triggered because the ssl::stream shutdown was never explicitly invoked, and that this led to the crashes. I then added an SSL-layer shutdown step, and the code now looks like this:

Disconnect() {
    std::lock_guard<std::mutex> socket_op_lock;
    ssl_socket.async_shutdown(on_shutdown);
}

on_shutdown() {
    std::lock_guard<std::mutex> socket_op_lock;
    CloseSocket();
}

After making this change, there were more crashes on the iOS platform, with the following stack trace:

0  MyApp  CRYPTO_memcmp + 76
1  MyApp  aes_gcm_tls_cipher + 624
2  MyApp  aes_gcm_cipher + 96
3  MyApp  EVP_Cipher + 64
4  MyApp  tls1_enc + 2748
5  MyApp  ssl3_get_record + 3220
6  MyApp  ssl3_read_bytes + 744
7  MyApp  ssl3_read_internal + 128
8  MyApp  ssl3_read + 52
9  MyApp  ssl_read_internal + 340
10 MyApp  SSL_read + 96
11 MyApp  asio::ssl::detail::engine::perform(int (asio::ssl::detail::engine::*)(void*, unsigned long), void*, unsigned long, std::__1::error_code&, unsigned long*) + 96
12 MyApp  asio::ssl::detail::io_op<asio::basic_stream_socket<asio::ip::tcp>, asio::ssl::detail::read_op<asio::mutable_buffers_1>, asio::detail::read_dynbuf_op<asio::ssl::stream<asio::basic_stream_socket<asio::ip::tcp> >, asio::basic_streambuf_ref<std::__1::allocator<char> >, asio::detail::transfer_at_least_t, std::__1::__bind<void (MyApp::MyTcpClient::*)(std::__1::error_code const&, unsigned long), std::__1::shared_ptr<MyApp::MyTcpClient>, std::__1::placeholders::__ph<1> const&, std::__1::placeholders::__ph<2> const&> > >::operator()(std::__1::error_code, unsigned long, int) + 92
13 MyApp  asio::detail::reactive_socket_recv_op<asio::mutable_buffers_1, asio::ssl::detail::io_op<asio::basic_stream_socket<asio::ip::tcp>, asio::ssl::detail::read_op<asio::mutable_buffers_1>, asio::detail::read_dynbuf_op<asio::ssl::stream<asio::basic_stream_socket<asio::ip::tcp> >, asio::basic_streambuf_ref<std::__1::allocator<char> >, asio::detail::transfer_at_least_t, std::__1::__bind<void (MyApp::MyTcpClient::*)(std::__1::error_code const&, unsigned long), std::__1::shared_ptr<MyApp::MyTcpClient>, std::__1::placeholders::__ph<1> const&, std::__1::placeholders::__ph<2> const&> > >::do_complete(void*, asio::detail::scheduler_operation*, std::__1::error_code const&, unsigned long) + 196
14 MyApp  asio::detail::scheduler::do_run_one(asio::detail::conditionally_enabled_mutex::scoped_lock&, asio::detail::scheduler_thread_info&, std::__1::error_code const&) + 692
15 MyApp  asio::detail::scheduler::run(std::__1::error_code&) + 188
16 MyApp  MyApp::NetworkIoThread::CustomLoop() + 44
17 MyApp  MyApp::OMMsgThread::Run() + 96
18 libsystem_pthread.dylib  __pthread_start + 148

However, it is strange that there are no issues on the Android platform.

Why is this happening, and does anyone have any suggestions on how to solve this problem?

Thanks!