zeromq / azmq

C++ language binding library integrating ZeroMQ with Boost Asio
Boost Software License 1.0
319 stars 109 forks source link

Passing an asio::buffer to async_send leads to a crash. #145

Closed JenSte closed 3 years ago

JenSte commented 6 years ago

When a boost::asio::buffer is passed to a socket's async_send(), a crash occurs somewhere deep down in asio/azmq when the azmq::message is constructed. No crash occurs if the azmq::message is constructed from the same underlying object and then passed to async_send().

Demonstration program:

#include <azmq/socket.hpp>
#include <iostream>
#include <string>
#include <memory>

// Reproduction helper for this issue: owns an azmq pair socket bound to
// `endpoint` and sends the same string over it again and again. Every
// successful completion handler immediately starts the next send.
class sender
{
public:

    // io       - io_service the socket is constructed with.
    // crash    - selects which async_send() call start_sending() makes:
    //            true  -> a boost::asio::buffer is passed directly,
    //            false -> an azmq::message built from the same string is passed.
    // endpoint - address handed to socket_.bind().
    // message  - payload; moved into string_send_buffer_.
    //
    // NOTE(review): the initializer list names socket_ before crash_, but the
    // members are declared in the order crash_, socket_, ...; C++ initializes
    // members in declaration order (GCC/Clang report this as -Wreorder). It
    // has no effect here because neither initializer reads another member.
    sender(
        boost::asio::io_service& io,
        bool crash,
        std::string endpoint,
        std::string message)
        : socket_(io)
        , crash_(crash)
        , string_send_buffer_(std::move(message))
    {
        socket_.bind(endpoint);
    }

    // Starts one asynchronous send of string_send_buffer_;
    // async_send_handler() is registered as the completion handler.
    void start_sending()
    {
        // The handler is bound to `this` as a raw pointer, so this object has
        // to outlive every pending send.
        auto handler = std::bind(
            &sender::async_send_handler,
            this,
            std::placeholders::_1,
            std::placeholders::_2);

        if (crash_) {
            // Somewhere deep down in asio/azmq a crash happens
            // when an azmq::message is constructed from this buffer.
            //
            // The object returned by boost::asio::buffer() is a temporary that
            // is gone once this statement has finished; only the bytes in
            // string_send_buffer_ live on. Per the analysis further down in
            // this thread, azmq's queued send operation keeps a reference to
            // that temporary rather than a copy.
            socket_.async_send(
                boost::asio::buffer(string_send_buffer_),
                std::move(handler));
        } else {
            // When we construct the message ourselves, no crash occurs.
            //
            // The message is held in a member, so it is still alive after this
            // function returns; each call replaces the previous message.
            message_send_buffer_ = std::make_unique<azmq::message>(
                boost::asio::buffer(string_send_buffer_));

            socket_.async_send(
                *message_send_buffer_,
                std::move(handler));
        }
    }

private:

    // Completion handler for async_send().
    // ec                - result of the send; on error the send loop stops.
    // bytes_transferred - not used.
    void async_send_handler(
        const boost::system::error_code& ec,
        std::size_t bytes_transferred)
    {
        if (ec) {
            // A cancelled operation is not reported; any other error is
            // printed to stderr (without a trailing newline).
            if (ec != boost::system::errc::operation_canceled) {
                std::cerr << "error sending: " << ec.message();
            }

            return;
        }

        // One dot per successfully completed send, flushed immediately.
        std::cout << '.' << std::flush;

        // Keep the loop going: queue the next send.
        start_sending();
    }

    // true: pass a boost::asio::buffer to async_send(); false: pass an azmq::message.
    bool crash_;

    // Pair socket that is bound in the constructor and used for every send.
    azmq::pair_socket socket_;

    // Payload bytes; both send variants build their buffer from this string.
    std::string string_send_buffer_;

    // Message object used by the crash_ == false variant; empty until the
    // first call of start_sending() on that path.
    std::unique_ptr<azmq::message> message_send_buffer_;
};

// Prints the library and compiler versions, creates a sender bound to
// tcp://0.0.0.0:10000 with the payload "foo", starts the first send and then
// runs the io_service.
int main()
{
    // Query the version of the ZeroMQ library in use.
    int major, minor, patch;
    zmq_version(&major, &minor, &patch);

    // The azmq revision is a hard-coded string, not queried from the library.
    std::cout << "boost version: " << BOOST_LIB_VERSION << '\n'
              << "zmq version: " << major << '.' << minor << '.' << patch << '\n'
              << "azmq version: git-a8f54cc8\n"
              << "gcc version: " << __GNUC__ << '.' << __GNUC_MINOR__ << '.' << __GNUC_PATCHLEVEL__ << std::endl;

    boost::asio::io_service io;

    // Toggle between the two variants by swapping which of the next two
    // lines is commented out: true selects the boost::asio::buffer call,
    // false the azmq::message call.
    bool crash = true;
    //bool crash = false;

    sender s(io, crash, "tcp://0.0.0.0:10000", "foo");
    s.start_sending();

    // Process the queued asynchronous operations; `s` stays alive for the
    // whole call.
    io.run();
}

As the crash only happens when the socket is connected, the following Python script is started before the C++ program:

#!/usr/bin/env python3
"""Receiving peer for the C++ reproduction: prints the repr of every message."""

import zmq

# PAIR socket connected to port 10000 on the loopback address.
ctx = zmq.Context()
receiver = ctx.socket(zmq.PAIR)
receiver.connect("tcp://127.0.0.1:10000")

# Block on each incoming message and echo it, forever.
while True:
    print(repr(receiver.recv()))

Then, when the C++ program is executed:

boost version: 1_63
zmq version: 4.1.6
azmq version: git-a8f54cc8
gcc version: 7.3.1
=================================================================
==18917==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7ffc00d67c58 at pc 0x0000004581c3 bp 0x7ffc00d671b0 sp 0x7ffc00d671a0
READ of size 8 at 0x7ffc00d67c58 thread T0
    #0 0x4581c2 in boost::asio::detail::buffer_size_helper(boost::asio::const_buffer const&) /usr/include/boost/asio/buffer.hpp:281
    #1 0x458299 in boost::asio::buffer_size(boost::asio::const_buffer const&) /usr/include/boost/asio/buffer.hpp:372
    #2 0x4592b8 in azmq::message::message(boost::asio::const_buffer const&) /home/jens/azmq-crash/azmq/azmq/detail/../message.hpp:68
    #3 0x499b31 in boost::enable_if<boost::has_range_const_iterator<boost::asio::const_buffers_1>, unsigned long>::type azmq::detail::socket_ops::send<boost::asio::const_buffers_1>(boost::asio::const_buffers_1 const&, std::unique_ptr<void, azmq::detail::socket_ops::socket_close>&, int, boost::system::error_code&) /home/jens/azmq-crash/azmq/azmq/detail/socket_ops.hpp:282
    #4 0x494ed1 in azmq::detail::send_buffer_op_base<boost::asio::const_buffers_1>::do_perform(azmq::detail::reactor_op*, std::unique_ptr<void, azmq::detail::socket_ops::socket_close>&) /home/jens/azmq-crash/azmq/azmq/detail/send_op.hpp:38
    #5 0x46745a in azmq::detail::reactor_op::do_perform(std::unique_ptr<void, azmq::detail::socket_ops::socket_close>&) /home/jens/azmq-crash/azmq/azmq/detail/reactor_op.hpp:29
    #6 0x4681c1 in azmq::detail::socket_service::per_descriptor_data::perform_ops(boost::intrusive::list<azmq::detail::reactor_op, boost::intrusive::member_hook<azmq::detail::reactor_op, boost::intrusive::list_member_hook<void, void, void>, &azmq::detail::reactor_op::member_hook_>, void, void, void>&, boost::system::error_code&) /home/jens/azmq-crash/azmq/azmq/detail/socket_service.hpp:118
    #7 0x46a261 in azmq::detail::socket_service::reactor_handler::operator()(boost::system::error_code, unsigned long) const /home/jens/azmq-crash/azmq/azmq/detail/socket_service.hpp:597
    #8 0x49d7cf in boost::asio::detail::binder2<azmq::detail::socket_service::reactor_handler, boost::system::error_code, unsigned long>::operator()() /usr/include/boost/asio/detail/bind_handler.hpp:127
    #9 0x49bec8 in void boost::asio::asio_handler_invoke<boost::asio::detail::binder2<azmq::detail::socket_service::reactor_handler, boost::system::error_code, unsigned long> >(boost::asio::detail::binder2<azmq::detail::socket_service::reactor_handler, boost::system::error_code, unsigned long>&, ...) /usr/include/boost/asio/handler_invoke_hook.hpp:69
    #10 0x4995ce in void boost_asio_handler_invoke_helpers::invoke<boost::asio::detail::binder2<azmq::detail::socket_service::reactor_handler, boost::system::error_code, unsigned long>, azmq::detail::socket_service::reactor_handler>(boost::asio::detail::binder2<azmq::detail::socket_service::reactor_handler, boost::system::error_code, unsigned long>&, azmq::detail::socket_service::reactor_handler&) /usr/include/boost/asio/detail/handler_invoke_helpers.hpp:37
    #11 0x4947ce in boost::asio::detail::reactive_null_buffers_op<azmq::detail::socket_service::reactor_handler>::do_complete(boost::asio::detail::task_io_service*, boost::asio::detail::task_io_service_operation*, boost::system::error_code const&, unsigned long) /usr/include/boost/asio/detail/reactive_null_buffers_op.hpp:75
    #12 0x45b792 in boost::asio::detail::task_io_service_operation::complete(boost::asio::detail::task_io_service&, boost::system::error_code const&, unsigned long) /usr/include/boost/asio/detail/task_io_service_operation.hpp:38
    #13 0x460b92 in boost::asio::detail::epoll_reactor::descriptor_state::do_complete(boost::asio::detail::task_io_service*, boost::asio::detail::task_io_service_operation*, boost::system::error_code const&, unsigned long) /usr/include/boost/asio/detail/impl/epoll_reactor.ipp:651
    #14 0x45b792 in boost::asio::detail::task_io_service_operation::complete(boost::asio::detail::task_io_service&, boost::system::error_code const&, unsigned long) /usr/include/boost/asio/detail/task_io_service_operation.hpp:38
    #15 0x462725 in boost::asio::detail::task_io_service::do_run_one(boost::asio::detail::scoped_lock<boost::asio::detail::posix_mutex>&, boost::asio::detail::task_io_service_thread_info&, boost::system::error_code const&) /usr/include/boost/asio/detail/impl/task_io_service.ipp:372
    #16 0x461a2e in boost::asio::detail::task_io_service::run(boost::system::error_code&) /usr/include/boost/asio/detail/impl/task_io_service.ipp:149
    #17 0x462c74 in boost::asio::io_service::run() /usr/include/boost/asio/impl/io_service.ipp:59
    #18 0x45454e in main /home/jens/azmq-crash/main.cpp:93
    #19 0x7fafe7222889 in __libc_start_main (/lib64/libc.so.6+0x20889)
    #20 0x453e79 in _start (/home/jens/azmq-crash/build/crash+0x453e79)

Address 0x7ffc00d67c58 is located in stack of thread T0 at offset 280 in frame
    #0 0x4617cd in boost::asio::detail::task_io_service::run(boost::system::error_code&) /usr/include/boost/asio/detail/impl/task_io_service.ipp:134

  This frame has 4 object(s):
    [32, 48) '<unknown>'
    [96, 112) 'lock'
    [160, 184) 'ctx'
    [224, 256) 'this_thread' <== Memory access at offset 280 overflows this variable
HINT: this may be a false positive if your program uses some custom stack unwind mechanism or swapcontext
      (longjmp and C++ exceptions *are* supported)
SUMMARY: AddressSanitizer: stack-buffer-overflow /usr/include/boost/asio/buffer.hpp:281 in boost::asio::detail::buffer_size_helper(boost::asio::const_buffer const&)
Shadow bytes around the buggy address:
  0x1000001a4f30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x1000001a4f40: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 f8 f8 f8 f2
  0x1000001a4f50: f2 f2 f2 f2 00 00 00 f2 f3 f3 f3 f3 00 00 00 00
  0x1000001a4f60: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 f8 f8 f2 f2
  0x1000001a4f70: f2 f2 f2 f2 00 00 f2 f2 f2 f2 f2 f2 00 00 00 f2
=>0x1000001a4f80: f2 f2 f2 f2 00 00 00 00 f3 f3 f3[f3]00 00 00 00
  0x1000001a4f90: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 f2 f2
  0x1000001a4fa0: f3 f3 f3 f3 00 00 00 00 00 00 00 00 f1 f1 f1 f1
  0x1000001a4fb0: f8 f2 f2 f2 f2 f2 f2 f2 f8 f2 f2 f2 f2 f2 f2 f2
  0x1000001a4fc0: 04 f2 f2 f2 f2 f2 f2 f2 04 f2 f2 f2 f2 f2 f2 f2
  0x1000001a4fd0: 04 f2 f2 f2 f2 f2 f2 f2 00 00 f2 f2 f2 f2 f2 f2
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:           00
  Partially addressable: 01 02 03 04 05 06 07 
  Heap left redzone:       fa
  Freed heap region:       fd
  Stack left redzone:      f1
  Stack mid redzone:       f2
  Stack right redzone:     f3
  Stack after return:      f5
  Stack use after scope:   f8
  Global redzone:          f9
  Global init order:       f6
  Poisoned by user:        f7
  Container overflow:      fc
  Array cookie:            ac
  Intra object redzone:    bb
  ASan internal:           fe
  Left alloca redzone:     ca
  Right alloca redzone:    cb
==18917==ABORTING

Here is the same program compiled without AddressSanitizer, with the stack trace generated by GDB instead:

(gdb) bt
#0  0x00007f397708e1b4 in __memmove_sse2_unaligned_erms () from /lib64/libc.so.6
#1  0x0000000000454d85 in boost::asio::buffer_copy (target=..., source=...) at /usr/include/boost/asio/buffer.hpp:1303
#2  0x0000000000454db3 in boost::asio::buffer_copy (target=..., source=...) at /usr/include/boost/asio/buffer.hpp:1446
#3  0x00000000004553e1 in azmq::message::message (this=0x7fffb6a8ca10, buffer=...) at /home/jens/azmq-crash/azmq/azmq/detail/../message.hpp:72
#4  0x0000000000470e63 in azmq::detail::socket_ops::send<boost::asio::const_buffers_1> (buffers=..., socket=std::unique_ptr<void> containing 0x15f5f50, flags=1, ec=...) at /home/jens/azmq-crash/azmq/azmq/detail/socket_ops.hpp:282
#5  0x000000000046eae7 in azmq::detail::send_buffer_op_base<boost::asio::const_buffers_1>::do_perform (base=0x15f7fd0, socket=std::unique_ptr<void> containing 0x15f5f50) at /home/jens/azmq-crash/azmq/azmq/detail/send_op.hpp:38
#6  0x000000000045ac06 in azmq::detail::reactor_op::do_perform (this=0x15f7fd0, socket=std::unique_ptr<void> containing 0x15f5f50) at /home/jens/azmq-crash/azmq/azmq/detail/reactor_op.hpp:29
#7  0x000000000045b1ef in azmq::detail::socket_service::per_descriptor_data::perform_ops (this=0x15f2cf0, ops=..., ec=...) at /home/jens/azmq-crash/azmq/azmq/detail/socket_service.hpp:118
#8  0x000000000045c00a in azmq::detail::socket_service::reactor_handler::operator() (this=0x7fffb6a8cd30, ec=...) at /home/jens/azmq-crash/azmq/azmq/detail/socket_service.hpp:597
#9  0x000000000047306d in boost::asio::detail::binder2<azmq::detail::socket_service::reactor_handler, boost::system::error_code, unsigned long>::operator() (this=0x7fffb6a8cd30) at /usr/include/boost/asio/detail/bind_handler.hpp:127
#10 0x00000000004720bc in boost::asio::asio_handler_invoke<boost::asio::detail::binder2<azmq::detail::socket_service::reactor_handler, boost::system::error_code, unsigned long> > (function=...) at /usr/include/boost/asio/handler_invoke_hook.hpp:69
#11 0x0000000000470ae9 in boost_asio_handler_invoke_helpers::invoke<boost::asio::detail::binder2<azmq::detail::socket_service::reactor_handler, boost::system::error_code, unsigned long>, azmq::detail::socket_service::reactor_handler> (function=..., context=...) at /usr/include/boost/asio/detail/handler_invoke_helpers.hpp:37
#12 0x000000000046e75a in boost::asio::detail::reactive_null_buffers_op<azmq::detail::socket_service::reactor_handler>::do_complete (owner=0x15f2080, base=0x15f6dc0) at /usr/include/boost/asio/detail/reactive_null_buffers_op.hpp:75
#13 0x00000000004564ca in boost::asio::detail::task_io_service_operation::complete (this=0x15f6dc0, owner=..., ec=..., bytes_transferred=0) at /usr/include/boost/asio/detail/task_io_service_operation.hpp:38
#14 0x0000000000457fed in boost::asio::detail::epoll_reactor::descriptor_state::do_complete (owner=0x15f2080, base=0x15f6820, ec=..., bytes_transferred=1) at /usr/include/boost/asio/detail/impl/epoll_reactor.ipp:651
#15 0x00000000004564ca in boost::asio::detail::task_io_service_operation::complete (this=0x15f6820, owner=..., ec=..., bytes_transferred=1) at /usr/include/boost/asio/detail/task_io_service_operation.hpp:38
#16 0x0000000000458a9a in boost::asio::detail::task_io_service::do_run_one (this=0x15f2080, lock=..., this_thread=..., ec=...) at /usr/include/boost/asio/detail/impl/task_io_service.ipp:372
#17 0x00000000004585cc in boost::asio::detail::task_io_service::run (this=0x15f2080, ec=...) at /usr/include/boost/asio/detail/impl/task_io_service.ipp:149
#18 0x0000000000458d3f in boost::asio::io_service::run (this=0x7fffb6a8d010) at /usr/include/boost/asio/impl/io_service.ipp:59
#19 0x0000000000453846 in main () at /home/jens/azmq-crash/main.cpp:93

When bool crash = false; is used in main(), the program works as expected and the python script fills the terminal with b'foo's.

JenSte commented 6 years ago

Note that similar stack traces were posted in #68, however the program posted here crashes directly on the first message that it tries to send, independently of any timing between messages.

I also ran the test cases that come with azmq, all of them passed.

dkl commented 4 years ago

Hi, I'm having a problem that looks very similar:

==3828== Invalid read of size 8
==3828==    at 0x4C367EE: memmove (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==3828==    by 0x26B2A8: boost::asio::detail::buffer_copy_1(boost::asio::mutable_buffer const&, boost::asio::const_buffer const&) (buffer.hpp:2180)
==3828==    by 0x272A9F: unsigned long boost::asio::detail::buffer_copy<boost::asio::mutable_buffer const*, boost::asio::const_buffer const*>(boost::asio::detail::one_buffer, boost::asio::detail::one_buffer, boost::asio::mutable_buffer const*, boost::asio::mutable_buffer const*, boost::asio::const_buffer const*, boost::asio::const_buffer const*) (buffer.hpp:2189)
==3828==    by 0x2704AE: unsigned long boost::asio::buffer_copy<boost::asio::mutable_buffers_1, boost::asio::const_buffer>(boost::asio::mutable_buffers_1 const&, boost::asio::const_buffer const&) (buffer.hpp:2371)
==3828==    by 0x26DE8D: azmq::message::message(boost::asio::const_buffer const&) (message.hpp:79)
==3828==    by 0x27C643: boost::enable_if<boost::has_range_const_iterator<boost::asio::const_buffers_1>, unsigned long>::type azmq::detail::socket_ops::send<boost::asio::const_buffers_1>(boost::asio::const_buffers_1 const&, std::unique_ptr<void, azmq::detail::socket_ops::socket_close>&, int, boost::system::error_code&) (socket_ops.hpp:284)
==3828==    by 0x27A979: azmq::detail::send_buffer_op_base<boost::asio::const_buffers_1>::do_perform(azmq::detail::reactor_op*, std::unique_ptr<void, azmq::detail::socket_ops::socket_close>&) (send_op.hpp:68)
==3828==    by 0x26E501: azmq::detail::reactor_op::do_perform(std::unique_ptr<void, azmq::detail::socket_ops::socket_close>&) (reactor_op.hpp:29)
==3828==    by 0x26E93E: azmq::detail::socket_service::per_descriptor_data::perform_ops(boost::intrusive::list<azmq::detail::reactor_op, boost::intrusive::member_hook<azmq::detail::reactor_op, boost::intrusive::list_member_hook<>, &azmq::detail::reactor_op::member_hook_> >&, boost::system::error_code&) (socket_service.hpp:130)
==3828==    by 0x26F34E: azmq::detail::socket_service::reactor_handler::operator()(boost::system::error_code, unsigned long) const (socket_service.hpp:625)
==3828==    by 0x280B7C: boost::asio::detail::binder2<azmq::detail::socket_service::reactor_handler, boost::system::error_code, unsigned long>::operator()() (bind_handler.hpp:164)
==3828==    by 0x2800D0: void boost::asio::asio_handler_invoke<boost::asio::detail::binder2<azmq::detail::socket_service::reactor_handler, boost::system::error_code, unsigned long> >(boost::asio::detail::binder2<azmq::detail::socket_service::reactor_handler, boost::system::error_code, unsigned long>&, ...) (handler_invoke_hook.hpp:69)

I tried to debug it a bit, and apparently the problem is that azmq is only storing a reference to the boost::asio::const_buffer object in the internal send_buffer_op object in its operation queue. So if the original boost::asio::const_buffer object goes out of scope (such as when returning from start_sending() in the above test program with crash = true), then azmq has a dangling reference that it tries to access the next time it tries to perform that operation.

The symptoms seem to vary: if the message::message() constructor is given a completely invalid boost::asio::const_buffer, then anything can happen. Sometimes zmq_msg_init_size() fails with ENOMEM due to a ridiculously huge buffer size, and sometimes it crashes in boost::asio::buffer_copy(), because the source buffer is an invalid address or a null pointer.

Looking at reactive_socket_send_op_base in boost::asio, it stores a copy of the ConstBufferSequence it's given. It seems to me that azmq should do the same. After all, the boost::asio docs state that the "buffers object may be copied as necessary", and only mention the caller retaining ownership of the "underlying memory blocks", not of the buffer object. Interestingly enough, azmq's receive_buffer_op_base already does this.