scylladb / seastar

High performance server-side application framework
http://seastar.io
Apache License 2.0
8.28k stars 1.54k forks source link

Help: segment fault when running ip_test #201

Open abc100m opened 7 years ago

abc100m commented 7 years ago

Segmentation fault when running ip_test. Below is the stack trace:

[root@localhost tests]# ./ip_test --network-stack posix
core/reactor.hh:1018:13: runtime error: reference binding to null pointer of type 'struct reactor'
core/reactor.cc:2526:22: runtime error: member call on null pointer of type 'struct reactor'
ASAN:SIGSEGV
=================================================================
==5643==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000ec0 (pc 0x0000007924ab bp 0x7fff9ac7aa30 sp 0x7fff9ac7aa10 T0)
    #0 0x7924aa in circular_buffer<std::unique_ptr<task, std::default_delete<task> >, std::allocator<std::unique_ptr<task, std::default_delete<task> > > >::maybe_expand(unsigned long) (/root/test/seastar-master/build/debug/tests/ip_test+0x7924aa)
    #1 0x759634 in circular_buffer<std::unique_ptr<task, std::default_delete<task> >, std::allocator<std::unique_ptr<task, std::default_delete<task> > > >::push_back(std::unique_ptr<task, std::default_delete<task> >&&) (/root/test/seastar-master/build/debug/tests/ip_test+0x759634)
    #2 0x73b9ca in reactor::add_task(std::unique_ptr<task, std::default_delete<task> >&&) (/root/test/seastar-master/build/debug/tests/ip_test+0x73b9ca)
    #3 0x4fc00a in reactor::poller::do_register() core/reactor.cc:2526
    #4 0x73b6cb in reactor::poller::poller(std::unique_ptr<reactor::pollfn, std::default_delete<reactor::pollfn> >) core/reactor.hh:666
    #5 0xb6faa0 in simple<net::qp::qp(bool, std::string, uint8_t)::<lambda()> > core/reactor.hh:662
    #6 0xb68dfa in net::qp::qp(bool, std::string, unsigned char) net/net.cc:170
    #7 0xbdac71 in virtio::qp::qp(virtio::device*, unsigned long, unsigned long) net/virtio.cc:757
    #8 0xbdbb91 in virtio::qp_vhost::qp_vhost(virtio::device*, boost::program_options::variables_map) net/virtio.cc:819
    #9 0xbfedf8 in std::_MakeUniq<virtio::qp_vhost>::__single_object std::make_unique<virtio::qp_vhost, virtio::device*, boost::program_options::variables_map&>(virtio::device*&&, boost::program_options::variables_map&) /opt/scylladb/include/c++/5.3.1/bits/unique_ptr.h:765
    #10 0xbdd683 in virtio::device::init_local_queue(boost::program_options::variables_map, unsigned short) net/virtio.cc:985
    #11 0x464b76 in main tests/ip_test.cc:35
    #12 0x7f141c6bfb14 in __libc_start_main (/lib64/libc.so.6+0x21b14)
    #13 0x463770  (/root/test/seastar-master/build/debug/tests/ip_test+0x463770)

AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV ??:0 circular_buffer<std::unique_ptr<task, std::default_delete<task> >, std::allocator<std::unique_ptr<task, std::default_delete<task> > > >::maybe_expand(unsigned long)
==5643==ABORTING
abc100m commented 7 years ago

OK, I think this is a BUG

// Accessor for the current reactor: dereferences the local_engine pointer and
// returns the result as a reference. No null check is performed here, so
// local_engine must already point at a reactor when this is called.
inline reactor& engine() {
    reactor* const current = local_engine;
    return *current;
}

// Allocates 64-byte-aligned storage for a reactor, points local_engine at it,
// constructs the reactor in place, and hands ownership to reactor_holder.
// Asserts that no reactor is held yet and that the allocation succeeded.
void smp::allocate_reactor() {
    assert(!reactor_holder);

    // A plain "local_engine = new reactor" is not an option: the reactor
    // constructor itself uses local_engine, so the pointer has to be set
    // before construction begins. Hence raw storage first, placement-new second.
    void* storage;
    const int rc = posix_memalign(&storage, 64, sizeof(reactor));
    assert(rc == 0);

    local_engine = static_cast<reactor*>(storage);
    new (storage) reactor;
    reactor_holder.reset(local_engine);
}

smp::allocate_reactor is called only from smp::configure, which in turn is called only by app_template::run_deprecated.

So this test application cannot work, because it does not have an app_template instance.

abc100m commented 7 years ago

I have changed tcp_test.cc to the following, but it still segfaults ... need help!

tcp_test.cc

#include "net/ip.hh"
#include "net/virtio.hh"
#include "net/tcp.hh"
#include "core/app-template.hh"

using namespace net;

// Small TCP echo test: obtains a listener from inet.get_tcp().listen(10000)
// and, for every accepted connection, sends back whatever is read from it.
struct tcp_test {
    ipv4& inet;
    using tcp = net::tcp<ipv4_traits>;
    tcp::listener _listener;

    // State for a single accepted connection.
    struct connection {
        tcp::connection tcp_conn;

        explicit connection(tcp::connection tc)
            : tcp_conn(std::move(tc)) {
        }

        // Waits for data, echoes it back, and re-arms itself. A zero-length
        // read closes the write side and stops the loop.
        void run() {
            tcp_conn.wait_for_data().then([this] {
                auto received = tcp_conn.read();
                if (received.len()) {
                    print("read %d bytes\n", received.len());
                    tcp_conn.send(std::move(received));
                    run();
                } else {
                    tcp_conn.close_write();
                }
            });
        }
    };

    tcp_test(ipv4& inet)
        : inet(inet)
        , _listener(inet.get_tcp().listen(10000)) {
    }

    // Accepts one connection, starts its echo loop, then re-arms the accept.
    // NOTE: the connection object is allocated with `new` and is not deleted
    // anywhere in this struct.
    void run() {
        _listener.accept().then([this] (tcp::connection conn) {
            auto* session = new connection(std::move(conn));
            session->run();
            run();
        });
    }
};

// Test entry point. Prints progress markers to cout while it builds a virtio
// net device from a hand-made options map, wraps it in an interface, puts an
// ipv4 stack on top, and starts tcp_test.
//
// With the `#if 1` sections enabled, the setup code is the body of the lambda
// handed to app_template::run(), and main returns whatever app.run() returns.
int main(int ac, char** av) {

    using namespace std;
    cout << "here 1" << endl;

#if 1
    // Run the setup below as the callback of app_template::run().
    app_template app;
    return app.run(ac, av, []{
#endif   

        // The only option supplied to the device factory is tap-device=tap0.
        boost::program_options::variables_map opts;
        opts.insert(std::make_pair("tap-device", boost::program_options::variable_value(std::string("tap0"), false)));

        cout << "here xx" << endl;
        auto vnet = create_virtio_net_device(opts);
        cout << "here xx 2" << endl;

        interface netif(std::move(vnet));
        cout << "here 1-2" << endl;
        cout.flush();

        ipv4 inet(&netif);
        inet.set_host_address(ipv4_address("192.168.122.2"));    

        cout << "here 2" << endl;
        // NOTE(review): netif, inet and tt are locals of this lambda, and the
        // lambda returns a ready future right after tt.run(). tcp_test::run()
        // captures `this` in its accept continuation, so these objects
        // presumably need to outlive the lambda — confirm.
        tcp_test tt(inet);
#if 0
        // Disabled variant: drive the reactor directly instead of via app_template.
        engine().when_started().then([&tt] { tt.run(); });
        engine().run();
#else
        tt.run();
#endif   

#if 1
        return make_ready_future<>();
    });
#endif

    // Only reached when the `#if 1` sections above are compiled out; otherwise
    // the function has already returned from app.run().
    return 0;
}

and here is the backtrace

(gdb) r -c1
Starting program: /root/test/seastar-master/build/release/tests/./tcp_test -c1
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
here 1
[New Thread 0x7ffff0fff700 (LWP 8899)]
here xx
here xx 2

Program received signal SIGSEGV, Segmentation fault.
subscription (s=0xc8, this=0x7fffffffc6e0) at ./core/stream.hh:201
201     assert(!_stream->_sub);
Missing separate debuginfos, use: debuginfo-install cryptopp-5.6.2-10.el7.x86_64 glibc-2.17-106.el7_2.1.x86_64 gmp-6.0.0-12.el7_1.x86_64 gnutls-3.3.8-14.el7_2.x86_64 gnutls-c++-3.3.8-14.el7_2.x86_64 hwloc-libs-1.7-5.el7.x86_64 keyutils-libs-1.5.8-3.el7.x86_64 krb5-libs-1.13.2-10.el7.x86_64 libaio-0.3.109-13.el7.x86_64 libcom_err-1.42.9-7.el7.x86_64 libffi-3.0.13-16.el7.x86_64 libpciaccess-0.13.4-2.el7.x86_64 libselinux-2.2.2-6.el7.x86_64 libtasn1-3.8-2.el7.x86_64 libunwind-1.1-5.el7_2.2.x86_64 libxml2-2.9.1-6.el7_2.3.x86_64 lz4-r131-1.el7.x86_64 nettle-2.7.1-4.el7.x86_64 numactl-libs-2.0.9-6.el7_2.x86_64 openssl-libs-1.0.1e-42.el7.9.x86_64 p11-kit-0.20.7-3.el7.x86_64 pcre-8.32-15.el7.x86_64 protobuf-2.5.0-8.el7.x86_64 scylla-boost-filesystem-1.58.0-11.el7.centos.x86_64 scylla-boost-program-options-1.58.0-11.el7.centos.x86_64 scylla-boost-system-1.58.0-11.el7.centos.x86_64 scylla-boost-test-1.58.0-11.el7.centos.x86_64 scylla-boost-thread-1.58.0-11.el7.centos.x86_64 scylla-libgcc-5.3.1-2.el7.centos.x86_64 scylla-libstdc++-5.3.1-2.el7.centos.x86_64 trousers-0.3.13-1.el7.x86_64 xz-libs-5.1.2-12alpha.el7.x86_64 zlib-1.2.7-15.el7.x86_64
(gdb) where
#0  subscription (s=0xc8, this=0x7fffffffc6e0) at ./core/stream.hh:201
#1  listen (next=..., this=0xc8) at ./core/stream.hh:146
#2  net::device::receive(std::function<future<> (net::packet)>) (this=0x6000000a50e0, next_packet=...) at net/net.cc:251
#3  0x0000000000555213 in net::interface::interface (this=0x7fffffffc810, dev=...) at net/net.cc:278
#4  0x0000000000464b8c in <lambda()>::operator()(void) const (__closure=<optimized out>) at tests/tcp_test.cc:78
#5  0x0000000000465209 in std::_Function_handler<future<>(), main(int, char**)::<lambda()> >::_M_invoke(const std::_Any_data &) (__functor=...) at /opt/scylladb/include/c++/5.3.1/functional:1857
#6  0x00000000005291ed in operator() (this=<optimized out>) at /opt/scylladb/include/c++/5.3.1/functional:2271
#7  operator() (__closure=<optimized out>) at core/app-template.cc:78
#8  std::_Function_handler<future<int>(), app_template::run(int, char**, std::function<future<>()>&&)::<lambda()> >::_M_invoke(const std::_Any_data &) (__functor=...) at /opt/scylladb/include/c++/5.3.1/functional:1857
#9  0x00000000005299dc in operator() (this=0x60000008d3a0) at /opt/scylladb/include/c++/5.3.1/functional:2271
#10 apply<std::function<future<int>()>&> (func=...) at ./core/future.hh:1248
#11 futurize_apply<std::function<future<int>()>&> (func=...) at ./core/future.hh:1306
#12 app_template::<lambda()>::operator()(void) (__closure=0x60000008d3a0) at core/app-template.cc:67
#13 0x00000000005301c8 in operator() (this=<optimized out>) at /opt/scylladb/include/c++/5.3.1/functional:2271
#14 apply (args=<optimized out>, func=<optimized out>) at ./core/apply.hh:34
#15 apply<std::function<void()> > (args=<optimized out>, func=<optimized out>) at ./core/apply.hh:42
#16 do_void_futurize_apply_tuple<std::function<void()> > (args=<optimized out>, 
    func=<unknown type in /root/test/seastar-master/build/release/tests/tcp_test, CU 0x8858ec, DIE 0x934934>)
    at ./core/future.hh:1206
#17 apply<std::function<void()> > (args=<optimized out>, func=<optimized out>) at ./core/future.hh:1226
#18 operator()<future_state<> > (
    state=<unknown type in /root/test/seastar-master/build/release/tests/tcp_test, CU 0x8858ec, DIE 0x9560bd>, 
    __closure=0x600000093f10) at ./core/future.hh:848
#19 _ZN12continuationIZN6futureIJEE4thenISt8functionIFvvEES1_EET0_OT_EUlS8_E_JEE3runEv (this=0x600000093f00)
    at ./core/future.hh:390
---Type <return> to continue, or q <return> to quit---
#20 0x000000000047cd5f in reactor::run_tasks (this=this@entry=0x6000001fb000, tasks=...) at core/reactor.cc:1924
#21 0x00000000004c7973 in reactor::run (this=0x6000001fb000) at core/reactor.cc:2356
#22 0x000000000052ba2d in app_template::run_deprecated(int, char**, std::function<void ()>&&) (
    this=this@entry=0x7fffffffe2d0, ac=ac@entry=2, av=av@entry=0x7fffffffe4f8, 
    func=func@entry=<unknown type in /root/test/seastar-master/build/release/tests/tcp_test, CU 0x8858ec, DIE 0x95026e>)
    at core/app-template.cc:131
#23 0x000000000052ca63 in app_template::run(int, char**, std::function<future<int> ()>&&) (
    this=this@entry=0x7fffffffe2d0, ac=ac@entry=2, av=av@entry=0x7fffffffe4f8, 
    func=func@entry=<unknown type in /root/test/seastar-master/build/release/tests/tcp_test, CU 0x8858ec, DIE 0x95429a>)
    at core/app-template.cc:72
#24 0x000000000052cbd3 in app_template::run(int, char**, std::function<future<> ()>&&) (this=this@entry=0x7fffffffe2d0, 
    ac=ac@entry=2, av=av@entry=0x7fffffffe4f8, 
    func=func@entry=<unknown type in /root/test/seastar-master/build/release/tests/tcp_test, CU 0x8858ec, DIE 0x954987>)
    at core/app-template.cc:81
#25 0x00000000004625ea in main (ac=2, av=0x7fffffffe4f8) at tests/tcp_test.cc:98
javeme commented 7 years ago

I also hit this bug; the root cause was that _queues[engine().cpu_id()] was null. I will submit a patch.

// Registers next_packet as the listener on the _rx_stream of the queue at
// _queues[engine().cpu_id()], calls rx_start() on that queue, and returns the
// resulting subscription.
//
// Precondition: _queues[engine().cpu_id()] may be nullptr and is dereferenced
// here without a check, so the entry must have been set before this is called.
subscription<packet> device::receive(std::function<future<> (packet)> next_packet) {
    // Look the queue slot up once instead of once per use.
    auto& queue = _queues[engine().cpu_id()];
    auto sub = queue->_rx_stream.listen(std::move(next_packet));
    queue->rx_start();
    // Return the local by name: wrapping it in std::move would inhibit copy
    // elision, and a returned local is already treated as an rvalue.
    return sub;
}