voipmonitor / sniffer

VoIPmonitor sniffer sources
227 stars 105 forks source link

Crash and segfault voipmonitor server #52

Closed arvatoth-vcc closed 6 years ago

arvatoth-vcc commented 6 years ago

Hello,

We have a strange issue with the voipmonitor server. We have 20+ servers running the sniffer and one server. There are 2300-2500 parallel calls. We are using Ubuntu Trusty 14.04 and compiling voipmonitor/sniffer from the git source code.

Sample log from server:

2018-03-12T09:00:57+01:00 dumper2 voipmonitor[29144]: calls[2518,r:15][2518,r:21] PS[C:48/-28 r:12/-11 S:442/442 SR:28 SM:- R:79000 A:151280] SQLq[C:0] heap[0|1|0] comp[62] [231.8Mb/s] cdq[0][4.8 MB/s] t0i_vmbr0_CPU[0.1Mb/s;0.0%/4.7%/4.8%/4.8%/4.7%] t0CPU[4.8/4.7%] t1CPU[0.0%/0.0%/0.0%/0.0%/0.2%/0.4%/0.0%/0.6%/0.6%/0.0%/1.1%/0.0%/0.2%/0.3%/0.0%/0.1%/1.4%/0.6%/0.8%/1.6%/0.8%/0.2%/0.5%/0.1%/1.9%/1.8%/0.5%/0.2%/0.0%/0.0%/0.0%/0.0%/0.0%/0.0%/0.0%] t2CPU[pb:13.5/d12.3/rm:10.2/rh:4.2/rd:12.1/S:52.3%] tRTP_CPU[151.3%/44.2m/4t] tsip_tcpCPU[625l|0/0s|0/0p] tacCPU[62.2|54.4|50.9|50.8|42.1%] RSS/VSZ[2964|7727]MB LA[5.25 3.88 3.15] v22.0.2 

The server has 48 GB of RAM and two Intel Xeon E5645 CPUs. The server crashes 12-15 times or more per hour. Here is the gdb `bt full` output:

#0  0x00007ff75569cc37 in __GI_raise (sig=sig@entry=6)
    at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
        resultvar = 0
        pid = 17483
        selftid = 21531
#1  0x00007ff7556a0028 in __GI_abort () at abort.c:89
        save_stage = 2
        act = {__sigaction_handler = {sa_handler = 0x2d34365f3638782f, 
            sa_sigaction = 0x2d34365f3638782f}, sa_mask = {__val = {
              7955377262162766188, 7526764445220745077, 3543825127326180722, 
              7378645557452156473, 3472334915645289783, 4122261946868641072, 
              8223625903104013668, 3544385890001366391, 4195437442677878841, 
              3906364935141077040, 2314885530818453553, 2314885530818453536, 
              7795484802351636512, 3917909816998060649, 3276497845987585332, 
              0}}, sa_flags = 67, sa_restorer = 0x7ff5e67fb800}
        sigs = {__val = {32, 0 <repeats 15 times>}}
#2  0x00007ff7556d92a4 in __libc_message (do_abort=do_abort@entry=2, 
    fmt=fmt@entry=0x7ff7557e8db0 "*** %s ***: %s terminated\n")
    at ../sysdeps/posix/libc_fatal.c:175
        ap = {{gp_offset = 32, fp_offset = 0, 
            overflow_arg_area = 0x7ff5e67fb810, 
            reg_save_area = 0x7ff5e67fb7a0}}
        fd = 2
        on_2 = <optimized out>
        list = <optimized out>
        nlist = <optimized out>
        cp = <optimized out>
        written = <optimized out>
#3  0x00007ff75577487c in __GI___fortify_fail (msg=<optimized out>, 
    msg@entry=0x7ff7557e8d47 "buffer overflow detected") at fortify_fail.c:38
        do_abort = 2
#4  0x00007ff755773750 in __GI___chk_fail () at chk_fail.c:28
No locals.
#5  0x00007ff7557747c7 in __fdelt_chk (d=<optimized out>) at fdelt_chk.c:25
No locals.
#6  0x000000000054932b in PcapQueue_readFromFifo::_socketRead (
    this=this@entry=0x292e260, socket=1095, 
    data=data@entry=0x7ff5c4000b50 "h\005", 
    dataLen=dataLen@entry=0x7ff5e67fbab8, timeout=timeout@entry=1)
    at pcap_queue.cpp:6547
        __d = 1095
        rfds = {fds_bits = {0 <repeats 16 times>}}
        tv = {tv_sec = 31, tv_usec = 79}
        rsltSelect = <optimized out>
        maxDataLen = 1000
#7  0x000000000055643b in PcapQueue_readFromFifo::socketRead (
    this=this@entry=0x292e260, data=data@entry=0x7ff5c4000b50 "h\005", 
    dataLen=dataLen@entry=0x7ff5e67fbab8, idConnection=<optimized out>)
    at pcap_queue.cpp:6538
No locals.
#8  0x0000000000556feb in PcapQueue_readFromFifo::threadFunction (
    this=0x292e260, arg=<optimized out>, arg2=44) at pcap_queue.cpp:5461
        detectSensorName = false
        detectSensorTime = false
        sensorId = <optimized out>
        sensorName = ""
        sensorTime = ""
        require_confirmation = -1
        counterEmptyData = 0
        buffer = 0x7ff5c4000b50 "h\005"
        offsetBuffer = 0
        countErrors = 0
        readLen = 0
        forceStop = false
        lastTimeErrorLogMS = 0
        blockStore = 0x7ff5c4000aa0
        bufferSize = 1000
        bufferLen = 0
        offsetBufferSyncRead = 0
        syncBeginBlock = true
        error = ""
#9  0x000000000066cbaf in vm_pthread_create_start_routine (arg=0x7ff700000ee0)
    at tools.cpp:5662
        thread_data = {
          start_routine = 0x53ea50 <_PcapQueue_readFromFifo_connectionThreadFunction(void*)>, arg = 0x7ff7000071e0, description = "pb - client 10.59.17.194"}
#10 0x00007ff759b45184 in start_thread (arg=0x7ff5e67fc700)
    at pthread_create.c:312
        __res = <optimized out>
        pd = 0x7ff5e67fc700
        now = <optimized out>
        unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140694110848768, 
                3075006131295898178, 0, 0, 140694110849472, 140694110848768, 
                -3078402520661734846, -3079847928684066238}, 
              mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, 
            data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
        not_first_call = <optimized out>
        pagesize_m1 = <optimized out>
        sp = <optimized out>
        freesize = <optimized out>
        __PRETTY_FUNCTION__ = "start_thread"
#11 0x00007ff75576403d in clone ()
    at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
No locals.

Here is our server config:

[general]
pcap_dump_zip = yes
pcap_dump_ziplevel = 6
pcap_dump_bufflength = 8184
pcap_dump_writethreads = 1
pcap_dump_writethreads_max = 32
pcap_dump_asyncwrite = yes
pcap_dump_zip_rtp = gzip
mysqlloadconfig = no
tar = no
tar_compress_sip = no
tar_compress_rtp = no
tar_compress_graph = no
interface = vmbr0
threading_mod = 4
mirror_bind_ip = 192.168.47.83
mirror_bind_port = 60000
managerport = 5029
sipport = 5060
sipport = 5555
sipport = 6000
cdr_sipport = yes
rtptimeout = 3600
ringbuffer = 2000
packetbuffer_enable             = yes
packetbuffer_compress           = no
max_buffer_mem                  = 12288
cdrproxy = yes
rtp-firstleg = no
deduplicate = no
deduplicate_ipheader = no
sipoverlap = no
sip-register = yes
sip-register-active-nologbin = yes
nocdr = yes
savesip = yes   
savertp = yes
savertcp = yes
savegraph = none
mos_g729 = no
mos_lqo = no
mos_lqo_bin = pesq
mos_lqo_ref = /usr/local/share/voipmonitor/audio/mos_lqe_original.wav
dscp = no
spooldir = /storage/voipmonitor
cachedir = /var/tmp/ramfs
promisc = yes
vm-buffer = 500

Thank you, and please let me know if you need more details.

arvatoth-vcc commented 6 years ago

Have you got any idea?

voipmonitor commented 6 years ago

Hi, can you please try the latest version in the develop branch? We have replaced select() with poll() for the socket reads.

arvatoth-vcc commented 6 years ago

Hi, thank you, the segfaults are now eliminated. Do you have any other to-dos, or should I close the issue?

voipmonitor commented 6 years ago

What do you mean?

arvatoth-vcc commented 6 years ago

It does not matter; I will close this ticket.