OpenSIPS / opensips

OpenSIPS is a GPL implementation of a multi-functionality SIP Server that targets to deliver a high-level technical solution (performance, security and quality) to be used in professional SIP server platforms.
https://opensips.org
Other
1.28k stars 581 forks source link

SIPTRACE module at ver.2.2-beta crashes #862

Closed yuri2000 closed 8 years ago

yuri2000 commented 8 years ago

It looks like some information is missing in the SIPTRACE module documentation for OpenSIPS 2.2, or apparently it bears a bug. With the given example of the configuration the log file displays these messages: ERROR:siptrace:save_siptrace: invalid trace info ERROR:siptrace:trace_msg_out: failed to save siptrace

And this is the config snippet:

SIPTRACE module

loadmodule "siptrace.so" modparam("siptrace", "trace_id", "[tid]uri=mysql://opensips:opensipsrw@dbserver/opensips;table=sip_trace;") modparam("siptrace", "trace_on", 1)

....

    if (method==INVITE && !has_totag()) {
            $var(user)="osips@thisnetwork.com";
            $var(trace_id) = "tid";
            sip_trace("$var(trace_id)", "d", "$var(user)");
    }

Then at some particular scenario SIPTRACE module crashes the OpenSIPS 2.2 application:

0 save_siptrace (info=0x7f6878bf6b58, msg=, vals=0x7f68722bcf80, keys=0x7f68722bd180) at siptrace.c:878

    hash = <value optimized out>
    it = <value optimized out>

1 0x00007f68720b5415 in trace_onreply_out (t=, type=, ps=0x7fff0548a820) at siptrace.c:1982

    faked = <value optimized out>
    fromip_buff = "udp1XX.2X.XXX.185", '\000' <repeats 34 times>
    toip_buff = "udp1XX.2X.XXX.78", '\000' <repeats 35 times>
    msg = 0x7f6878c00740
    to_ip = {af = 2, len = 4, u = {addrl = {1310254956, 0}, addr32 = {1310254956, 0, 0, 0}, addr16 = {59244, 19992, 0, 0, 0, 0, 0, 0}, addr = "l\347\030N", '\000' <repeats 11 times>}}
    len = 3
    statusbuf = "408\000\001\000\000"
    sbuf = <value optimized out>
    dst = <value optimized out>
    __FUNCTION__ = "trace_onreply_out"

2 0x00007f68748789a1 in run_trans_callbacks (type=128, trans=0x7f6878bfecc8, req=, rpl=, code=) at t_hooks.c:209

    params = {req = 0x7f6878c00740, rpl = 0xffffffffffffffff, code = 408, param = 0x7f6878bfeb28, extra1 = 0x7fff0548a9a0, extra2 = 0x7f6878bfede0}
    cbp = 0x7f6878bfeb18
    backup = 0x7816c0
    trans_backup = 0x7f6878bfecc8
    __FUNCTION__ = "run_trans_callbacks"

3 0x00007f6874878b00 in run_trans_callbacks_locked (type=, trans=0x7f6878bfecc8, req=, rpl=, code=)

at t_hooks.c:262

No locals.

4 0x00007f68748851b1 in relay_reply (t=0x7f6878bfecc8, p_msg=, branch=, msg_status=, cancel_bitmap=0x7fff0548aa7c)

at t_reply.c:1255
    relay = 0
    save_clone = 0
    buf = 0x7f6888aa78a8 "SIP/2.0 408 Request Timeout\r\nVia: SIP/2.0/UDP 1XX.2X.XXX.78:8408;received=1XX.2X.XXX.78;branch=z9hG4bK-524287-1---af6608797b55630d;rport=8408\r\nTo: <sip:TestingSubscriber1002@sip.thisnetwork.com>;tag=3b8d"...
    res_len = 412
    relayed_code = 408
    relayed_msg = 0xffffffffffffffff
    bm = {to_tag_val = {
        s = 0x7f6888aa796c "3b8d6d10a54b0752c937c18d274bef37-402e\r\nFrom: \"TestingSubscriber1001\"<sip:TestingSubscriber1001@sip.thisnetwork.com>;tag=735c442d\r\nCall-ID: PKSLf7Smdwyk-hULVsGkKg..\r\nCSeq: 2 INVITE\r\nServer: SBC-185\r\nConte"..., len = 37}}
    totag_retr = 0
    reply_status = RPS_COMPLETED
    uas_rb = 0x7f6878bfedc8
    cb_s = {
      s = 0x7f6888aa78a8 "SIP/2.0 408 Request Timeout\r\nVia: SIP/2.0/UDP 1XX.2X.XXX.78:8408;received=1XX.2X.XXX.78;branch=z9hG4bK-524287-1---af6608797b55630d;rport=8408\r\nTo: <sip:TestingSubscriber1002@sip.thisnetwork.com>;tag=3b8d"..., len = 412}
    text = {s = 0x50daf3 "Request Timeout", len = 15}
    __FUNCTION__ = "relay_reply"

5 0x00007f687488886e in fake_reply (ticks=70, set=) at timer.c:260

    cancel_bitmap = 0
    do_cancel_branch = <value optimized out>
    reply_status = <value optimized out>

6 final_response_handler (ticks=70, set=) at timer.c:391

    old_ctx = 0x0
    my_ctx = 0x7f6888aa7f08
    r_buf = 0x7f6878bfeea0

---Type to continue, or q to quit--- t = 0x7f6878bfecc8

7 timer_routine (ticks=70, set=) at timer.c:1066

    tl = 0x7f6878bfef18
    tmp_tl = 0x0
    id = <value optimized out>
    __FUNCTION__ = "timer_routine"

8 0x000000000047e28b in handle_timer_job () at timer.c:632

    t = 0x7f687890d760
    l = <value optimized out>
    __FUNCTION__ = "handle_timer_job"

9 0x00000000004ed64d in handle_io (fm=0x7f6888a9c388, idx=1, event_type=) at net/net_tcp_proc.c:117

    ret = 0
    n = <value optimized out>
    con = <value optimized out>
    s = <value optimized out>
    rw = <value optimized out>
    resp = <value optimized out>
    response = {140733282036968, 140086962256117}
    __FUNCTION__ = "handle_io"

10 0x00000000004ee140 in io_wait_loop_epoll (repeat=0, t=2, h=) at ./io_wait_loop.h:221

    n = 1
    r = 1
    e = 0x7f6888a9c388
    ret = 1

11 0x00000000004ef407 in tcp_worker_proc (unix_sock=) at net/net_tcp_proc.c:312

    __FUNCTION__ = "tcp_worker_proc"

12 0x00000000004eb31d in tcp_start_processes (chd_rank=0x7653c8, startup_done=0x0) at net/net_tcp.c:1758

    r = <value optimized out>
    reader_fd = {92, 94}
    pid = 0
    si = <value optimized out>
    load_p = 0x7f6878bc58c8
    __FUNCTION__ = "tcp_start_processes"

13 0x000000000042f4af in main_loop (argc=, argv=) at main.c:677

    startup_done = 0x0
    chd_rank = 44

14 main (argc=, argv=) at main.c:1252

    cfg_stream = <value optimized out>
    c = <value optimized out>
    r = <value optimized out>
    tmp = 0x7fff0548bf70 ""
    tmp_len = <value optimized out>
    port = 0
    proto = 5282149
    options = 0x510b48 "f:cCm:M:b:l:n:N:rRvdDFETSVhw:t:u:g:P:G:W:o:"
    ret = -1
    seed = 221183166
    rfd = <value optimized out>
    __FUNCTION__ = "main"

This is the version print (installed via RPMs): opensips -V version: opensips 2.2.0-beta (x86_64/linux) flags: STATS: On, DISABLE_NAGLE, USE_MCAST, SHM_MMAP, PKG_MALLOC, F_MALLOC, DBG_MALLOC, FAST_LOCK-ADAPTIVE_WAIT ADAPTIVE_WAIT_LOOPS=1024, MAX_RECV_BUFFER_SIZE 262144, MAX_LISTEN 16, MAX_URI_SIZE 1024, BUF_SIZE 65535 poll method support: poll, epoll_lt, epoll_et, sigio_rt, select. main.c compiled on 14:59:48 Mar 31 2016 with gcc 4.4.7

Thank you.

ionutrazvanionita commented 8 years ago

Hi. This https://github.com/OpenSIPS/opensips/commit/943d9015f8f1e3467acafee67693f157beab9755 fixes the errors, don't know if it fixes the crash. If it doesn't, please give more details about the scenario where it crashes.

ionutrazvanionita commented 8 years ago

@yuri2000 please talk here, it's easier to follow. In gdb, can you print the output of

list
p *it
yuri2000 commented 8 years ago

Hi Ionut, Thank you for your time and attention. This is the output: (gdb) list 894 if (!(_trace_on_flag)) { 895 LM_DBG("trace is off!\n"); 896 return 0; 897 } 898 899 hash = info->tracelist->hash; 900 / check where the hash matches and take the proper action _/ 901 for (it=info->trace_list; it && (it->hash == hash); it=it->next) { 902 if (!(_it->traceable)) 903 continue; (gdb) p *it $1 = {name = {s = 0x74e0458b48db8548 <Address 0x74e0458b48db8548 out of bounds>, len = 59328515}, type = 3099838811, hash = 201, traceable = 0xf0eb050f <Address 0xf0eb050f out of bounds>, el = {db = 0x0, uri = {user = { s = 0x0, len = 0}, passwd = {s = 0x0, len = 0}, host = {s = 0x0, len = 0}, port = { s = 0x0, len = 0}, params = {s = 0x0, len = 0}, headers = {s = 0x0, len = 0}, port_no = 0, proto = 0, type = ERROR_URI_T, transport = {s = 0x0, len = 0}, ttl = { s = 0x0, len = 0}, user_param = {s = 0x0, len = 0}, maddr = {s = 0x0, len = 0}, method = {s = 0x0, len = 0}, lr = {s = 0x0, len = 0}, r2 = {s = 0x0, len = 0}, gr = { s = 0x0, len = 0}, transport_val = {s = 0x0, len = 0}, ttl_val = {s = 0x0, len = 0}, user_param_val = {s = 0x0, len = 0}, maddr_val = {s = 0x0, len = 0}, method_val = { s = 0x0, len = 0}, lr_val = {s = 0x0, len = 0}, r2_val = {s = 0x0, len = 0}, gr_val = { s = 0x0, len = 0}, u_name = {{s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}, { s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}}, u_val = {{s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}, {s = 0x0, len = 0}}, u_params_no = 0}}, next = 0x0} (gdb)

Thanks again, Yuri

On Mon, Apr 25, 2016 at 4:56 AM, Ionut Ionita notifications@github.com wrote:

@yuri2000 https://github.com/yuri2000 please talk here, it's easier to follow. In gdb, can you print the output of

list p *it

— You are receiving this because you were mentioned. Reply to this email directly or view it on GitHub https://github.com/OpenSIPS/opensips/issues/862#issuecomment-214214726

ionutrazvanionita commented 8 years ago

@yuri2000 thank you. Also can you please provide a little more details about the scenario?

yuri2000 commented 8 years ago

I upgraded the system from ver.1.9 to ver.2.2. OpenSIPS works with CDRTool/FreeRadius. In 1.9 everything worked fine. I needed WSS protocol and moved to 2.2. SIPTRACE has a new structure in 2.2 (What was bad in the previous one ? As a saying goes: "if it ain't broke, don't fix it"...).

The script contains this configuration for SIPTRACE:

SIPTRACE module

loadmodule "siptrace.so" modparam("siptrace", "trace_id", "[tid]uri=mysql://opensips:opensipsrw@dbserver/opensips;table=sip_trace;") modparam("siptrace", "trace_on", 1)

....

    if (method==INVITE && !has_totag()) {
            $var(user)="osips@thisnetwork.com";
            $var(trace_id) = "tid";
            sip_trace("$var(trace_id)", "d", "$var(user)");
    }

One of the routes in the script (called "blackhole") plays a role of the timer (for duration of the fr_timeout parameter):

route[blackhole] {

Wait a few seconds before diverting the call to voicemail - just

in case the device

is "sleeping", so a Push Notification may wake it up.

    xlog("Sleep route activated before diverting the call to

voicemail\n"); engage_media_proxy(); send_reply("100", "Trying"); send_reply("180", "Waking up the device"); t_on_reply("check_next"); t_on_failure("check_again"); t_relay("udp:1XX.5X.2XX.186:5060"); exit; }

The process works fine. The blackhole route is activated (the target udp:1XX.5X.2XX.186:5060 doesn't exist), push notification is sent, sleeping device wakes up and upon the fr_timeout expiration the failure route "check_again" takes charge for the further proccesing. Caller and callee are connected. If the callee still didn't answer the call reaches a voicemail. The problem (the subject of the discussion) happens when the caller cancels the call before the fr_timeout expiration. The application cannot cope with such an event and crashes. This doesn't happen in versions prior to 2.2 - even in 2.1 the application survives without an issue in the same scenario.

Remarking the "sip_trace" eliminates the crash problem in 2.2:

sip_trace("$var(trace_id)", "d", "$var(user)");

Reducing amount of attributes inside the sip_trace function like: sip_trace("$var(trace_id)", "d"); sip_trace("$var(trace_id)"); has no impact on the discussed phenomenon.

Please let me know your thoughts. Thank you, Yuri

On Tue, Apr 26, 2016 at 7:56 AM, Ionut Ionita notifications@github.com wrote:

@yuri2000 https://github.com/yuri2000 thank you. Also can you please provide a little more details about the scenario?

— You are receiving this because you were mentioned. Reply to this email directly or view it on GitHub https://github.com/OpenSIPS/opensips/issues/862#issuecomment-214716510

ionutrazvanionita commented 8 years ago

@yuri2000 https://github.com/OpenSIPS/opensips/commit/3c6b6930d615adb978dd2779b6cd4054c3bc49df ( https://github.com/OpenSIPS/opensips/commit/dbdc46c994c69b43bb124a397056268ddaf2f543 for 2.2 branch) there was a double free bug on cancelled transactions, sorry for that. Hope this fixes your problem.

yuri2000 commented 8 years ago

Thank you, Ionut. The problem is fixed.

Thanks again, Yuri

On Tue, Apr 26, 2016 at 9:52 AM, Ionut Ionita notifications@github.com wrote:

@yuri2000 https://github.com/yuri2000 3c6b693 https://github.com/OpenSIPS/opensips/commit/3c6b6930d615adb978dd2779b6cd4054c3bc49df ( dbdc46c https://github.com/OpenSIPS/opensips/commit/dbdc46c994c69b43bb124a397056268ddaf2f543 for 2.2 branch) there was a double free bug on cancelled transactions, sorry for that. Hope this fixes your problem.

— You are receiving this because you were mentioned. Reply to this email directly or view it on GitHub https://github.com/OpenSIPS/opensips/issues/862#issuecomment-214752101