kamailio / kamailio

Kamailio - The Open Source SIP Server for large VoIP and real-time communication platforms -
https://www.kamailio.org
Other
2.31k stars 962 forks source link

5.8.2++: TCP/TLS connection for WebSocket could not be found #3969

Closed schoberw closed 2 months ago

schoberw commented 2 months ago

Description

Kamailio is setup as a WebRTC to SIP(UDP) gateway. It forwards WSS to UDP and UDP to WSS connections. This was doing fine until 5.8.2 (so 5.8.1 is running perfectly).

Since 5.8.2 (and 5.8.3) the following message is printed on sending towards a connected WSS connection:

Sep  4 17:02:20 p01t-1 kamailio.webrtc[336177]: WARNING: webrtc:tm [../../core/forward.h:204]: msg_send_buffer(): TCP/TLS connection for WebSocket could not be found
Sep  4 17:02:20 p01t-1 kamailio.webrtc[336177]: WARNING: webrtc:tm [t_fwd.c:1586]: t_send_branch(): sending request on branch 0 failed
Sep  4 17:02:20 p01t-1 kamailio.webrtc[336177]: WARNING: webrtc:<script>: Forwarding request failed
Sep  4 17:02:20 p01t-1 kamailio.webrtc[336177]: INFO: webrtc:sl [sl_funcs.c:420]: sl_reply_error(): message marked with delayed-reply flag

Troubleshooting

Code:

        # $ru = INVITE sip:43130110221@192.168.48.131:5063;transport=udp;ntv3=4;alias=1.2.3.4~51820~6;fs=tls:100.100.100.100:443;uri=sip:43130110221%40192.168.63.105%3bline%3dwc-x9u7ra%3bx-ts;line=wc-x9u7ra;x-ts
        if ($(ru{uri.param,alias}{s.len}) && $(ru{uri.param,uri}{s.len})) {
                $var(target) = $rU;

                if (!handle_ruri_alias()) {
                        xlog("L_ERR", ">> Bad alias <$ru>");
                        sl_send_reply("400", "Bad Request");
                        exit;
                }
                if ($(ru{uri.param,fs}{s.len})) {
                        xlog("L_INFO", ">> setting local socket to $(ru{uri.param,fs})");
                        $fs = $(ru{uri.param,fs});
                }
                if ($dP =~ "ws|tls|tcp") {
                        set_forward_no_connect();
                        xlog("L_INFO", ">> checking tcp connection to $dd:$dp");
                        if(tcp_get_conid("$dd:$dp", "$var(conid)")) {
                                xlog("L_INFO", ">> connection id to $dd:$dp is: $var(conid)");
                        } else {
                                if (is_method("NOTIFY")) {
                                        xlog("L_ERR", ">> connection to $dd:$dp is no longer connected. Client must resubscribe!");
                                        sl_send_reply("604", "Please Remove Subscription"); 
                                } else {
                                        xlog("L_ERR", ">> connection to $dd:$dp is no longer connected. Client must reregister!");
                                        sl_send_reply("480", "TCP not connected");
                                }
                                exit;
                        }
                }
                $ru = $(ru{uri.param,uri}{s.unescape.param});
                $rU = $var(target);

                xlog("L_INFO", ">> OUT-CONTACT: $rm $ru ($du) - rP=$rP -> dp=$dP nh(P)=$nh(P)");
                route(FORWARD); # route that call immediately
        }

route[FORWARD] {
        xlog("L_INFO", "ROUTE[FORWARD] - R-URI/D-URI: $rm $ru ($du): $mf");

...

        xlog("L_INFO", ">> Forwarding request now");
        if (!t_relay()) {
                xlog("L_WARN", "Forwarding request failed");

...

onsend_route {
        if ($mt==1) {
                xlog("L_INFO", ">>> Sending Request: $rm $ru ($sndfrom(ip):$sndfrom(port) -> $sndto(ip):$sndto(port): $du)");
                xlog("L_INFO", "$sndto(buf)");
                #!ifdef WITH_HOMER
                # for some reason the ACK flagged in FORWARD is not traced on sending out
                if (isflagset(FL_TRACE)) {
                        xlog("L_INFO", ">> FL_TRACE on $rm set!");
                }
                if (is_method("ACK") && isflagset(FL_TRACE)) {
                        sip_trace();
                }
                #!endif
        } else {
                xlog("L_INFO", ">>> Sending Reply: $rs $rr ($sndfrom(ip):$sndfrom(port) -> $sndto(ip):$sndto(port))");
                xlog("L_INFO", "$sndto(buf)");
        }
}

Reproduction

Any call to the WebSocket is rejected now. Working fine before 5.8.2.

Log Messages

 kamailio.webrtc[338351]: INFO: webrtc:<script>: ROUTE[OUTBOUND] - route to registered contact: INVITE sip:15551234521@192.168.48.131:5063;transport=udp;ntv3=4;alias=101.100.100.129~52201~6;fs=tls:100.100.100.131:443;uri=sip:15551234521%40192.168.63.105%3bline%3dwc-x9u7ra%3bx-ts;line=wc-x9u7ra;x-ts
 kamailio.webrtc[338351]: INFO: webrtc:<script>: >> read domain from P-Called-Party-ID: ws2.ipbx.tld
 kamailio.webrtc[338351]: INFO: webrtc:<script>: >> Reduce Session Timer from 1800 to 120
 kamailio.webrtc[338351]: INFO: webrtc:<script>: >> setting local socket to tls:100.100.100.131:443

optional. same without $fs=tls:100.100.100.131:443 (ws: and wss: are invalid for $fs)

 kamailio.webrtc[338351]: INFO: webrtc:<script>: >> checking tcp connection to 101.100.100.129:52201
 kamailio.webrtc[338351]: INFO: webrtc:<script>: >> connection id to 101.100.100.129:52201 is: 1

The connection is found and alive!

 kamailio.webrtc[338351]: INFO: webrtc:<script>: >> OUT-CONTACT: INVITE sip:15551234521@192.168.63.105;line=wc-x9u7ra;x-ts (sip:101.100.100.129:52201;transport=ws) - rP=UDP -> dp=ws nh(P)=ws
 kamailio.webrtc[338351]: INFO: webrtc:<script>: ROUTE[FORWARD] - R-URI/D-URI: INVITE sip:15551234521@192.168.63.105;line=wc-x9u7ra;x-ts (sip:101.100.100.129:52201;transport=ws): 8388624
 kamailio.webrtc[338351]: INFO: webrtc:<script>: ROUTE[RTPP_REQ]
 kamailio.webrtc[338351]: INFO: webrtc:<script>: ROUTE[RTP_MANAGE]
 kamailio.webrtc[338351]: INFO: webrtc:<script>: > SIP -> WebRTC, bridging RTP->SRTP-DTLS and adding ICE - o-313732353436333239323434393936-ytpwonh6n9aq
 kamailio.webrtc[338351]: INFO: webrtc:<script>: ROUTE[RTP_MANAGE] method:INVITE, ruri:sip:15551234521@192.168.63.105;line=wc-x9u7ra;x-ts, callid:o-313732353436333239323434393936-ytpwonh6n9aq, rtpengine_manage:call-id=o-313732353436333239323434393936-ytpwonh6n9aq via-branch=auto trust-address r
eplace-origin replace-session-connection rtcp-mux-accept rtcp-mux-require generate-mid SDES-off ICE=force ICE-lite=forward UDP/TLS/RTP/SAVPF pad-crypto
 kamailio.webrtc[338351]: INFO: webrtc:<script>: > rtpengine(): SUCCESS
 kamailio.webrtc[338351]: INFO: webrtc:<script>: >> Forwarding request now

100 trying ...

 kamailio.webrtc[338351]: INFO: webrtc:<script>: >>> Sending Request: INVITE sip:15551234521@192.168.63.105;line=wc-x9u7ra;x-ts (100.100.100.131:443 -> 101.100.100.129:52201: sip:101.100.100.129:52201;transport=ws)
 kamailio.webrtc[338351]: INFO: webrtc:<script>: INVITE sip:15551234521@192.168.63.105;line=wc-x9u7ra;x-ts SIP/2.0
Record-Route: <sip:100.100.100.131:443;transport=ws;r2=on;lr;ftag=ly9574qrtx;x-dir=o;x-rtpp=1>
Record-Route: <sip:192.168.48.131:5063;r2=on;lr;ftag=ly9574qrtx;x-dir=o;x-rtpp=1>
Via: SIP/2.0/WSS 100.100.100.131:443;branch=z9hG4bK727a.06944932aa61c384f55e7cbc0a59ec0a.0
Via: SIP/2.0/UDP 192.168.48.74:5060;branch=z9hG4bKgWoe~Gc5kT
Via: SIP/2.0/UDP 192.168.48.74:5060;branch=z9hG4bKvA3weqaC-e;internal=6715591-1
From: "Walter 22" <sip:22@ws2.ipbx.tld>;tag=ly9574qrtx
To: <sip:21@ws2.ipbx.tld;user=phone>
Call-ID: 313732353436333239323434393936-ytpwonh6n9aq-UASession-p1EoqWEkFt-CQ-100-
CSeq: 1 INVITE
Max-Forwards: 67
Supported: replaces,from-change,timer,histinfo
Allow: INVITE,ACK,CANCEL,BYE,INFO,REFER,PRACK,NOTIFY
Contact: <sip:192.168.48.74:5060;internal=6715591-1;type=tpcc>
Content-Length: 649
Content-Type: application/sdp
Record-Route: <sip:192.168.48.74:5060;transport=udp;lr>
Allow-Events: talk,hold,refer
Accept: application/sdp
Session-Expires: 90;refresher=uas

[...] shortened SDP body

 kamailio.webrtc[338351]: INFO: webrtc:<script>: >> FL_TRACE on INVITE set!
 kamailio.webrtc[338351]: INFO: webrtc:<script>: Network sending:
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
INVITE sip:15551234521@192.168.63.105;line=wc-x9u7ra;x-ts SIP/2.0
Record-Route: <sip:100.100.100.131:443;transport=ws;r2=on;lr;ftag=ly9574qrtx;x-dir=o;x-rtpp=1>
Record-Route: <sip:192.168.48.131:5063;r2=on;lr;ftag=ly9574qrtx;x-dir=o;x-rtpp=1>
Via: SIP/2.0/WSS 100.100.100.131:443;branch=z9hG4bK727a.06944932aa61c384f55e7cbc0a59ec0a.0
Via: SIP/2.0/UDP 192.168.48.74:5060;branch=z9hG4bKgWoe~Gc5kT
Via: SIP/2.0/UDP 192.168.48.74:5060;branch=z9hG4bKvA3weqaC-e;internal=6715591-1
From: "Walter 22" <sip:22@ws2.ipbx.tld>;tag=ly9574qrtx
To: <sip:21@ws2.ipbx.tld;user=phone>
Call-ID: 313732353436333239323434393936-ytpwonh6n9aq-UASession-p1EoqWEkFt-CQ-100-
CSeq: 1 INVITE
Max-Forwards: 67
Supported: replaces,from-change,timer,histinfo
Allow: INVITE,ACK,CANCEL,BYE,INFO,REFER,PRACK,NOTIFY
Contact: <sip:192.168.48.74:5060;internal=6715591-1;type=tpcc>
Content-Length: 649
Content-Type: application/sdp
Record-Route: <sip:192.168.48.74:5060;transport=udp;lr>
Allow-Events: talk,hold,refer
Accept: application/sdp
Session-Expires: 90;refresher=uas

[...] shortened SDP body

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
 kamailio.webrtc[338351]: WARNING: webrtc:tm [../../core/forward.h:204]: msg_send_buffer(): TCP/TLS connection for WebSocket could not be found
 kamailio.webrtc[338351]: WARNING: webrtc:tm [t_fwd.c:1586]: t_send_branch(): sending request on branch 0 failed
 kamailio.webrtc[338351]: WARNING: webrtc:<script>: Forwarding request failed

Possible Solutions

Downgrade to 5.8.1

schoberw commented 2 months ago

I found some changes in the change notes for HOMER socket assignment. But same happens with sipcapture module not inlcuded and HOMER sections commented out, too.

schoberw commented 2 months ago

https://github.com/kamailio/kamailio/commit/7d103cf3887401fca32d95f7ea7ee10f28e9e127

dst->proto should be right.

 kamailio.webrtc[338351]: INFO: webrtc:<script>: >> OUT-CONTACT: INVITE sip:15551234521@192.168.63.105;line=wc-x9u7ra;x-ts (sip:101.100.100.129:52201;transport=ws) - rP=UDP -> dp=ws nh(P)=ws

nh(P) = ws. It is not TLS. What is the "socket" for WSS?

henningw commented 2 months ago

I suspected that its related to the commit referenced above. Just to check, you have "tcp_connection_match" set to 1 in your cfg, right?

schoberw commented 2 months ago

I suspected that its related to the commit referenced above. Just to check, you have "tcp_connection_match" set to 1 in your cfg, right?

Yes. These are my tcp settings:

# Lower this value for faster detection of broken TCP connections.
tcp_send_timeout=3

tcp_connect_timeout=10  # def = 10
tcp_connection_lifetime=3605
tcp_rd_buf_size=32768
tcp_max_connections=2048
tcp_connection_match=1  # match also local port

tcp_keepalive=yes       # Enables keepalive for tcp (sets SO_KEEPALIVE socket option)
tcp_keepcnt=3           # Number of keepalives sent before dropping the connection (TCP_KEEPCNT socket option). Linux only.
tcp_keepidle=30         # Time before starting to send keepalives, if the connection is idle (TCP_KEEPIDLE socket option). Linux only.
tcp_keepintvl=10        # Time interval between keepalive probes, when the previous probe failed (TCP_KEEPINTVL socket option). Linux only.

tcp_async=yes           # alias tcp_buf_write, default=yes
tcp_crlf_ping=no
schoberw commented 2 months ago

Commenting the line

tcp_connection_match=1  # match also local port

"solves" the issue in the fist place. tcp_reuse_port = 1 makes no difference: the outgoing INVITE is sent from the local port 443 within the existing TCP sesssion (which is necessary for NAT/FW traversal).

Shouldn't "wss" match the "tls" socket though?

I can live with that config change since outgoing connections are never created on this system (WebRTC Gateway) and the existing connection is used anyway.

Pls comment or close!

Thanks for the hint!

henningw commented 2 months ago

Yes, if you deactivate this option it will not do this strict connction matching which was improved in the referenced patch. I will ask my colleague to have a look tomorrow to this change again.

henningw commented 2 months ago

It might be similar to an issue discussed in this thread on the mailing list link

miconda commented 2 months ago

I pushed a commit for it, can you test and see if works fine now?

schoberw commented 2 months ago

I pushed a commit for it, can you test and see if works fine now?

built from master, your patch is in the git log.

Sep  5 10:26:04 p01t-1 kamailio.webrtc[482512]: INFO: webrtc:<script>: Using stored socket: tls:100.100.100.131:443
Sep  5 10:26:04 p01t-1 kamailio.webrtc[482512]: INFO: webrtc:<script>: Store <sip:15551234510@192.168.65.230;line=wc-2wcxwi;x-ts>;expires=583;+sip.ice;reg-id=1;+sip.instance="<urn:uuid:c5631d8b-391c-3771-a2d7-5ea501a3b236>" in local database socket=tls:100.100.100.131:443
Sep  5 10:26:05 p01t-1 kamailio.webrtc[482523]: CRITICAL: <core> [core/pass_fd.c:281]: receive_fd(): EOF on 13
Sep  5 10:26:05 p01t-1 kamailio.webrtc[482508]: ALERT: <core> [main.c:809]: handle_sigs(): child process 482512 exited by a signal 11
Sep  5 10:26:05 p01t-1 kamailio.webrtc[482508]: ALERT: <core> [main.c:813]: handle_sigs(): core was not generated
Sep  5 10:26:05 p01t-1 kamailio.webrtc[482508]: INFO: <core> [main.c:836]: handle_sigs(): terminating due to SIGCHLD
Sep  5 10:26:05 p01t-1 kamailio.webrtc[482510]: INFO: <core> [main.c:892]: sig_usr(): signal 15 received
miconda commented 2 months ago

Can you run kamailio as root user for testing and before set also:

ulimit -c unlimited

In order to get the corefile of the crash. Depending on your OS, the core file can be dumped in different location. If it is not in the / or the working directory set for Kamailio, you have to search on the web to find out where it written by OS. It might even be sent to some application. For example, for Ubuntu, useful details can be found at:

Once you have the corefile, get the output of bt full with gdb and post it here.

schoberw commented 2 months ago

I'm on Debian bullseye.

FTR: I followed https://www.kamailio.org/wiki/tutorials/troubleshooting/coredumpfile Took a while until I read the line First check if the “disable_core_dump” variable is set in your configuration. :-(

sysctl kernel.core_pattern=/tmp/core.%e.%p.%h.%t

root@p01t-1:/var/local/src/kamailio.webrtc# DUMP_CORE=yes /var/local/kamailio-devel/sbin/kamailio -P /var/run/kamailio/kamailio.webrtc.pid -f /var/local/src/kamailio.webrtc/kamailio.cfg -m 48 -M 24 -u root -g root -DD
Sep  5 11:11:13 p01t-1 kamailio.webrtc[495653]: CRITICAL: <core> [core/pass_fd.c:281]: receive_fd(): EOF on 13
Sep  5 11:11:13 p01t-1 kamailio.webrtc[495637]: ALERT: <core> [main.c:809]: handle_sigs(): child process 495642 exited by a signal 11
Sep  5 11:11:13 p01t-1 kamailio.webrtc[495637]: ALERT: <core> [main.c:813]: handle_sigs(): core was generated
Sep  5 11:11:13 p01t-1 kamailio.webrtc[495637]: INFO: <core> [main.c:836]: handle_sigs(): terminating due to SIGCHLD
Sep  5 11:11:13 p01t-1 kamailio.webrtc[495641]: INFO: <core> [main.c:892]: sig_usr(): signal 15 received
Sep  5 11:11:13 p01t-1 kamailio.webrtc[495639]: INFO: <core> [main.c:892]: sig_usr(): signal 15 received

Backtrace attached.

kamailio-6.0.0-dev2-backtrace.txt

miconda commented 2 months ago

The crash is because another (unrelated) commit I did. I just pushed a fix for that one. Can you try again with the very last git master branch and see how it goes this time?

schoberw commented 2 months ago

Yes, working now!

root@p01t-1:~/build-dir/kamailio.webrtc# grep "^tcp_" /var/local/src/kamailio.webrtc/kamailio.cfg
tcp_children=CHILDREN   # def = children
tcp_send_timeout=3
tcp_connect_timeout=10  # def = 10
tcp_connection_lifetime=3605
tcp_rd_buf_size=32768
tcp_max_connections=2048
tcp_connection_match=1  # match also local port # RMT#72522: 5.8.2++ is more strict: a WSS protocol is not finding the TLS socket and cannot send out messages to WSS
tcp_reuse_port=yes      # default: no
tcp_keepalive=yes       # Enables keepalive for tcp (sets SO_KEEPALIVE socket option)
tcp_keepcnt=3           # Number of keepalives sent before dropping the connection (TCP_KEEPCNT socket option). Linux only.
tcp_keepidle=30         # Time before starting to send keepalives, if the connection is idle (TCP_KEEPIDLE socket option). Linux only.
tcp_keepintvl=10        # Time interval between keepalive probes, when the previous probe failed (TCP_KEEPINTVL socket option). Linux only.
tcp_async=yes           # alias tcp_buf_write, default=yes
tcp_crlf_ping=no
tcp_accept_no_cl=yes
root@p01t-1:/var/local/src/kamailio.webrtc# DUMP_CORE=yes /var/local/kamailio-devel/sbin/kamailio -P /var/run/kamailio/kamailio.webrtc.pid -f /var/local/src/kamailio.webrtc/kamailio.cfg -m 48 -M 24 -u root -g root -DD
root@p01t-1:~/build-dir/kamailio.webrtc# /var/local/kamailio-devel/sbin/kamailio -v
version: kamailio 6.0.0-dev2 (x86_64/linux) 601651
flags: USE_TCP, USE_TLS, USE_SCTP, TLS_HOOKS, USE_RAW_SOCKS, DISABLE_NAGLE, USE_MCAST, DNS_IP_HACK, SHM_MMAP, PKG_MALLOC, MEM_JOIN_FREE, Q_MALLOC, F_MALLOC, TLSF_MALLOC, DBG_SR_MEMORY, USE_FUTEX, FAST_LOCK-ADAPTIVE_WAIT, USE_DNS_CACHE, USE_DNS_FAILOVER, USE_NAPTR, USE_DST_BLOCKLIST, HAVE_RESOLV_RES, TLS_PTHREAD_MUTEX_SHARED
ADAPTIVE_WAIT_LOOPS 1024, MAX_RECV_BUFFER_SIZE 262144, MAX_SEND_BUFFER_SIZE 262144, MAX_URI_SIZE 1024, BUF_SIZE 65535, DEFAULT PKG_SIZE 8MB
poll method support: poll, epoll_lt, epoll_et, sigio_rt, select.
id: 601651
compiled on 11:48:41 Sep  5 2024 with gcc 10.2.1
Sep  5 11:50:04 p01t-1 kamailio.webrtc[502761]: INFO: webrtc:<script>: >> read domain from P-Called-Party-ID: ws2.ipbx.at
Sep  5 11:50:04 p01t-1 kamailio.webrtc[502761]: INFO: webrtc:<script>: >> Reduce Session Timer from 1800 to 120
Sep  5 11:50:04 p01t-1 kamailio.webrtc[502761]: INFO: webrtc:<script>: >> setting local socket to tls:100.100.100.131:443
Sep  5 11:50:04 p01t-1 kamailio.webrtc[502761]: INFO: webrtc:<script>: >> checking tcp connection to 101.100.100.129:64056
Sep  5 11:50:04 p01t-1 kamailio.webrtc[502761]: INFO: webrtc:<script>: >> connection id to 101.100.100.129:64056 is: 2
Sep  5 11:50:04 p01t-1 kamailio.webrtc[502761]: INFO: webrtc:<script>: >> OUT-CONTACT: INVITE sip:15551234521@192.168.226.53;line=wc-x9u7ra;x-ts (sip:101.100.100.129:64056;transport=ws) - rP=UDP -> dp=ws nh(P)=ws
.
...

Sep  5 11:50:04 p01t-1 kamailio.webrtc[502761]: INFO: webrtc:<script>: >>> Sending Request: INVITE sip:15551234521@192.168.226.53;line=wc-x9u7ra;x-ts (100.100.100.131:443 -> 101.100.100.129:64056: sip:101.100.100.129:64056;transport=ws)
Sep  5 11:50:04 p01t-1 kamailio.webrtc[502761]: INFO: webrtc:<script>: INVITE sip:15551234521@192.168.226.53;line=wc-x9u7ra;x-ts SIP/2.0

...

Sep  5 11:50:04 p01t-1 kamailio.webrtc[502761]: INFO: webrtc:<script>: >> FL_TRACE on INVITE set!
Sep  5 11:50:04 p01t-1 kamailio.webrtc[502761]: INFO: webrtc:<script>: Network sending:

...

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
Sep  5 11:50:04 p01t-1 kamailio.webrtc[502774]: INFO: <script>: Network received:
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
SIP/2.0 100 Trying
miconda commented 2 months ago

Thanks for testing and reporting back! Closing it.