slact / nchan

Fast, horizontally scalable, multiprocess pub/sub queuing server and proxy for HTTP, long-polling, Websockets and EventSource (SSE), powered by Nginx.
https://nchan.io/
Other
3.01k stars 292 forks source link

"subrequests cycle" error after 50 websocket publish requests with nchan_publisher_upstream_request (1.9.15) #185

Closed valydvweb closed 8 years ago

valydvweb commented 8 years ago

Hello,

I did some tests with a greater number of requests (3-4 packets / second) and after 1-2 minutes a connection problem and the error below (in NGINX logs)

2016/05/19 03:17:58 [error] 17266#0: *3 subrequests cycle while processing "/pub-test" while sending to client, client: myip, server: mydomain.com, request: "GET /test HTTP/1.1", host: "mydomain.com"
2016/05/19 03:17:58 [alert] 17265#0: worker process 17266 exited on signal 11

This is my NGINX conf (for testing)

location /test {
    nchan_pubsub;
    nchan_channel_id "test";
    nchan_channel_group "gr";
    nchan_message_buffer_length 1000;
    nchan_message_timeout 48h;
    nchan_publisher_upstream_request "/pub-test";
}

location "/pub-test" {
    proxy_pass "http://mydomain.com/pub.php";
}

This is pub.php test script:

<?php
echo file_get_contents('php://input');
?>

The same problem occurs in other configuration (not just PHP-FPM)

EG:

location "/pub-test" {
    fastcgi_pass   127.0.0.1:8002;
    include fastcgi_params;
}

And then spawns FastCGI process (I have compiled a small script in C)


I use NGINX 1.9.15 with NCHAN (last)

Thank's Valy

slact commented 8 years ago

what are you using to publish and subscribe? could you show me the backtrace from the coredump?

valydvweb commented 8 years ago

I use HTML5 WebSocket This is my test script

<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Test</title>
</head>

<body>
<script>

window.onload = function() {
    var cn = new WebSocket('ws://mydomain.com/test'),
    msg = document.getElementById('msg');

cn.onopen = function() {
    console.log('Open test connection');
}

function send() {
    cn.send(msg.value);
}

cn.onmessage = function(e) {
    console.log('Receiving test message: ' + e.data);
}

    msg.addEventListener('keypress', function(e) {
        if (e.which == 13 || e.keyCode == 13) {
            e.preventDefault();
            cn.send(msg.value.trim());
        }
    });
}
</script>
<textarea name="msg" id="msg" style="width:100%; height:200px;"></textarea>
</body>
</html>
valydvweb commented 8 years ago

backtrace from the coredump ... for NGINX But ... I have no idea how to do this

valydvweb commented 8 years ago

https://www.nginx.com/resources/wiki/start/topics/tutorials/debugging/

I am close ?

slact commented 8 years ago

Ok, don't worry about the backtrace.

valydvweb commented 8 years ago
(gdb) backtrace full
#0  0x00000000004d67a8 in nchan_adjust_subrequest (sr=0x14b94e0, method=8, method_name=0x757bc0, request_body=0x14bb500, content_length_n=34,
    content_len_str=0x0) at /root/nchan/src/util/nchan_subrequest.c:137
        r = 0x0
        body = 0x22
        rc = 21730712
#1  0x00000000004dca44 in websocket_publish (fsub=0x10a4b60, buf=0x7ffe7b6f53c0, fb_pool=0x14b9490) at /root/nchan/src/subscribers/websocket.c:470
        psr = 0x14b9508
        psr_stuff = 0x14b9508
        psrd = 0x14b9518
        r = 0x10ed980
        sr = 0x14b94e0
        fakebody = 0x14bb500
        fakebody_chain = 0x14bb540
        fakebody_buf = 0x14bb550
        sz = 34
        nopublishing = {len = 23, data = 0x51de8c "Publishing not allowed."}
        POST_REQUEST_STRING = {len = 4, data = 0x51dea4 "POST "}
        publisher_upstream_request_url_ccv = 0x10d8740
#2  0x00000000004de233 in websocket_reading (r=0x10ed980) at /root/nchan/src/subscribers/websocket.c:977
        ctx = 0x10ee6e0
        fsub = 0x10a4b60
        frame = 0x10a4c98
        rc = 0
        rev = 0x112fcc0
        c = 0x10f7fb0
        i = 34
        buf = {pos = 0x14b94e0 "ccccccccgfdgdgdfgdfgdfgdfgdgdfgdfg\v\001", last = 0x14b9502 "\v\001", file_pos = 0, file_last = 0,
          start = 0x14b94e0 "ccccccccgfdgdgdfgdfgdfgdfgdgdfgdfg\v\001", end = 0x14b9502 "\v\001", tag = 0x0, file = 0x0, shadow = 0x0, temporary = 0,
          memory = 1, mmap = 0, recycled = 0, in_file = 0, flush = 0, sync = 0, last_buf = 0, last_in_chain = 0, last_shadow = 0, temp_file = 0, num = 0}
        temp_pool = 0x14b9490
        free_temp_pool = 1
        msgbuf = {pos = 0x14b94e0 "ccccccccgfdgdgdfgdfgdfgdfgdgdfgdfg\v\001", last = 0x14b9502 "\v\001", file_pos = 0, file_last = 0,
          start = 0x14b94e0 "ccccccccgfdgdgdfgdfgdfgdfgdgdfgdfg\v\001", end = 0x14b9502 "\v\001", tag = 0x0, file = 0x0, shadow = 0x0, temporary = 0,
          memory = 1, mmap = 0, recycled = 0, in_file = 0, flush = 1, sync = 0, last_buf = 1, last_in_chain = 1, last_shadow = 0, temp_file = 0, num = 0}
        close_code = 32766
        close_reason = {len = 17439664, data = 0x417120 "1\355I\211\321^H\211\342H\203\344\360PTI\307\300\320\305P"}
#3  0x000000000046e863 in ngx_http_request_handler (ev=0x112fcc0) at src/http/ngx_http_request.c:2194
        c = 0x10f7fb0
        r = 0x10ed980
#4  0x0000000000450f1e in ngx_epoll_process_events (cycle=0x10997d0, timer=3255, flags=1) at src/event/modules/ngx_epoll_module.c:822
        events = 1
        revents = 5
        instance = 1
        i = 0
        level = 7715744
        err = 0
        rev = 0x112fcc0
        wev = 0x445346
        queue = 0xcb7
        c = 0x10f7fb0
#5  0x0000000000443336 in ngx_process_events_and_timers (cycle=0x10997d0) at src/event/ngx_event.c:242
        flags = 1
        timer = 3255
        delta = 1463622998431
#6  0x000000000044eb48 in ngx_worker_process_cycle (cycle=0x10997d0, data=0x0) at src/os/unix/ngx_process_cycle.c:753
        worker = 0
#7  0x000000000044baba in ngx_spawn_process (cycle=0x10997d0, proc=0x44ea89 <ngx_worker_process_cycle>, data=0x0, name=0x510af8 "worker process",
    respawn=-3) at src/os/unix/ngx_process.c:198
        on = 1
---Type <return> to continue, or q <return> to quit---
        pid = 0
        s = 0
#8  0x000000000044dd7a in ngx_start_worker_processes (cycle=0x10997d0, n=1, type=-3) at src/os/unix/ngx_process_cycle.c:358
        i = 0
        ch = {command = 1, pid = 0, slot = 0, fd = 0}
#9  0x000000000044d4ab in ngx_master_process_cycle (cycle=0x10997d0) at src/os/unix/ngx_process_cycle.c:130
        title = 0x10f3735 "master process /usr/sbin/nginx -c /etc/nginx/nginx.conf"
        p = 0x10f376c ""
        size = 56
        i = 3
        n = 0
        sigio = 17405928
        set = {__val = {0 <repeats 16 times>}}
        itv = {it_interval = {tv_sec = 0, tv_usec = 1}, it_value = {tv_sec = 9, tv_usec = 18}}
        live = 140730969315312
        delay = 4302798
        ls = 0x6
        ccf = 0x109b2a8
#10 0x000000000041783e in main (argc=3, argv=0x7ffe7b6f5b58) at src/core/nginx.c:367
        b = 0x7f470e104ba0
        log = 0x7594a0
        i = 0
        cycle = 0x10997d0
        init_cycle = {conf_ctx = 0x0, pool = 0x1099220, log = 0x7594a0, new_log = {log_level = 0, file = 0x0, connection = 0, disk_full_time = 0,
            handler = 0, data = 0x0, writer = 0, wdata = 0x0, action = 0x0, next = 0x0}, log_use_stderr = 0, files = 0x0, free_connections = 0x0,
          free_connection_n = 0, modules = 0x0, modules_n = 0, modules_used = 0, reusable_connections_queue = {prev = 0x0, next = 0x0}, listening = {
            elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, paths = {elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, config_dump = {
            elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, open_files = {last = 0x0, part = {elts = 0x0, nelts = 0, next = 0x0}, size = 0,
            nalloc = 0, pool = 0x0}, shared_memory = {last = 0x0, part = {elts = 0x0, nelts = 0, next = 0x0}, size = 0, nalloc = 0, pool = 0x0},
          connection_n = 0, files_n = 0, connections = 0x0, read_events = 0x0, write_events = 0x0, old_cycle = 0x0, conf_file = {len = 21,
            data = 0x7ffe7b6f6f7d "ss"}, conf_param = {len = 0, data = 0x0}, conf_prefix = {len = 11, data = 0x7ffe7b6f6f7d "ss"}, prefix = {len = 5,
            data = 0x50d37c "/usr/"}, lock_file = {len = 0, data = 0x0}, hostname = {len = 0, data = 0x0}}
        cd = 0x7ffe7b6f5b50
        ccf = 0x109b2a8
slact commented 8 years ago

that's great, thanks

slact commented 8 years ago

fixed in 0170dc6. The logic for this had changed in nginx 1.9.4, and needed a corresponding change to nchan.