Assert error in h2_del_req(), http2/cache_http2_proto.c line 171

simonvik commented 6 years ago

Panic at: Wed, 14 Feb 2018 03:35:26 GMT
Assert error in h2_del_req(), http2/cache_http2_proto.c line 171:
  Condition((r2)->magic == 0x03411584) not true.
version = varnish-trunk revision 1506c55872ac31dee1a7884e5631bebd505351e0, vrt api = 6.2
ident = Linux,4.4.30-20161130-1423-0a2a05d,x86_64,-junix,-smalloc,-sdefault,-hcritbit,epoll
now = 33395436.578285 (mono), 1518579326.213292 (real)
Backtrace:
  0x43849b: /usr/sbin/varnishd() [0x43849b]
  0x482990: /usr/sbin/varnishd(VAS_Fail+0x40) [0x482990]
  0x4633c1: /usr/sbin/varnishd(h2_del_req+0x251) [0x4633c1]
  0x466978: /usr/sbin/varnishd(h2_new_session + 1624) [0x466978]
  0x4531bf: /usr/sbin/varnishd() [0x4531bf]
  0x453620: /usr/sbin/varnishd() [0x453620]
  0x7f14a636f6ba: /lib/x86_64-linux-gnu/libpthread.so.0(+0x76ba) [0x7f14a636f6ba]
  0x7f14a60a53dd: /lib/x86_64-linux-gnu/libc.so.6(clone+0x6d) [0x7f14a60a53dd]
thread = (cache-worker)
thr.req = 0x7f1372ea0020 {
  vxid = 162234369, transport = H2
  step = 0x0,
  req_body = R_BODY_INIT,
  err_code = 1, err_reason = (null),
  restarts = 0, esi_level = 0,
  sp = 0x7f1372e97220 {
    fd = 11773, vxid = 162234369,
    t_open = 1518579316.153832,
    t_idle = 1518579321.208179,
    ws = 0x7f1372e97250 {
      id = \"ses\",
      {s, f, r, e} = {0x7f1372e97288, +192, (nil), +368},
    },
    transport = H2 {
      streams {
        0x00000000 idle  
        0x00000000 idle  
      }
    }
    client = ******** 58945,
  },
  ws = 0x7f1372ea01c0 {
    id = \"req\",
    {s, f, r, e} = {0x7f1372ea2030, +728, (nil), +122824},
  },
  http_conn = 0x7f1372ea0130 {
    fd = 11773 (@0x7f1372e97238),
    doclose = NULL,
    ws = 0x7f1372ea01c0 {
      [Already dumped, see above]
    },
    {rxbuf_b, rxbuf_e} = {0x7f1372ea2308, 0x7f1372ea2308},
    {pipeline_b, pipeline_e} = {(nil), (nil)},
    content_length = 0,
    body_status = none,
    first_byte_timeout = 0.000000,
    between_bytes_timeout = 0.000000,
  },
  http[req] = 0x7f1372ea0260 {
    ws = (nil) {
    },
    hdrs {
    },
  },
  vmods = {
    std = {Varnish trunk 1506c55872ac31dee1a7884e5631bebd505351e0, 0.0},
    one = {Varnish trunk 1506c55872ac31dee1a7884e5631bebd505351e0, 0.0},
    maxminddb = {Varnish trunk 1506c55872ac31dee1a7884e5631bebd505351e0, 0.0},
    directors = {Varnish trunk 1506c55872ac31dee1a7884e5631bebd505351e0, 0.0},
    xkey = {Varnish trunk 1506c55872ac31dee1a7884e5631bebd505351e0, 0.0},
    header = {Varnish trunk 1506c55872ac31dee1a7884e5631bebd505351e0, 6.2},
    cookie = {Varnish trunk 1506c55872ac31dee1a7884e5631bebd505351e0, 6.2},
    vsthrottle = {Varnish trunk 1506c55872ac31dee1a7884e5631bebd505351e0, 6.2},
    tcp = {Varnish trunk 1506c55872ac31dee1a7884e5631bebd505351e0, 0.0},
    bodyaccess = {Varnish trunk 1506c55872ac31dee1a7884e5631bebd505351e0, 0.0},
    blob = {Varnish trunk 1506c55872ac31dee1a7884e5631bebd505351e0, 0.0},
  },
  flags = {
  },
  privs = 0x7f1372ea0248 {
  },
},
thr.busyobj = (nil) {
},

Running on ubuntu 16.04 Varnish built from master, commit 1506c55872ac31dee1a7884e5631bebd505351e0 nm-output.txt

daghf commented 6 years ago

One possible thing that would definitely trigger a panic like this is if we were unable to schedule a stream after being waitlisted. The current error handling in h2_reembark does a somewhat premature Req_Release, which will release the req and thus zero the workspace that is the storage for the corresponding h2_req.

This could explain why the panic is showing two separate streams both with ID 0, and also why the magic test failed.

I'll cook up a patch to sort out the h2_reembark logic.

daghf commented 6 years ago

test case:

varnishtest "#2563: Panic after reembark failure"

barrier b1 cond 2
barrier b2 cond 2

server s1 {
    rxreq
    expect req.url == "/foo"
    send "HTTP/1.0 200 OK\r\nConnection: close\r\n\r\n"
    delay .2
    barrier b1 sync
    delay .2
    send "line1\n"
    delay .2
    barrier b2 sync
    send "line2\n"
} -start

varnish v1 -vcl+backend {
    sub vcl_backend_response {
        set beresp.do_stream = false;
    }
} -start

varnish v1 -cliok "param.set feature +http2"
varnish v1 -cliok "param.set debug +failresched"
varnish v1 -cliok "param.set debug +waitinglist"
varnish v1 -cliok "param.set debug +syncvsl"

client c1 {
    txreq -url "/foo" -hdr "client: c1"
    rxresp
    expect resp.status == 200
    expect resp.bodylen == 12
    expect resp.http.x-varnish == "1001"
} -start

barrier b1 sync

client c2 {
    stream 1 {
        txreq -url "/foo" -hdr "client" "c2"
        delay .2
        barrier b2 sync
        rxrst
        expect rst.err == REFUSED_STREAM
    } -run
} -run

client c1 -wait

varnish v1 -vsl_catchup
varnish v1 -expect busy_sleep >= 1
varnish v1 -expect busy_wakeup == 0
varnish v1 -expect busy_killed == 1

varnishcache / varnish-cache

Assert error in h2_del_req(), http2/cache_http2_proto.c line 171 #2563