openresty / luajit2

OpenResty's Branch of LuaJIT 2
https://luajit.org/luajit.html
Other
1.2k stars 193 forks source link

Segmentation fault during NGINX init worker by lua phase #130

Closed ElvinEfendi closed 2 years ago

ElvinEfendi commented 2 years ago

I cannot reproduce this consistently, but it happens quite frequently on alpine:3.13 with luajit 2.1-20201027 when my NGINX workers start.

Core was generated by `nginx: master process /usr/local/nginx/sbin/nginx -c'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  a_crash () at ./arch/x86_64/atomic_arch.h:108
108 ./arch/x86_64/atomic_arch.h: No such file or directory.
[Current thread is 1 (LWP 10360)]
(gdb) bt full
#0  a_crash () at ./arch/x86_64/atomic_arch.h:108
No locals.
#1  get_nominal_size (end=0x7ffb8e98122c "", p=0x7ffb8e97c980 "") at src/malloc/mallocng/meta.h:169
        reserved = 2220
        reserved = <optimized out>
#2  __libc_free (p=0x7ffb8e97c980) at src/malloc/mallocng/free.c:110
        g = 0x564955c50c48
        idx = 1
        stride = 18704
        start = 0x7ffb8e97c920 " \t\230\216\373\177"
        end = 0x7ffb8e98122c ""
        self = <optimized out>
        all = <optimized out>
        mi = {base = <optimized out>, len = <optimized out>}
#3  0x00007ffba7f30f7b in lj_vm_ffi_call () from /usr/local/lib/libluajit-5.1.so.2
No symbol table info available.
#4  0x00007ffba7f77077 in lj_ccall_func (L=<optimized out>, cd=<optimized out>) at lj_ccall.c:1382
        cc = {func = 0x7ffb9df39290 <chash_point_sort>, spadj = 8, nsp = 0 '\000', retref = 0 '\000', ngpr = 0 '\000', nfpr = 0 '\000', fpr = {{d = {0, 0}, f = {0,
                0, 0, 0}, b = '\000' <repeats 15 times>, s = {0, 0, 0, 0, 0, 0, 0, 0}, i = {0, 0, 0, 0}, l = {0, 0}}, {d = {0, 0}, f = {0, 0, 0, 0},
              b = '\000' <repeats 15 times>, s = {0, 0, 0, 0, 0, 0, 0, 0}, i = {0, 0, 0, 0}, l = {0, 0}}, {d = {0, 0}, f = {0, 0, 0, 0},
              b = '\000' <repeats 15 times>, s = {0, 0, 0, 0, 0, 0, 0, 0}, i = {0, 0, 0, 0}, l = {0, 0}}, {d = {0, 0}, f = {0, 0, 0, 0},
              b = '\000' <repeats 15 times>, s = {0, 0, 0, 0, 0, 0, 0, 0}, i = {0, 0, 0, 0}, l = {0, 0}}, {d = {0, 0}, f = {0, 0, 0, 0},
              b = '\000' <repeats 15 times>, s = {0, 0, 0, 0, 0, 0, 0, 0}, i = {0, 0, 0, 0}, l = {0, 0}}, {d = {0, 0}, f = {0, 0, 0, 0},
              b = '\000' <repeats 15 times>, s = {0, 0, 0, 0, 0, 0, 0, 0}, i = {0, 0, 0, 0}, l = {0, 0}}, {d = {0, 0}, f = {0, 0, 0, 0},
              b = '\000' <repeats 15 times>, s = {0, 0, 0, 0, 0, 0, 0, 0}, i = {0, 0, 0, 0}, l = {0, 0}}, {d = {0, 0}, f = {0, 0, 0, 0},
              b = '\000' <repeats 15 times>, s = {0, 0, 0, 0, 0, 0, 0, 0}, i = {0, 0, 0, 0}, l = {0, 0}}}, gpr = {140718548721704, 640, 0, 0, 0, 0}, stack = {
            140718403924864, 140727792257848, 0 <repeats 14 times>, 806486016, 140718832098790, 1, 140718404077952, 2639489468, 140718832113247, 177133138101318528,
            1152921504606846976, 140718404076976, 140718403924976, 24, 140718403924864, 140718404076976, 140718831249675, 140718663504528, 140718403924864}}
        gcsteps = 0
        ret = <optimized out>
        cts = 0x7ffb8e7bd428
        ct = 0x39f0
        sz = <optimized out>
#5  0x00007ffba7f8d38d in lj_cf_ffi_meta___call (L=0x7ffb8e7ab380) at lib_ffi.c:230
        ret = <optimized out>
        cts = 0x7ffb8e7bd428
        cd = <optimized out>
        id = 618
        ct = <optimized out>
        tv = <optimized out>
        mm = MM_call
#6  0x00007ffba7f2eb45 in lj_BC_FUNCC () from /usr/local/lib/libluajit-5.1.so.2
No symbol table info available.
#7  0x00007ffba7f418ff in lua_pcall (L=L@entry=0x7ffb8e7ab380, nargs=nargs@entry=0, nresults=nresults@entry=0, errfunc=errfunc@entry=10) at lj_api.c:1140
        g = 0x7ffb8e7ab3f0
        oldh = 0 '\000'
        ef = 88
        status = <optimized out>
#8  0x0000564953f368aa in ngx_http_lua_do_call (log=log@entry=0x7ffb8e993718, L=L@entry=0x7ffb8e7ab380)
    at /tmp/build/lua-nginx-module-138c1b96423aa26defe00fe64dd5760ef17e5ad8/src/ngx_http_lua_util.c:4233
        status = <optimized out>
        base = 10
        old_pool = 0x0
#9  0x0000564953f4d8ce in ngx_http_lua_init_worker_by_inline (log=0x7ffb8e993718, lmcf=<optimized out>, L=0x7ffb8e7ab380)
    at /tmp/build/lua-nginx-module-138c1b96423aa26defe00fe64dd5760ef17e5ad8/src/ngx_http_lua_initworkerby.c:323
        status = <optimized out>
#10 0x0000564953f4d786 in ngx_http_lua_init_worker (cycle=0x7ffb8e993700)
    at /tmp/build/lua-nginx-module-138c1b96423aa26defe00fe64dd5760ef17e5ad8/src/ngx_http_lua_initworkerby.c:296
        rv = <optimized out>
        cur = <optimized out>
        prev = <optimized out>
        i = <optimized out>
--Type <RET> for more, q to quit, c to continue without paging--
        conf = {name = 0x0, args = 0x0, cycle = 0x7ffb8f169f48, pool = 0x7ffb8e9936b0, temp_pool = 0x0, conf_file = 0x7ffdbe114d40, log = 0x7ffb8e993718,
          ctx = 0x7ffdbe114cc0, module_type = 0, cmd_type = 0, handler = 0x0, handler_conf = 0x0}
        cf_file = {file = {fd = 0, name = {len = 21, data = 0x7ffb8e9939bd "/etc/nginx/nginx.conf"}, info = {st_dev = 0, st_ino = 0, st_nlink = 0, st_mode = 0,
              st_uid = 0, st_gid = 0, __pad0 = 0, st_rdev = 0, st_size = 0, st_blksize = 0, st_blocks = 0, st_atim = {tv_sec = 0, tv_nsec = 0}, st_mtim = {
                tv_sec = 0, tv_nsec = 0}, st_ctim = {tv_sec = 0, tv_nsec = 0}, __unused = {0, 0, 0}}, offset = 0, sys_offset = 0, log = 0x0, thread_handler = 0x0,
            thread_ctx = 0x0, thread_task = 0x0, aio = 0x0, valid_info = 0, directio = 0}, buffer = 0x0, dump = 0x0, line = 0}
        fake_cycle = <optimized out>
        modules = <optimized out>
        file = <optimized out>
        ofile = <optimized out>
        part = <optimized out>
        c = 0x7ffb97202f80
        module = <optimized out>
        r = 0x7ffba79ef060
        ctx = <optimized out>
        conf_ctx = <optimized out>
        http_ctx = {main_conf = 0x7ffb8e9951b8, srv_conf = 0x7ffb8f174108, loc_conf = 0x7ffb8f173f18}
        top_llcf = 0x7ffb9dec2070
        lmcf = 0x7ffb9dec1e38
        clcf = <optimized out>
        top_clcf = 0x7ffb8e995af0
#11 0x0000564953e770ab in ngx_worker_process_init (cycle=cycle@entry=0x7ffb8e993700, worker=<optimized out>) at src/os/unix/ngx_process_cycle.c:955
        set = {__bits = {0, 94872942128080, 402745863, 140718405926464, 1163022147, 16777216, 0, 0, 18446744073709551612, 140718831950715, 8243113905953600622,
            7161627433710387216, 18446744073709551612, 140718831854373, 140727792259056, 94871532601360}}
        n = <optimized out>
        tp = <optimized out>
        i = 67
        cpu_affinity = <optimized out>
        rlmt = {rlim_cur = 261120, rlim_max = 261120}
        ccf = <optimized out>
        ls = <optimized out>
#12 0x0000564953e776d0 in ngx_worker_process_cycle (cycle=0x7ffb8e993700, data=<optimized out>) at src/os/unix/ngx_process_cycle.c:759
        worker = <optimized out>
#13 0x0000564953e756f1 in ngx_spawn_process (cycle=cycle@entry=0x7ffb8e993700, proc=proc@entry=0x564953e776af <ngx_worker_process_cycle>, data=data@entry=0x2,
    name=name@entry=0x564953f9cf4f "worker process", respawn=respawn@entry=-4) at src/os/unix/ngx_process.c:199
        on = 1
        pid = 0
        s = 2
#14 0x0000564953e76310 in ngx_start_worker_processes (cycle=cycle@entry=0x7ffb8e993700, n=4, type=type@entry=-4) at src/os/unix/ngx_process_cycle.c:378
        i = 2
        ch = {command = 1, pid = 10359, slot = 1, fd = 10}
#15 0x0000564953e78359 in ngx_master_process_cycle (cycle=0x7ffb8e993700, cycle@entry=0x7ffba7b2b200) at src/os/unix/ngx_process_cycle.c:234
        title = <optimized out>
        p = <optimized out>
        size = <optimized out>
        i = <optimized out>
        sigio = 0
        set = {__bits = {0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 140718827024920, 0, 0, 0, 140718832072362}}
        itv = {it_interval = {tv_sec = 0, tv_usec = 0}, it_value = {tv_sec = 0, tv_usec = 0}}
        live = 1
        delay = 0
        ccf = 0x7ffb8e994c38
#16 0x0000564953e4aad9 in main (argc=<optimized out>, argv=<optimized out>) at src/core/nginx.c:386
        b = <optimized out>
        log = 0x5649540168c0 <ngx_log>
        i = <optimized out>
        cycle = 0x7ffba7b2b200
        init_cycle = {conf_ctx = 0x0, pool = 0x7ffba7b50600, log = 0x5649540168c0 <ngx_log>, new_log = {log_level = 0, file = 0x0, connection = 0,
            disk_full_time = 0, handler = 0x0, data = 0x0, writer = 0x0, wdata = 0x0, action = 0x0, next = 0x0}, log_use_stderr = 0, files = 0x0,
          free_connections = 0x0, free_connection_n = 0, modules = 0x0, modules_n = 0, modules_used = 0, reusable_connections_queue = {prev = 0x0, next = 0x0},
          reusable_connections_n = 0, connections_reuse_time = 0, listening = {elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, paths = {elts = 0x0,
            nelts = 0, size = 0, nalloc = 0, pool = 0x0}, config_dump = {elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, config_dump_rbtree = {root = 0x0,
--Type <RET> for more, q to quit, c to continue without paging--
            sentinel = 0x0, insert = 0x0}, config_dump_sentinel = {key = 0, left = 0x0, right = 0x0, parent = 0x0, color = 0 '\000', data = 0 '\000'}, open_files = {
            last = 0x0, part = {elts = 0x0, nelts = 0, next = 0x0}, size = 0, nalloc = 0, pool = 0x0}, shared_memory = {last = 0x0, part = {elts = 0x0, nelts = 0,
              next = 0x0}, size = 0, nalloc = 0, pool = 0x0}, connection_n = 0, files_n = 0, connections = 0x0, read_events = 0x0, write_events = 0x0,
          old_cycle = 0x0, conf_file = {len = 21, data = 0x7ffdbe116848 "l/nginx/sbin/nginx -c /etc/nginx/nginx.conf"}, conf_param = {len = 0, data = 0x0},
          conf_prefix = {len = 11, data = 0x7ffdbe116848 "l/nginx/sbin/nginx -c /etc/nginx/nginx.conf"}, prefix = {len = 17,
            data = 0x564953f9719f "/usr/local/nginx/"}, error_log = {len = 24, data = 0x564953f971c7 "/var/log/nginx/error.log"}, lock_file = {len = 0, data = 0x0},
          hostname = {len = 0, data = 0x0}}
        cd = <optimized out>
        ccf = <optimized out>
doujiang24 commented 2 years ago

It's hard to debug based on the current info. For me, I can only see it's calling by using ffi.

Could you provide a minimal example that reproduces it, or any additional information? For example, try to reproduce it in another environment that uses glibc instead of musl libc.

ElvinEfendi commented 2 years ago

> It's hard to debug based on the current info. For me, I can only see it's calling by using ffi.

Agreed, I just wanted to try my luck.

The issue went away after I updated our image to Alpine 3.14.2 (from 3.13), updated NGINX to the latest version tested with OpenResty, and updated all the other lua-resty modules we use.

I think we can close this now. I'll re-open with more details if the issue comes back again.