tarantool / zookeeper

ZooKeeper client for Tarantool
BSD 2-Clause "Simplified" License
2 stars 0 forks source link

Global Watcher crashed #13

Open seet61 opened 2 years ago

seet61 commented 2 years ago
$ tarantool --version
Tarantool 2.10.0-0-g0a5ce0b9c
Target: Linux-x86_64-RelWithDebInfo
Build options: cmake . -DCMAKE_INSTALL_PREFIX=/usr -DENABLE_BACKTRACE=ON
Compiler: /usr/bin/cc /usr/lib/ccache/g++
C_FLAGS:-g -O2 -ffile-prefix-map=/build/tarantool-2.10.0=. -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fexceptions -funwind-tables -fno-common -fopenmp -msse2 -std=c11 -Wall -Wextra -Wno-strict-aliasing -Wno-char-subscripts -Wno-format-truncation -Wno-gnu-alignof-expression -fno-gnu89-inline -Wno-cast-function-type
CXX_FLAGS:-g -O2 -ffile-prefix-map=/build/tarantool-2.10.0=. -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fexceptions -funwind-tables -fno-common -fopenmp -msse2 -std=c++11 -Wall -Wextra -Wno-strict-aliasing -Wno-char-subscripts -Wno-format-truncation -Wno-invalid-offsetof -Wno-gnu-alignof-expression -Wno-cast-function-type

I have a function that loads the config from ZooKeeper:

--- List the children of a znode, tolerating a failed request.
-- Calls `zoo_connect:get_children(path, true)` and returns its first result.
-- When that result is not a table, an error is logged and an empty table is
-- returned so that callers can iterate unconditionally.
local function children_or_empty(zoo_connect, path)
    -- NOTE(review): the second return value is assumed to be the ZooKeeper
    -- return code - confirm against the driver. It is only used for logging,
    -- and tostring() keeps the message safe whatever it turns out to be.
    local children, rc = zoo_connect:get_children(path, true)
    if type(children) ~= 'table' then
        log.error('get_children failed for ' .. tostring(path) .. ', rc: ' .. tostring(rc))
        return {}
    end
    return children
end

--- Load the two-level application config from ZooKeeper.
-- Reads every child of `local_cfg.zookeeper_app_path`, then every child of
-- those nodes, and stores the leaf values as `config[child][sub_child]`.
-- Every read is issued with the watch flag set to `true`.
--
-- The result is assigned to the global `global_cfg` and also returned. When
-- the `exists` check does not report `zkconst.ZOK`, the error is logged and
-- `global_cfg` is replaced with an empty table.
--
-- @param zoo_connect ZooKeeper connection object
-- @param local_cfg   table with a `zookeeper_app_path` field
-- @return the freshly built config table (same table as `global_cfg`)
function load_global_cfg(zoo_connect, local_cfg)
    log.debug("load global cfg")
    local temp_config = {}
    log.debug(zoo_connect ~= nil)
    log.debug(zoo_connect:is_connected())
    local app_path = local_cfg.zookeeper_app_path
    -- Only the third result (return code) of exists() is needed here.
    local _, _, rc = zoo_connect:exists(app_path)
    if rc ~= zkconst.ZOK then
        -- tostring() keeps the log call from raising when rc is nil.
        log.error('rc: ' .. tostring(rc))
    else
        log.debug('rc: ' .. tostring(rc))
        for _, val in pairs(children_or_empty(zoo_connect, app_path)) do
            local section_path = app_path .. "/" .. val
            local temp_sub_config = {}
            for _, sub_val in pairs(children_or_empty(zoo_connect, section_path)) do
                -- Parentheses keep only the first result of get().
                temp_sub_config[sub_val] = (zoo_connect:get(section_path .. "/" .. sub_val, true))
            end
            temp_config[val] = temp_sub_config
        end
    end
    global_cfg = temp_config
    log.debug('handler global_cfg: ' .. json.encode(global_cfg))
    return global_cfg
end

And a global watcher for it:

--- Global ZooKeeper watcher callback: log the event and reload the configs.
-- Logs the event type, session state and path, then starts one fiber running
-- `load_global_cfg` and one running `load_common_cfg`, and finally logs the
-- extra context.
--
-- The `z` argument is not used; the connection and the local config are read
-- from the globals `zoo_connect` and `local_cfg`.
--
-- @param z       connection handle passed by the driver (unused)
-- @param type    watch event type, translated via `zookeeper.const.watch_types_rev`
-- @param state   session state, translated via `zookeeper.const.states_rev`
-- @param path    znode path the event refers to
-- @param context extra context supplied with the watcher
function global_cfg_watcher(z, type, state, path, context)
    -- tostring() guards against codes that have no entry in the reverse
    -- lookup tables, and against a nil path.
    log.debug(string.format(
            'Global watcher. type = %s, state = %s, path = %s',
            tostring(zookeeper.const.watch_types_rev[type]),
            tostring(zookeeper.const.states_rev[state]),
            tostring(path)))
    log.debug('local_cfg: ' .. json.encode(local_cfg))
    fiber.create(load_global_cfg, zoo_connect, local_cfg)
    fiber.create(load_common_cfg, zoo_connect, local_cfg)
    -- Concatenate instead of passing the context as a second argument: extra
    -- arguments are treated as string.format parameters by the logger, and the
    -- message has no placeholder, so the context was never printed.
    log.debug('Extra context: ' .. json.encode(context))
end

After startup everything works correctly. But after a varying amount of time the watcher crashes and the node goes down. router.txt

Error:

{"time": "2022-05-30T13:52:50.333+0300", "level": "DEBUG", "message": "Global watcher dispatch. L=0x7f645f48 cbref=89 internal_ctx_ref=91 user_ctx_ref=92 | type=3 state=3 path=\/esb\/Ferrari\/config\/WhoIsItServicePortType\/switch", "pid":
5943 , "cord_name": "main", "fiber_id": 129, "fiber_name": "zookeeper_process", "file": "\/tmp\/luarocks_zookeeper-scm-1-SYuzOS\/zookeeper\/zookeeper\/driver.c", "line": 81}
{"time": "2022-05-30T13:52:50.373+0300", "level": "DEBUG", "message": "crash: crash dump: crash: crash dump: ion\":\"1\",\"data\":{\"uname\":{\"sysname\":\"Linux\",\"release\":\"4.4.0-19041-Microsoft\",\"version\":\"#488-Microsoft Mon Sep 01 13:43:00 PST 2020\",\"machine\":\"x86_64\"},\"instance\":{\"server_id\":\"1ebb9c36-db0b-43ae-8409-cfa0465e2fc1\",\"cluster_id\":\"a5eed88a-fc6d-4e0c-a7cc-717ace832694\",\"uptime\":\"4385\"},\"build\":{\"version\":\"2.10.0-0-g0a5ce0b9c\",\"cmake_type\":\"Linux-x86_64-RelWithDebInfo\"},\"signal\":{\"signo\":11,\"si_code\":1,\"si_code_str\":\"SEGV_MAPERR\",\"si_addr\":\"0x118\",\"backtrace\":\"#1  0x7ff757d8d071 in crash_signal_cb+161\\n#2  0x7ff756eb2140 in __restore_rt+0\\n#3  0x7ff757dccec3 in lua_rawgeti+339\\n#4  0x7ff7575e76e1 in watcher_dispatch+129\\n#5  0x7ff754a01639 in zoo_set_debug_level+2025\\n#6  0x7ff7549f736d in zookeeper_init+3389\\n#7  0x7ff7549fb325 in zookeeper_process+3381\\n#8  0x7ff7575e81b1 in lua_zoo_process+209\\n#9  0x7ff757dc7093 in lj_BC_FUNCC+70\\n#10 0x7ff757dcde14 in lua_pcall+116\\n#11 0x7ff757d7748b in luaT_call+11\\n#12 0x7ff757d71f84 in lua_fiber_run_f+84\\n#13 0x7ff757c0f58d in fib", "pid": 5943 , "cord_name":
"main", "fiber_id": 129, "fiber_name": "zookeeper_process", "file": ".\/src\/lib\/core\/crash.c", "line": 385}
seet61 commented 2 years ago

CentOS 8. The instance went down with the watcher disabled. Core file: https://cloud.mail.ru/public/jkvC/HjYE5v7iD

# gdb `which tarantool` core.tarantool.0.40bf93c9f488467794c39ddc6061685e.375805.1659851156000000

[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
Core was generated by `tarantool init.lua <running>: tarantool_ferrari@routerA  '.
Program terminated with signal SIGABRT, Aborted.
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
50        return ret;
[Current thread is 1 (Thread 0x7f095d01aac0 (LWP 375805))]
(gdb) bt
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1  0x00007f09582c5db5 in __GI_abort () at abort.c:79
#2  0x000055c6c9dbe216 in crash_signal_cb (signo=<optimized out>, siginfo=<optimized out>, context=<optimized out>) at /usr/src/debug/tarantool-2.8.4.0-1.el8.x86_64/src/lib/core/crash.c:531
#3  <signal handler called>
#4  lj_tab_getinth (t=0x41965f28, key=78) at /usr/src/debug/tarantool-2.8.4.0-1.el8.x86_64/third_party/luajit/src/lj_tab.c:411
#5  0x000055c6c9def230 in lua_rawgeti (L=L@entry=0x41050248, idx=idx@entry=-10000, n=<optimized out>) at /usr/src/debug/tarantool-2.8.4.0-1.el8.x86_64/third_party/luajit/src/lj_api.c:851
#6  0x00007f09565a5d51 in watcher_dispatch (zh=<optimized out>, type=-1, state=1, path=0x55c6cb9e4280 "", watcherctx=<optimized out>) at /tmp/luarocks_zookeeper-scm-1-zmT6ES/zookeeper/zookeeper/driver.c:85
#7  0x00007f0955df3e72 in do_foreach_watcher (state=1, type=-1, path=0x55c6cb9e4280 "", zh=0x55c6caa49190, wo=0x55c6cc11b710) at /usr/src/debug/apache-zookeeper-3.5.9/zookeeper-client/zookeeper-client-c/src/zk_hashtable.c:279
#8  deliverWatchers (zh=zh@entry=0x55c6caa49190, type=-1, state=1, path=0x55c6cb9e4280 "", list=list@entry=0x55c6cbba2d80) at /usr/src/debug/apache-zookeeper-3.5.9/zookeeper-client/zookeeper-client-c/src/zk_hashtable.c:321
#9  0x00007f0955de9738 in process_completions (zh=zh@entry=0x55c6caa49190) at /usr/src/debug/apache-zookeeper-3.5.9/zookeeper-client/zookeeper-client-c/src/zookeeper.c:2824
#10 0x00007f0955dea010 in queue_session_event (zh=zh@entry=0x55c6caa49190, state=state@entry=1) at /usr/src/debug/apache-zookeeper-3.5.9/zookeeper-client/zookeeper-client-c/src/zookeeper.c:2617
#11 0x00007f0955dea12e in cleanup (zh=zh@entry=0x55c6caa49190, rc=rc@entry=-4) at /usr/src/debug/apache-zookeeper-3.5.9/zookeeper-client/zookeeper-client-c/src/zookeeper.c:1782
#12 0x00007f0955dea2a5 in handle_error (zh=zh@entry=0x55c6caa49190, rc=rc@entry=-4) at /usr/src/debug/apache-zookeeper-3.5.9/zookeeper-client/zookeeper-client-c/src/zookeeper.c:1799
#13 0x00007f0955dea400 in handle_socket_error_msg (zh=zh@entry=0x55c6caa49190, line=line@entry=2495, rc=rc@entry=-4, format=format@entry=0x7f0955df6858 "failed while receiving a server response")
    at /usr/src/debug/apache-zookeeper-3.5.9/zookeeper-client/zookeeper-client-c/src/zookeeper.c:1820
#14 0x00007f0955debc0d in check_events (events=<optimized out>, zh=0x55c6caa49190) at /usr/src/debug/apache-zookeeper-3.5.9/zookeeper-client/zookeeper-client-c/src/zookeeper.c:2495
#15 zookeeper_process (zh=0x55c6caa49190, events=<optimized out>) at /usr/src/debug/apache-zookeeper-3.5.9/zookeeper-client/zookeeper-client-c/src/zookeeper.c:2886
#16 0x00007f09565a6fe5 in lua_zoo_process (L=<optimized out>) at /tmp/luarocks_zookeeper-scm-1-zmT6ES/zookeeper/zookeeper/driver.c:834
#17 0x000055c6c9e2ad0b in lj_BC_FUNCC () at buildvm_x86.dasc:811
#18 0x000055c6c9defb6c in lua_pcall (L=L@entry=0x412e1aa8, nargs=<optimized out>, nresults=<optimized out>, errfunc=errfunc@entry=0) at /usr/src/debug/tarantool-2.8.4.0-1.el8.x86_64/third_party/luajit/src/lj_api.c:1158
#19 0x000055c6c9da6e6f in luaT_call (L=0x412e1aa8, nargs=<optimized out>, nreturns=<optimized out>) at /usr/src/debug/tarantool-2.8.4.0-1.el8.x86_64/src/lua/utils.c:457
#20 0x000055c6c9da2079 in lua_fiber_run_f (ap=<error reading variable: value has been optimized out>) at /usr/src/debug/tarantool-2.8.4.0-1.el8.x86_64/src/lua/fiber.c:451
#21 0x000055c6c9c24e20 in fiber_cxx_invoke(fiber_func, typedef __va_list_tag __va_list_tag *) (f=<optimized out>, ap=<optimized out>) at /usr/src/debug/tarantool-2.8.4.0-1.el8.x86_64/src/lib/core/fiber.h:923
#22 0x000055c6c9dc4d10 in fiber_loop (data=<optimized out>) at /usr/src/debug/tarantool-2.8.4.0-1.el8.x86_64/src/lib/core/fiber.c:927
#23 0x000055c6c9fdacd1 in coro_init () at /usr/src/debug/tarantool-2.8.4.0-1.el8.x86_64/third_party/coro/coro.c:110