Closed Karimerto closed 4 years ago
hmm... too bad here :(
I am trying to reproduce this in my lab, but I didn't get a SEGV.
keepalived.conf: include /etc/keepalived/conf.d/*.conf
and
/etc/keepalived/conf.d/vi1.conf: vrrp_instance VI_1 { state MASTER interface virbr0 virtual_router_id 50 priority 150 advert_int 1 virtual_ipaddress { 10.1.1.100 } }
Did you get the core file? If so, would it be possible to get the backtrace from it?
Perhaps you are running a somewhat different environment? I am running an older Ubuntu 16.04 (as it says in the bug report). I think I can get the trace from the backup server. It has exactly the same situation as this one, so it is certainly not an isolated case. I will get back to you with the trace.
I am not quite sure how I should run a snap package via gdb. Some instructions here might be helpful. I do have an strace that does show the crash, if that's at all helpful.
sudo strace -f -p 5501
strace: Process 5501 attached
sendto(3, "<14>Jul 11 17:55:14 Keepalived[5"..., 94, MSG_NOSIGNAL, NULL, 0) = 94
timerfd_settime(6, 0, {it_interval={0, 0}, it_value={1, 25870000}}, NULL) = 0
epoll_wait(5, [{EPOLLIN, {u32=3391364448, u64=94119009721696}}], 2, -1) = 1
read(6, "\1\0\0\0\0\0\0\0", 8) = 8
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fdae22f1ad0) = 5517
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 75, MSG_NOSIGNAL, NULL, 0) = 75
timerfd_settime(6, 0, {it_interval={0, 0}, it_value={9223372036854775807, 0}}, NULL) = 0
epoll_wait(5, strace: Process 5517 attached
<unfinished ...>
[pid 5517] set_robust_list(0x7fdae22f1ae0, 24) = 0
[pid 5517] prctl(PR_SET_PDEATHSIG, SIGTERM) = 0
[pid 5517] close(9) = 0
[pid 5517] close(10) = 0
[pid 5517] close(11) = 0
[pid 5517] open("/run/vrrp.pid", O_WRONLY|O_CREAT|O_TRUNC|O_NOFOLLOW, 0644) = 9
[pid 5517] fcntl(9, F_GETFL) = 0x28001 (flags O_WRONLY|O_LARGEFILE|O_NOFOLLOW)
[pid 5517] fchmod(9, 0644) = 0
[pid 5517] fstat(9, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
[pid 5517] write(9, "5517\n", 5) = 5
[pid 5517] close(9) = 0
[pid 5517] close(5) = 0
[pid 5517] close(6) = 0
[pid 5517] close(7) = 0
[pid 5517] rt_sigaction(SIGHUP, {SIG_IGN, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_UNBLOCK, [HUP], NULL, 8) = 0
[pid 5517] rt_sigaction(SIGINT, {SIG_IGN, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_UNBLOCK, [INT], NULL, 8) = 0
[pid 5517] rt_sigaction(SIGTERM, {SIG_IGN, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_UNBLOCK, [TERM], NULL, 8) = 0
[pid 5517] rt_sigaction(SIGCHLD, {SIG_IGN, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_UNBLOCK, [CHLD], NULL, 8) = 0
[pid 5517] rt_sigaction(SIGUSR1, {SIG_IGN, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_UNBLOCK, [USR1], NULL, 8) = 0
[pid 5517] rt_sigaction(SIGUSR2, {SIG_IGN, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_UNBLOCK, [USR2], NULL, 8) = 0
[pid 5517] rt_sigaction(SIGXCPU, {SIG_IGN, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_UNBLOCK, [XCPU], NULL, 8) = 0
[pid 5517] rt_sigaction(SIGRT_4, {SIG_IGN, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_UNBLOCK, [RT_4], NULL, 8) = 0
[pid 5517] epoll_create1(EPOLL_CLOEXEC) = 5
[pid 5517] timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC|TFD_NONBLOCK) = 6
[pid 5517] signalfd4(-1, [], 8, SFD_CLOEXEC|SFD_NONBLOCK) = 7
[pid 5517] epoll_ctl(5, EPOLL_CTL_ADD, 6, {EPOLLIN, {u32=3391364448, u64=94119009721696}}) = 0
[pid 5517] epoll_ctl(5, EPOLL_CTL_ADD, 7, {EPOLLIN, {u32=3391351136, u64=94119009708384}}) = 0
[pid 5517] rt_sigprocmask(SIG_BLOCK, [HUP], NULL, 8) = 0
[pid 5517] signalfd4(7, [HUP], 8, 0) = 7
[pid 5517] rt_sigaction(SIGHUP, {SIG_DFL, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_BLOCK, [INT], NULL, 8) = 0
[pid 5517] signalfd4(7, [HUP INT], 8, 0) = 7
[pid 5517] rt_sigaction(SIGINT, {SIG_DFL, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_BLOCK, [TERM], NULL, 8) = 0
[pid 5517] signalfd4(7, [HUP INT TERM], 8, 0) = 7
[pid 5517] rt_sigaction(SIGTERM, {SIG_DFL, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_BLOCK, [USR1], NULL, 8) = 0
[pid 5517] signalfd4(7, [HUP INT USR1 TERM], 8, 0) = 7
[pid 5517] rt_sigaction(SIGUSR1, {SIG_DFL, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_BLOCK, [USR2], NULL, 8) = 0
[pid 5517] signalfd4(7, [HUP INT USR1 USR2 TERM], 8, 0) = 7
[pid 5517] rt_sigaction(SIGUSR2, {SIG_DFL, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_BLOCK, [RT_31], NULL, 8) = 0
[pid 5517] signalfd4(7, [HUP INT USR1 USR2 TERM RT_31], 8, 0) = 7
[pid 5517] rt_sigaction(SIGRT_31, {SIG_DFL, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_BLOCK, [RT_4], NULL, 8) = 0
[pid 5517] signalfd4(7, [HUP INT USR1 USR2 TERM RT_4 RT_31], 8, 0) = 7
[pid 5517] rt_sigaction(SIGRT_4, {SIG_DFL, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] signalfd4(7, [HUP INT USR1 USR2 TERM RT_4 RT_31], 8, 0) = 7
[pid 5517] rt_sigaction(SIGPIPE, {SIG_IGN, [], SA_RESTORER, 0x7fdadf458390}, NULL, 8) = 0
[pid 5517] rt_sigprocmask(SIG_UNBLOCK, [PIPE], NULL, 8) = 0
[pid 5517] socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_ROUTE) = 9
[pid 5517] bind(9, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
[pid 5517] setsockopt(9, SOL_NETLINK, 1, [1], 4) = 0
[pid 5517] setsockopt(9, SOL_NETLINK, 1, [5], 4) = 0
[pid 5517] setsockopt(9, SOL_NETLINK, 1, [9], 4) = 0
[pid 5517] getsockname(9, {sa_family=AF_NETLINK, pid=5517, groups=00000111}, [12]) = 0
[pid 5517] setsockopt(9, SOL_SOCKET, SO_RCVBUF, [65536], 4) = 0
[pid 5517] sendto(3, "<14>Jul 11 17:55:15 Keepalived_v"..., 79, MSG_NOSIGNAL, NULL, 0) = 79
[pid 5517] epoll_ctl(5, EPOLL_CTL_ADD, 9, {EPOLLIN, {u32=3391353392, u64=94119009710640}}) = 0
[pid 5517] socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 10
[pid 5517] bind(10, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
[pid 5517] getsockname(10, {sa_family=AF_NETLINK, pid=-1285737049, groups=00000000}, [12]) = 0
[pid 5517] setsockopt(10, SOL_SOCKET, SO_RCVBUF, [65536], 4) = 0
[pid 5517] sendto(3, "<14>Jul 11 17:55:15 Keepalived_v"..., 85, MSG_NOSIGNAL, NULL, 0) = 85
[pid 5517] sendto(10, "(\0\0\0\22\0\1\3\324\322\t_\0\0\0\0\21\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 96, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 96
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{NULL, 0}], msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 3256
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"(\3\0\0\20\0\2\0\324\322\t_\2475]\263\0\0\4\3\1\0\0\0I\0\1\0\0\0\0\0"..., 3256}], msg_controllen=0, msg_flags=0}, 0) = 3256
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{NULL, 0}], msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 1632
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"0\3\0\0\20\0\2\0\324\322\t_\2475]\263\0\0\1\0\5\0\0\0C\20\1\0\0\0\0\0"..., 3256}], msg_controllen=0, msg_flags=0}, 0) = 1632
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{NULL, 0}], msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 20
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0\324\322\t_\2475]\263\0\0\0\0", 3256}], msg_controllen=0, msg_flags=0}, 0) = 20
[pid 5517] sendto(10, "(\0\0\0\26\0\1\3\325\322\t_\0\0\0\0\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 96, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 96
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{NULL, 0}], msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 164
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0\325\322\t_\2475]\263\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 164}], msg_controllen=0, msg_flags=0}, 0) = 164
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{NULL, 0}], msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 20
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0\325\322\t_\2475]\263\0\0\0\0", 164}], msg_controllen=0, msg_flags=0}, 0) = 20
[pid 5517] sendto(10, "(\0\0\0\26\0\1\3\326\322\t_\0\0\0\0\n\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 96, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 96
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{NULL, 0}], msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 576
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0\326\322\t_\2475]\263\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 576}], msg_controllen=0, msg_flags=0}, 0) = 576
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{NULL, 0}], msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 20
[pid 5517] recvmsg(10, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0\326\322\t_\2475]\263\0\0\0\0", 576}], msg_controllen=0, msg_flags=0}, 0) = 20
[pid 5517] stat("/etc/keepalived/keepalived.conf", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
[pid 5517] stat("/etc/keepalived/keepalived.conf", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
[pid 5517] sendto(3, "<14>Jul 11 17:55:15 Keepalived_v"..., 90, MSG_NOSIGNAL, NULL, 0) = 90
[pid 5517] open("/etc/keepalived/keepalived.conf", O_RDONLY) = 11
[pid 5517] fstat(11, {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
[pid 5517] open(".", O_RDONLY|O_DIRECTORY|O_PATH) = 12
[pid 5517] chdir("/etc/keepalived") = 0
[pid 5517] fstat(11, {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
[pid 5517] read(11, "# Dummy file that includes the a"..., 4096) = 92
[pid 5517] open("/etc/keepalived/conf.d", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 13
[pid 5517] fstat(13, {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
[pid 5517] getdents(13, /* 3 entries */, 32768) = 80
[pid 5517] getdents(13, /* 0 entries */, 32768) = 0
[pid 5517] close(13) = 0
[pid 5517] stat("/etc/keepalived/conf.d/gateway.conf", {st_mode=S_IFREG|0644, st_size=668, ...}) = 0
[pid 5517] sendto(3, "<14>Jul 11 17:55:15 Keepalived_v"..., 94, MSG_NOSIGNAL, NULL, 0) = 94
[pid 5517] open("/etc/keepalived/conf.d/gateway.conf", O_RDONLY) = 13
[pid 5517] fstat(13, {st_mode=S_IFREG|0644, st_size=668, ...}) = 0
[pid 5517] open(".", O_RDONLY|O_DIRECTORY|O_PATH) = 14
[pid 5517] chdir("/etc/keepalived/conf.d") = 0
[pid 5517] fstat(13, {st_mode=S_IFREG|0644, st_size=668, ...}) = 0
[pid 5517] read(13, "global_defs {\n script_user root"..., 4096) = 668
[pid 5517] read(13, "", 4096) = 0
[pid 5517] close(13) = 0
[pid 5517] fchdir(14) = 0
[pid 5517] close(14) = 0
[pid 5517] sendto(3, "<14>Jul 11 17:55:15 Keepalived_v"..., 124, MSG_NOSIGNAL, NULL, 0) = 124
[pid 5517] read(11, "", 4096) = 0
[pid 5517] close(11) = 0
[pid 5517] fchdir(12) = 0
[pid 5517] close(12) = 0
[pid 5517] --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0} ---
[pid 5517] +++ killed by SIGSEGV (core dumped) +++
<... epoll_wait resumed> [{EPOLLIN, {u32=3391351136, u64=94119009708384}}], 2, -1) = 1
read(7, "\21\0\0\0\0\0\0\0\3\0\0\0\215\25\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 128) = 128
wait4(-1, [{WIFSIGNALED(s) && WTERMSIG(s) == SIGSEGV && WCOREDUMP(s)}], WNOHANG, NULL) = 5517
wait4(-1, 0x7ffe0a874044, WNOHANG, NULL) = -1 ECHILD (No child processes)
read(7, 0x7ffe0a874080, 128) = -1 EAGAIN (Resource temporarily unavailable)
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 90, MSG_NOSIGNAL, NULL, 0) = 90
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 107, MSG_NOSIGNAL, NULL, 0) = 107
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 104, MSG_NOSIGNAL, NULL, 0) = 104
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 121, MSG_NOSIGNAL, NULL, 0) = 121
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 111, MSG_NOSIGNAL, NULL, 0) = 111
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 117, MSG_NOSIGNAL, NULL, 0) = 117
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 124, MSG_NOSIGNAL, NULL, 0) = 124
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 117, MSG_NOSIGNAL, NULL, 0) = 117
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 91, MSG_NOSIGNAL, NULL, 0) = 91
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 83, MSG_NOSIGNAL, NULL, 0) = 83
uname({sysname="Linux", nodename="xxxxxxxx", ...}) = 0
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 119, MSG_NOSIGNAL, NULL, 0) = 119
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 101, MSG_NOSIGNAL, NULL, 0) = 101
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 138, MSG_NOSIGNAL, NULL, 0) = 138
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 126, MSG_NOSIGNAL, NULL, 0) = 126
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 107, MSG_NOSIGNAL, NULL, 0) = 107
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 111, MSG_NOSIGNAL, NULL, 0) = 111
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 124, MSG_NOSIGNAL, NULL, 0) = 124
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 128, MSG_NOSIGNAL, NULL, 0) = 128
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 137, MSG_NOSIGNAL, NULL, 0) = 137
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 120, MSG_NOSIGNAL, NULL, 0) = 120
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 137, MSG_NOSIGNAL, NULL, 0) = 137
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 136, MSG_NOSIGNAL, NULL, 0) = 136
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 137, MSG_NOSIGNAL, NULL, 0) = 137
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 137, MSG_NOSIGNAL, NULL, 0) = 137
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 132, MSG_NOSIGNAL, NULL, 0) = 132
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 142, MSG_NOSIGNAL, NULL, 0) = 142
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 132, MSG_NOSIGNAL, NULL, 0) = 132
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 75, MSG_NOSIGNAL, NULL, 0) = 75
sendto(3, "<9>Jul 11 17:55:15 Keepalived[55"..., 78, MSG_NOSIGNAL, NULL, 0) = 78
sendto(3, "<14>Jul 11 17:55:15 Keepalived[5"..., 102, MSG_NOSIGNAL, NULL, 0) = 102
timerfd_settime(6, 0, {it_interval={0, 0}, it_value={15, 999999000}}, NULL) = 0
epoll_wait(5, ^Cstrace: Process 5501 detached
<detached ...>
I did update include file processing to resolve issue #1670, but I didn't find any regressions when testing. I will have a look at it to see what is happening.
I have now run your exact configuration on my Ubuntu 16.04 VM and I cannot reproduce the problem.
Could you please attach a tar file of /etc/keepalived and all subordinate directories, just in case there is some strange formatting character that is causing the problem.
It should not be necessary to run keepalived under gdb. The system SHOULD capture the coredump; on my 16.04 system it uses apport to capture coredumps (see https://wiki.ubuntu.com/Apport) and the coredump should be written to /var/crash (apparently).
The full logs I get when starting keepalived are:
Jul 11 19:33:16 ubuntu-16-04-1 systemd[1]: Starting Service for snap application keepalived.daemon...
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived[3377]: Starting Keepalived v2.1.4 (07/11,2020), git commit v2.1.4-2-ge290f7a
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived[3377]: WARNING - keepalived was build for newer Linux 4.4.224, running on Linux 4.4.0-185-generic #215-Ubuntu SMP Mon Jun 8 21:53:19 UTC 2020
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived[3377]: Command line: '/snap/keepalived/1525/usr/sbin/keepalived-404'
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived[3377]: Opening file '/etc/keepalived/keepalived.conf'.
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived[3377]: Opening file '/etc/keepalived/conf.d/gateway.conf'.
Jul 11 19:33:16 ubuntu-16-04-1 systemd[1]: Started Service for snap application keepalived.daemon.
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived[3399]: NOTICE: setting config option max_auto_priority should result in better keepalived performance
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived[3399]: Starting VRRP child process, pid=3400
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived_vrrp[3400]: Registering Kernel netlink reflector
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived_vrrp[3400]: Registering Kernel netlink command channel
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived_vrrp[3400]: Opening file '/etc/keepalived/keepalived.conf'.
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived_vrrp[3400]: Opening file '/etc/keepalived/conf.d/gateway.conf'.
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived_vrrp[3400]: (VI_router) the first IPv6 VIP address must be link local
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived_vrrp[3400]: Registering gratuitous NDISC shared channel
Jul 11 19:33:16 ubuntu-16-04-1 Keepalived_vrrp[3400]: (VI_router) Entering BACKUP STATE (init)
Jul 11 19:33:20 ubuntu-16-04-1 Keepalived_vrrp[3400]: (VI_router) Entering MASTER STATE
Jul 11 19:33:20 ubuntu-16-04-1 Keepalived_vrrp[3400]: (VI_router) using locally configured advertisement interval (1000 milli-sec)
Jul 11 19:33:20 ubuntu-16-04-1 avahi-daemon[869]: Leaving mDNS multicast group on interface ens4.IPv6 with address fe80::5054:ff:fe8b:2b32.
Jul 11 19:33:20 ubuntu-16-04-1 avahi-daemon[869]: Joining mDNS multicast group on interface ens4.IPv6 with address fd00:1234:5678::9abc.
Jul 11 19:33:20 ubuntu-16-04-1 avahi-daemon[869]: Registering new address record for fd00:1234:5678::9abc on ens4.*.
Jul 11 19:33:20 ubuntu-16-04-1 avahi-daemon[869]: Withdrawing address record for fe80::5054:ff:fe8b:2b32 on ens4.
It appears as though some of the early log messages from when you start keepalived are missing and it would be helpful to see them.
Could you please also describe how keepalived is being started on your system.
I can do that, I'll just obfuscate any IPs in them. The actual config is a bit larger than what was in my example. I can't seem to be able to produce anything in /var/crash even though I think it should be enabled. I've included a bit of syslog.log from where I start the daemon, and also a .tar.gz of the config files: keepalived_conf.tar.gz
The really weird bit is the first few lines of syslog:
Jul 11 22:25:57 xxxxxxxx Keepalived[6578]: (/etc/keepalived/keepalived.conf: Line 3) Extra '}' found
Jul 11 22:25:57 xxxxxxxx Keepalived[6578]: (/etc/keepalived/keepalived.conf: Line 3) Unknown keyword '}'
and I'm sure there is no such thing. You can either do "include /etc/keepalived/conf.d/*.conf" or "include /etc/keepalived/conf.d/gateway.conf". Both will crash just the same. For some reason it looks like it is attempting to read/include the same config twice, which would explain why it says the bit about VI_router already being defined. And it does work perfectly fine if I stuff all of it into the same keepalived.conf without any includes.
@Karimerto Using your exact files I am now getting the segfault and also the log messages about the extra '}' and unknown keyword '}'.
That sounds good. It seems there is clearly some kind of regression since I have not modified the configs in ages and they worked perfectly fine with 2.1.3.
Commit 39af5f7 resolves this issue. The x86_64, i386, ppc64el and s390x snaps have been promoted to stable, and the arm snaps will be once they have been built.
Tested the new release and it seems to be working perfectly well. Thank you.
Describe the bug: Latest keepalived (2.1.4) has broken include semantics. Keepalived crashed immediately on startup, complaining that I have the same vrrp defined twice, which is most certainly not the case. It worked fine with 2.1.3.
To Reproduce: Create any kind of common keepalived config, but in a subdirectory. The main keepalived.conf just needs to read, for example: include /etc/keepalived/conf.d/*.conf
And of course have some kind of simple vrrp defined somewhere in /etc/keepalived/conf.d/.
Expected behavior: I expected keepalived to work as before. I was rather unpleasantly surprised on a Saturday afternoon to find most of my network offline due to this bug. Once I moved vrrp configs from included files back to the main keepalived.conf, I did manage to get it back up and running. But this is not the way forward as I want to keep my configs small, clean and separate for each vrrp instance that I'm running.
Keepalived version: Keepalived v2.1.4 (07/11,2020), git commit v2.1.4-2-ge290f7a
Copyright(C) 2001-2020 Alexandre Cassen, acassen@gmail.com
Built with kernel headers for Linux 4.4.224 Running on Linux 4.4.0-185-generic #215-Ubuntu SMP Mon Jun 8 21:53:19 UTC 2020
configure options: --prefix= --prefix=/usr --enable-bfd --enable-dbus --enable-json --enable-regex --enable-snmp --enable-snmp-rfc --disable-libipset-dynamic LDFLAGS= -L/build/keepalived/parts/keepalived/install/lib -L/build/keepalived/parts/keepalived/install/usr/lib -L/build/keepalived/parts/keepalived/install/lib/x86_64-linux-gnu -L/build/keepalived/parts/keepalived/install/usr/lib/x86_64-linux-gnu
Config options: LIBIPSET NFTABLES LVS REGEX VRRP VRRP_AUTH JSON BFD OLD_CHKSUM_COMPAT FIB_ROUTING SNMP_V3_FOR_V2 SNMP_VRRP SNMP_CHECKER SNMP_RFCV2 SNMP_RFCV3 DBUS
System options: PIPE2 SIGNALFD INOTIFY_INIT1 VSYSLOG EPOLL_CREATE1 IPV4_DEVCONF IPV6_ADVANCED_API LIBNL3 RTA_ENCAP RTA_NEWDST RTA_PREF FRA_SUPPRESS_PREFIXLEN FRA_SUPPRESS_IFGROUP FRA_TUN_ID RTAX_CC_ALGO RTAX_QUICKACK RTEXT_FILTER_SKIP_STATS RTA_VIA FRA_OIFNAME IFA_FLAGS IP_MULTICAST_ALL LWTUNNEL_ENCAP_MPLS LWTUNNEL_ENCAP_ILA LIBIPSET_PRE_V7 IPTABLES NET_LINUX_IF_H_COLLISION LIBIPVS_NETLINK IPVS_DEST_ATTR_ADDR_FAMILY IPVS_SYNCD_ATTRIBUTES IPVS_64BIT_STATS VRRP_VMAC VRRP_IPVLAN IFLA_LINK_NETNSID CN_PROC SOCK_NONBLOCK SOCK_CLOEXEC O_PATH GLOB_BRACE INET6_ADDR_GEN_MODE VRF SO_MARK SCHED_RESET_ON_FORK
Distro (please complete the following information):
Configuration file:
/etc/keepalived/keepalived.conf:
/etc/keepalived/conf.d/gateway.conf:
System Log entries
Did keepalived coredump? Yes, but is this necessary? If so, I can run it again at some point with broken configs. It is a live environment so I'd rather not break it on purpose.
Additional context Running the latest version from snap, which did the wonderful auto-update thing and broke my entire network.