Open lmiccini opened 2 years ago
We also tried (did not help): bgp graceful-restart restart-time 20
or bgp graceful-restart restart-time 0 bgp long-lived-graceful-restart stale-time 15
these made it (randomly) work like 10% of the times.
We have tried stracing zebra and during a "systemctl stop frr" we can see the netlink event deleting the route:
sendmsg(14, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=44, nlmsg_type=RTM_DELROUTE, nlmsg_flags=NLM_F_REQUEST|0x400, nlmsg_seq=16, nlmsg_pid=-596803374}, {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BGP, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNSPEC, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_DST}, inet_addr("5.6.7.9")], [{nla_len=8, nla_type=RTA_PRIORITY}, 20]]], iov_len=44}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 44
The 10% of the times when it works (aka route is kept during a stop but deleted/readded during a start) we straced "systemctl start frr" and we can see the following where it looks like there is a RTM_NEWNEXTHOP event rewriting the current routes to use the defaul gw as nexthop (192.168.14.1) followed by a RTM_NEWROUTE that adds the "proper" route back:
25493 sendmsg(14, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=40, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_REPLACE|NLM_F_CREATE, nlmsg_seq=0, nlmsg_pid=-317981181}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_ZEBRA, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 241], [{nla_len=8, nla_type=NHA_OIF}, if_nametoindex("lo")]]], [{nlmsg_len=40, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_REPLACE|NLM_F_CREATE, nlmsg_seq=2, nlmsg_pid=-317981181}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_ZEBRA, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 242], [{nla_len=8, nla_type=NHA_OIF}, if_nametoindex("eth0")]]], [{nlmsg_len=40, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_REPLACE|NLM_F_CREATE, nlmsg_seq=4, nlmsg_pid=-317981181}, {nh_family=AF_INET6, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_ZEBRA, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 243], [{nla_len=8, nla_type=NHA_OIF}, if_nametoindex("eth0")]]], [{nlmsg_len=48, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_REPLACE|NLM_F_CREATE, nlmsg_seq=6, nlmsg_pid=-317981181}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_ZEBRA, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 244], [{nla_len=8, nla_type=NHA_GATEWAY}, inet_addr("192.168.14.1")], [{nla_len=8, nla_type=NHA_OIF}, if_nametoindex("eth0")]]], [{nlmsg_len=32, nlmsg_type=RTM_DELNEXTHOP, nlmsg_flags=NLM_F_REQUEST|0x400, nlmsg_seq=8, nlmsg_pid=-317981181}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [{nla_len=8, nla_type=NHA_ID}, 231]], [{nlmsg_len=32, nlmsg_type=RTM_DELNEXTHOP, nlmsg_flags=NLM_F_REQUEST|0x400, nlmsg_seq=9, nlmsg_pid=-317981181}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [{nla_len=8, nla_type=NHA_ID}, 232]], [{nlmsg_len=32, nlmsg_type=RTM_DELNEXTHOP, nlmsg_flags=NLM_F_REQUEST|0x400, nlmsg_seq=10, nlmsg_pid=-317981181}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [{nla_len=8, nla_type=NHA_ID}, 233]], [{nlmsg_len=32, nlmsg_type=RTM_DELNEXTHOP, nlmsg_flags=NLM_F_REQUEST|0x400, nlmsg_seq=11, nlmsg_pid=-317981181}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [{nla_len=8, nla_type=NHA_ID}, 234]], [{nlmsg_len=32, nlmsg_type=RTM_DELNEXTHOP, nlmsg_flags=NLM_F_REQUEST|0x400, nlmsg_seq=12, nlmsg_pid=-317981181}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [{nla_len=8, nla_type=NHA_ID}, 236]]], iov_len=328}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 328
25493 sendmsg(14, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[[{nlmsg_len=48, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_REPLACE|NLM_F_CREATE, nlmsg_seq=13, nlmsg_pid=-317981181}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_ZEBRA, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 246], [{nla_len=8, nla_type=NHA_GATEWAY}, inet_addr("192.168.14.102")], [{nla_len=8, nla_type=NHA_OIF}, if_nametoindex("eth0")]]], [{nlmsg_len=52, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_REQUEST|NLM_F_REPLACE|NLM_F_CREATE, nlmsg_seq=14, nlmsg_pid=-317981181}, {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BGP, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_DST}, inet_addr("5.6.7.9")], [{nla_len=8, nla_type=RTA_PRIORITY}, 20], [{nla_len=8, nla_type=RTA_NH_ID}, "\xf6\x00\x00\x00"]]]], iov_len=100}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 100
I can't replicate this, or I'm missing something seriously. Check screen recording:
@lmiccini I believe it is the same issue as https://github.com/FRRouting/frr/issues/8425 and https://github.com/FRRouting/frr/issues/12030. I ran a 7.5.1 RPM patched version of FRR with https://github.com/FRRouting/frr/pull/12034 in and I can no longer reproduce the issue.
The downstream bug report is https://bugzilla.redhat.com/show_bug.cgi?id=1948422
thanks Carlos :) I think this is slightly different as we are stopping frr on the same node where we see the routes being removed. In our case we leave vm2 untouched and there we don't see any withdrawal when stopping frr on vm1 (so I suppose the patched rpm works for that purpose).
[root@vm1 ~]# systemctl stop frr
[root@vm1 ~]# ip r |grep bgp
[root@vm1 ~]#
@ton31337 I think you are observing what is happening from the pov of a different peer, while we are interested in what happens on "donatas-pc" itself after you stop frr there.
@ton31337 as @lmiccini mentioned, the routes being reinstalled (deleted and re-added) are the ones on the local node where the frr gets restarted. In your example, imagine that your donatas-pc is also learning a route from the spine, when you restart frr in donatas-pc, that route learnt from spine gets deleted and readded, even though we are running zebra with the -r option
on restart bgp sends route withdrawals to zebra. What is zebra supposed to do other than to honor it? Graceful restart is meant for situations where bgp crashes ( or you initiate a graceful restart ) and a systemctl restart frr
does not initiate a bgp graceful restart operation to allow zebra to retain the routes
Thanks @donaldsharp . What leaves us scratching our heads is that we have another environment using frr-8.0-5 or 8.2.2-4 where routes are kept intact after a systemctl stop frr (or /usr/libexec/frr/frrinit.sh stop ):
# /usr/libexec/frr/frrinit.sh stop
Stopped watchfrr
Stopped staticd
Stopped zebra
Stopped bgpd
Stopped bfdd
#
# ip r
default nhid 404 proto bgp src 172.30.2.2 metric 20
nexthop via 100.64.0.5 dev enp3s0 weight 1
nexthop via 100.65.2.5 dev enp2s0 weight 1
so here it seems that the -r zebra option is in fact working as expected.
ok let's debug what is going on. Can you turn on debug zebra rib detail
then initiate a restart operation? Let's see what it tells us.
2022/09/30 07:38:30 STATIC: [MRN6F-AYZC4] Terminating on signal
2022/09/30 07:38:30 BGP: [ZW1GY-R46JE] Terminating on signal
2022/09/30 07:38:30 ZEBRA: [X55C6-A6ZXJ] Route 5.6.7.9/32(0) queued for processing into sub-queue Early Route Processing
2022/09/30 07:38:30 ZEBRA: [VXKFG-8SJRV][EC 4043309121] Client 'static' encountered an error and is shutting down.
2022/09/30 07:38:30 ZEBRA: [VXKFG-8SJRV][EC 4043309121] Client 'bgp' encountered an error and is shutting down.
2022/09/30 07:38:30 ZEBRA: [YDZ55-W3VM6] release_daemon_table_chunks: Released 0 table chunks
2022/09/30 07:38:30 ZEBRA: [JPSA8-5KYEA] client 33 disconnected 0 static routes removed from the rib
2022/09/30 07:38:30 ZEBRA: [S929C-NZR3N] client 33 disconnected 0 static nhgs removed from the rib
2022/09/30 07:38:30 ZEBRA: [YDZ55-W3VM6] release_daemon_table_chunks: Released 0 table chunks
2022/09/30 07:38:30 BGP: [NC2TR-VN5DQ] 192.168.14.102(vm2) configured Graceful-Restart, skipping unconfig notification
2022/09/30 07:38:30 BGP: [VBC13-MYJ5P] Deleting Default VRF
2022/09/30 07:38:30 BGP: [Q8KQZ-W703Z] 192.168.14.102(vm2) configured Graceful-Restart, skipping shutdown notification
2022/09/30 07:38:30 BGP: [HJYDB-XAZCA] Tx redistribute del VRF 0 afi 1 connected 0
2022/09/30 07:38:30 BGP: [PXVXG-TFNNT] %ADJCHANGE: neighbor 192.168.14.102(vm2) in vrf default Down Neighbor deleted
2022/09/30 07:38:30 BGP: [M452J-37M96] 192.168.14.102(Unknown) Update Group Hash: sort: 2 UpdGrpFlags: 0 UpdGrpAFFlags: 67108867
2022/09/30 07:38:30 BGP: [Z6NRP-471E3] 192.168.14.102(Unknown) Update Group Hash: addpath: 3 UpdGrpCapFlag: 256 UpdGrpCapAFFlag: 2048 route_adv: 0 change local as: 0
2022/09/30 07:38:30 BGP: [Z8Q37-65KK3] 192.168.14.102(Unknown) Update Group Hash: max packet size: 65535 pmax_out: 0 Peer Group: (NONE) rmap out: (NONE)
2022/09/30 07:38:30 BGP: [NTX3N-C3WZX] 192.168.14.102(Unknown) Update Group Hash: dlist out: (NONE) plist out: (NONE) aslist out: (NONE) usmap out: (NONE) advmap: (NONE)
2022/09/30 07:38:30 BGP: [V8B3M-T6VFC] 192.168.14.102(Unknown) Update Group Hash: default rmap: (NONE) shared network and afi active network: 0
2022/09/30 07:38:30 BGP: [V53J3-2N7JP] 192.168.14.102(Unknown) Update Group Hash: Lonesoul: 0 ORF prefix: 0 ORF old: 0 max prefix out: 0
2022/09/30 07:38:30 BGP: [SQ314-QBJCR] 192.168.14.102(Unknown) Update Group Hash key: 4146940082
2022/09/30 07:38:30 BGP: [T91AW-FGMHW] bgp_fsm_change_status : vrf default(0), Status: Deleted established_peers 0
2022/09/30 07:38:30 BGP: [WMCA1-27995] RID change : vrf VRF default(0), RTR ID 5.6.7.8
2022/09/30 07:38:30 BGP: [ZQHFG-DQGX1] 192.168.14.102 went from Established to Deleted
2022/09/30 07:38:30 BGP: [T91AW-FGMHW] bgp_fsm_change_status : vrf default(0), Status: Deleted established_peers 0
2022/09/30 07:38:30 BGP: [ZQHFG-DQGX1] Static announcement went from Idle to Deleted
2022/09/30 07:38:30 BGP: [G7K80-KF0HM] Tx route delete VRF 0 5.6.7.9/32
2022/09/30 07:38:30 BGP: [HXW3G-K1M2A] sendmsg_zebra_rnh: sending cmd ZEBRA_NEXTHOP_UNREGISTER for 192.168.14.102/32 (vrf VRF default)
2022/09/30 07:38:30 BGP: [WZCAP-4RFAT] bgp_delete: deregistering this bgp (null) instance from zebra
2022/09/30 07:38:30 BGP: [X67AQ-923PR] Deregistering VRF 0
2022/09/30 07:38:30 BGP: [K52W0-YZ4T8] VRF Deletion: default(4294967295)
2022/09/30 07:38:30 ZEBRA: [QE6V0-J8BG5] rib_delnode: (0:254):5.6.7.9/32: rn 0x5563fb7a7060, re 0x5563fb7a6700, removing
2022/09/30 07:38:30 ZEBRA: [JF0K0-DVHWH] rib_meta_queue_add: (0:254):5.6.7.9/32: queued rn 0x5563fb7a7060 into sub-queue BGP Routes
2022/09/30 07:38:30 ZEBRA: [NZNZ4-7P54Y] default(0:254):5.6.7.9/32: Processing rn 0x5563fb7a7060
2022/09/30 07:38:30 ZEBRA: [ZJVZ4-XEGPF] default(0:254):5.6.7.9/32: Examine re 0x5563fb7a6700 (bgp) status: Removed Installed flags: Selected dist 20 metric 0
2022/09/30 07:38:30 ZEBRA: [JPJF4-TGCY5] default(0:254):5.6.7.9/32: After processing: old_selected 0x5563fb7a6700 new_selected 0x0 old_fib 0x5563fb7a6700 new_fib 0x0
2022/09/30 07:38:30 ZEBRA: [S31W0-H281H] 0:2545.6.7.9/32: Redist del: re 0x5563fb7a6700 (0:bgp), new re 0x0 (0:None)
2022/09/30 07:38:30 ZEBRA: [S59C1-4C5PP] default(0:254):5.6.7.9/32: Deleting route rn 0x5563fb7a7060, re 0x5563fb7a6700 (bgp)
2022/09/30 07:38:30 ZEBRA: [NM15X-X83N9] rib_process: (0:254):5.6.7.9/32: rn 0x5563fb7a7060, removing re 0x5563fb7a6700
2022/09/30 07:38:30 ZEBRA: [Y53JX-CBC5H] rib_unlink: (0:254):5.6.7.9/32: rn 0x5563fb7a7060, re 0x5563fb7a6700
2022/09/30 07:38:30 ZEBRA: [KT8QQ-45WQ0] rib_gc_dest: (0:?):5.6.7.9/32: removing dest from table
2022/09/30 07:38:30 ZEBRA: [HH6N2-PDCJS] default(0:0):5.6.7.9/32 rn 0x5563fb7a7060 dequeued from sub-queue BGP Routes
2022/09/30 07:38:30 ZEBRA: [XVBTQ-5QTVQ] Terminating on signal
2022/09/30 07:38:30 ZEBRA: [QS0NJ-H5QKJ] Zebra final shutdown
2022/09/30 07:38:30 ZEBRA: [Y53JX-CBC5H] rib_unlink: (0:254):5.6.7.8/32: rn 0x5563fb727190, re 0x5563fb5f1eb0
2022/09/30 07:38:30 ZEBRA: [Y53JX-CBC5H] rib_unlink: (0:254):192.168.14.0/24: rn 0x5563fb5f22a0, re 0x5563fb5f2d70
2022/09/30 07:38:30 ZEBRA: [Y53JX-CBC5H] rib_unlink: (0:254):0.0.0.0/0: rn 0x5563fb5cbc40, re 0x5563fb5f36d0
2022/09/30 07:38:30 ZEBRA: [Y53JX-CBC5H] rib_unlink: (0:254):5.6.7.8/32 (MRIB): rn 0x5563fb5f2040, re 0x5563fb5f2230
2022/09/30 07:38:30 ZEBRA: [Y53JX-CBC5H] rib_unlink: (0:254):192.168.14.0/24 (MRIB): rn 0x5563fb5f2f00, re 0x5563fb5f2fa0
2022/09/30 07:38:30 ZEBRA: [Y53JX-CBC5H] rib_unlink: (0:254):fe80::/64: rn 0x5563fb727410, re 0x5563fb5f3270
2022/09/30 07:38:30 ZEBRA: [Y53JX-CBC5H] rib_unlink: (0:254):fe80::/64 (MRIB): rn 0x5563fb5f3360, re 0x5563fb5f34a0
This issue is stale because it has been open 180 days with no activity. Comment or remove the autoclose
label in order to avoid having this issue closed.
Describe the bug
To Reproduce
This is somehow related to https://github.com/FRRouting/frr/issues/12030.
We have two vms. vm1=192.168.14.101 and vm2=192.168.14.102. Respective frr.conf:
and
on vm1 we define an ip address '5.6.7.8' on lo:
similarly we define '5.6.7.9' on vm2:
we see these ips respectively advertised on the other vm:
as soon as we do "systemctl stop frr" on vm1 the route advertised by vm2 disappears:
it only reappears when we "systemctl start frr".
Expected behavior
We expect the --retain option to be honored and the routes not to be removed. We run zebra with -r:
Screenshots
Versions
Additional context