acassen / keepalived

Keepalived
https://www.keepalived.org
GNU General Public License v2.0
3.98k stars 736 forks source link

keepalived and ipvsadm DR loadbalancer on CentOS 8 Stream does not work as expected #1827

Closed Kariton closed 3 years ago

Kariton commented 3 years ago

Describe the issue I have a working keepalived VRRP setup with two direct-routing load balancers and two real servers behind them. I can reach the real servers directly and they work as expected, but I cannot reach them through the VIP of the load balancer.

To Reproduce CentOS 8 Stream minimal installation

Setup loadbalancer:

dnf install -y keepalived ipvsadm

$ keepalived -v
Keepalived v2.0.10 (11/12,2018)

Copyright(C) 2001-2018 Alexandre Cassen, <acassen@gmail.com>

Built with kernel headers for Linux 4.18.0
Running on Linux 4.18.0-259.el8.x86_64 #1 SMP Mon Dec 21 21:05:36 UTC 2020

configure options: --build=x86_64-redhat-linux-gnu --host=x86_64-redhat-linux-gnu --program-prefix= --disable-dependency-tracking --prefix=/usr --exec-prefix=/usr --bindir=/usr/bin --sbindir=/usr/sbin --sysconfdir=/etc --datadir=/usr/share --includedir=/usr/include --libdir=/usr/lib64 --libexecdir=/usr/libexec --localstatedir=/var --sharedstatedir=/var/lib --mandir=/usr/share/man --infodir=/usr/share/info --disable-libiptc --disable-ipset --enable-snmp --enable-snmp-rfc --enable-sha1 --with-init=systemd build_alias=x86_64-redhat-linux-gnu host_alias=x86_64-redhat-linux-gnu PKG_CONFIG_PATH=:/usr/lib64/pkgconfig:/usr/share/pkgconfig CFLAGS=-O2 -g -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fexceptions -fstack-protector-strong -grecord-gcc-switches -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection LDFLAGS=-Wl,-z,relro -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld

Config options:  LVS VRRP VRRP_AUTH OLD_CHKSUM_COMPAT FIB_ROUTING SNMP_V3_FOR_V2 SNMP_VRRP SNMP_CHECKER SNMP_RFCV2 SNMP_RFCV3

System options:  PIPE2 SIGNALFD INOTIFY_INIT1 VSYSLOG EPOLL_CREATE1 IPV4_DEVCONF LIBNL3 RTA_ENCAP RTA_EXPIRES RTA_NEWDST RTA_PREF FRA_SUPPRESS_PREFIXLEN FRA_SUPPRESS_IFGROUP FRA_TUN_ID RTAX_CC_ALGO RTAX_QUICKACK RTEXT_FILTER_SKIP_STATS FRA_L3MDEV FRA_UID_RANGE RTAX_FASTOPEN_NO_COOKIE RTA_VIA FRA_OIFNAME FRA_PROTOCOL FRA_IP_PROTO FRA_SPORT_RANGE FRA_DPORT_RANGE RTA_TTL_PROPAGATE IFA_FLAGS IP_MULTICAST_ALL LWTUNNEL_ENCAP_MPLS LWTUNNEL_ENCAP_ILA LIBIPTC_LINUX_NET_IF_H_COLLISION LIBIPVS_NETLINK IPVS_DEST_ATTR_ADDR_FAMILY IPVS_SYNCD_ATTRIBUTES IPVS_64BIT_STATS VRRP_VMAC SOCK_NONBLOCK SOCK_CLOEXEC O_PATH GLOB_BRACE INET6_ADDR_GEN_MODE VRF SO_MARK SCHED_RT SCHED_RESET_ON_FORK
$ ipvsadm -v
ipvsadm v1.31 2019/12/24 (compiled with popt and IPVS v1.2.1)

configure keepalived:

$ cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived

# Global settings: alert e-mail identity, script execution policy, this
# node's router_id, and the IPVS connection-sync daemon.
global_defs {
    notification_email {
        root@localhost.localdomain
    }
    notification_email_from root@localhost.localdomain
    # NOTE(review): smtp_server is commented out while vrrp_sync_group LBINT
    # enables smtp_alert - confirm whether alert mails are expected to be sent.
#    smtp_server 127.0.0.1
#    smtp_connect_timeout 10
    enable_script_security
    script_user keepalived_script keepalived_script
    router_id LBINT
    # Sync daemon tied to interface enp4s0 and VRRP instance LBN; the kernel
    # log in this report shows it starting with mcast_ifn = enp4s0, syncid = 171.
    lvs_sync_daemon enp4s0 LBN id 171
}

# Ties the DMZ and LBN instances together; the log in this report shows
# "VRRP_Group(LBINT) Syncing instances to BACKUP/MASTER state", i.e. both
# instances change state as a unit.
vrrp_sync_group LBINT {
    group {
        DMZ
        LBN
    }
    smtp_alert
}

# Track script that runs gateway-chk.sh (listed further down in this report),
# which ARP-pings 172.16.10.1 on enp1s0. Referenced from vrrp_instance DMZ.
vrrp_script CHK_EXT_GW {
    script "/usr/libexec/keepalived/gateway-chk.sh"
    interval 5
    timeout 4
    fall 3
    rise 3
    init_fail
}

# Track script that runs uptime-chk.sh (listed further down in this report)
# with the argument 60, which that script uses as its minimum-uptime
# threshold in seconds. Referenced from vrrp_instance DMZ.
vrrp_script CHK_UPTIME {
    script "/usr/libexec/keepalived/uptime-chk.sh 60"
    interval 3
    fall 3
    rise 3
    init_fail
}

# Client-facing VRRP instance on enp1s0. Carries the VIP 172.16.10.10 and,
# as an excluded VIP, 172.16.10.30 - the address the virtual_server below
# listens on. Both track scripts are attached here.
vrrp_instance DMZ {
    interface enp1s0
    state BACKUP
    # Redundant: keepalived logs "(DMZ) Ignoring track_interface enp1s0 since
    # own interface" for this entry.
    track_interface {
        enp1s0
    }
    nopreempt
    virtual_router_id 131
    advert_int 1
    garp_master_delay 1
    authentication {
        auth_type PASS
        auth_pass 20210107
    }
    # NOTE(review): the trailing "#/50" presumably records the priority used
    # on the peer node - confirm.
    priority 100 #/50
    virtual_ipaddress {
        172.16.10.10/24 dev enp1s0

    }
    track_script {
        CHK_EXT_GW
        CHK_UPTIME
    }
    # Logged as "E-VIPs"; gratuitous ARPs for 172.16.10.30 are sent on enp1s0
    # when this instance becomes MASTER (see the log in this report).
    virtual_ipaddress_excluded {
        172.16.10.30/24
    }
}

# Real-server-facing VRRP instance on enp4s0 carrying the VIP 172.128.10.1.
# Also the instance named in global_defs' lvs_sync_daemon line.
vrrp_instance LBN {
    interface enp4s0
    state BACKUP
    # Redundant: keepalived logs "(LBN) Ignoring track_interface enp4s0 since
    # own interface" for this entry.
    track_interface {
        enp4s0
    }
    nopreempt
    virtual_router_id 171
    advert_int 1
    garp_master_delay 1
    authentication {
        auth_type PASS
        auth_pass 20210107
    }
    # NOTE(review): the trailing "#/50" presumably records the priority used
    # on the peer node - confirm.
    priority 100 #/50
    virtual_ipaddress {
        172.128.10.1/24 dev enp4s0
    }
    # Intentionally empty: no excluded VIPs on this instance.
    virtual_ipaddress_excluded {
    }
}

# LVS virtual service on the excluded VIP of vrrp_instance DMZ, forwarding
# TCP/3128 to two real servers using direct routing (lb_kind DR) with
# weighted round-robin scheduling and a 5-second persistence timeout.
virtual_server 172.16.10.30 3128 {
    delay_loop 5
    lb_kind DR
    lb_algo wrr
    protocol TCP
    persistence_timeout 5

    real_server 172.128.10.31 3128 {
        weight 1
        TCP_CHECK {
            connect_timeout 1
            connect_port 3128
            retry 2
            # Keyword was misspelled "delay_befor_retry", which is not a
            # keepalived keyword; the intended setting is delay_before_retry.
            delay_before_retry 1
        }
    }

    real_server 172.128.10.32 3128 {
        weight 1
        TCP_CHECK {
            connect_timeout 1
            connect_port 3128
            retry 2
            # Same spelling fix as for the first real server.
            delay_before_retry 1
        }
    }
}

configure /etc/modules-load.d/ipvs.conf

(load ip_vs kernel module on boot)

$ cat /etc/modules-load.d/ipvs.conf
ip_vs

configure /etc/sysctl.conf

$ cat /etc/sysctl.conf
net.ipv4.ip_forward = 1
net.ipv4.ip_nonlocal_bind = 1
net.ipv4.vs.expire_nodest_conn = 1
net.ipv4.vs.expire_quiescent_template = 1

setup realserver

configure startup script

$ cat /usr/local/sbin/lb_vip.sh
# Real-server helper for the direct-routing setup: binds the load balancer's
# virtual IP to the loopback device and sets the arp_ignore / arp_announce
# sysctls for "lo" and "all"; "stop" reverts both.
# Usage: lb_vip.sh {start|stop}
_VIP=172.16.10.30

# Write arp_ignore ($1) and arp_announce ($2), first for "lo", then for "all".
write_arp_sysctls() {
    for _scope in lo all; do
        echo "$1" > "/proc/sys/net/ipv4/conf/${_scope}/arp_ignore"
        echo "$2" > "/proc/sys/net/ipv4/conf/${_scope}/arp_announce"
    done
}

# Apply the ARP settings (arp_ignore=1, arp_announce=2), then add the VIP
# to lo as a /32.
vip_start() {
    echo "Configure loopback real IP from loadbalancer"
    write_arp_sysctls 1 2
    /sbin/ip addr add "${_VIP}/32" dev lo
}

# Remove the VIP from lo, then reset both ARP sysctls to 0.
vip_stop() {
    echo "Unconfigure loopback real IP from loadbalancer"
    /sbin/ip addr del "${_VIP}/32" dev lo
    write_arp_sysctls 0 0
}

case "$1" in
    start)
        vip_start
        ;;
    stop)
        vip_stop
        ;;
    *)
        echo "Usage: $0 {start|stop}"
        exit 1
        ;;
esac

Expected behavior A clear and concise description of what you expected to happen.

$ ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
  -> RemoteAddress:Port           Forward Weight ActiveConn InActConn
TCP  172.16.10.30:3128 wrr persistent 5
  -> 172.128.10.31:3128           Route   1      0          0
  -> 172.128.10.32:3128           Route   1      3          0

but the connection never really works:

$ ipvsadm -Lnc
IPVS connection entries
pro expire state       source             virtual            destination
TCP 00:03  NONE        172.16.10.50:0     172.16.10.30:3128  172.128.10.32:3128
TCP 00:59  SYN_RECV    172.16.10.50:61463 172.16.10.30:3128  172.128.10.32:3128
TCP 00:59  SYN_RECV    172.16.10.50:61462 172.16.10.30:3128  172.128.10.32:3128

I expect that the packets get routed in both directions through the VIP. If I talk directly to the real servers (172.128.10.32:3128 / 172.128.10.31:3128), the connection works.

Keepalived version Output of keepalived -v

Distro (please complete the following information):

Details of any containerisation or hosted service (e.g. AWS) If keepalived is being run in a container or on a hosted service, provide full details

Configuration file: A full copy of the configuration file, obfuscated if necessary to protect passwords and IP addresses

Notify and track scripts If any notify or track scripts are in use, please provide copies of them

$ cat /usr/libexec/keepalived/gateway-chk.sh
#!/bin/bash
# Gateway reachability probe, configured as the CHK_EXT_GW track script in the
# keepalived.conf above. Sends ARP requests for the gateway out of the
# DMZ-side interface; the script's exit status is arping's exit status.
# NOTE(review): "-w0.5" is passed through unchanged; whether a fractional
# deadline is honoured depends on the arping implementation - confirm.
probe_iface=enp1s0
gateway_ip=172.16.10.1

/usr/sbin/arping -c2 -w0.5 -I "${probe_iface}" "${gateway_ip}"
rc=$?
exit "${rc}"
$ cat /usr/libexec/keepalived/uptime-chk.sh
#!/bin/bash
# Exits 0 once the host has been up for at least DELTA seconds, 1 otherwise.
# Usage: uptime-chk.sh [DELTA]
#   DELTA  minimum uptime in seconds; defaults to 900 when omitted or empty.
# Exits 2 if DELTA is not a non-negative integer or the uptime cannot be read.
DELTA=${1:-900}

# Reject a non-numeric threshold up front; previously this surfaced only as
# an "integer expression expected" error from the final comparison.
case ${DELTA} in
  *[!0-9]*)
    printf 'uptime-chk: DELTA must be a non-negative integer, got: %s\n' "${DELTA}" >&2
    exit 2
    ;;
esac

# /proc/uptime starts with the seconds since boot (with a fractional part).
# Reading it directly avoids converting "uptime -s" back from local
# wall-clock time, which can be off around clock or DST changes.
if ! read -r UPTIME _ < /proc/uptime; then
  printf 'uptime-chk: cannot read /proc/uptime\n' >&2
  exit 2
fi
UPTIME=${UPTIME%%.*}

[ "${UPTIME}" -ge "${DELTA}" ]
exit $?

System Log entries Full keepalived system log entries from when keepalived started

Jan 10 11:47:55 loadbalancer01 Keepalived[107310]: Starting VRRP child process, pid=107312
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: Registering Kernel netlink reflector
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: Registering Kernel netlink command channel
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: Opening file '/etc/keepalived/keepalived.conf'.
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: (DMZ) Ignoring track_interface enp1s0 since own interface
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: Assigned address 172.16.10.11 for interface enp1s0
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: (LBN) Ignoring track_interface enp4s0 since own interface
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: Assigned address 172.128.10.2 for interface enp4s0
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: Registering gratuitous ARP shared channel
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: (DMZ) removing VIPs.
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: (DMZ) removing E-VIPs.
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: (LBN) removing VIPs.
Jan 10 11:47:55 loadbalancer01 systemd[1]: Started LVS and VRRP High Availability Monitor.
Jan 10 11:47:55 loadbalancer01 Keepalived_healthcheckers[107311]: Opening file '/etc/keepalived/keepalived.conf'.
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: VRRP sockpool: [ifindex(2), family(IPv4), proto(112), unicast(0), fd(12,13)]
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: VRRP sockpool: [ifindex(3), family(IPv4), proto(112), unicast(0), fd(14,15)]
Jan 10 11:47:55 loadbalancer01 Keepalived_healthcheckers[107311]: Gained quorum 1+0=1 <= 2 for VS [172.16.10.30]:tcp:3128
Jan 10 11:47:55 loadbalancer01 kernel: IPVS: sync thread started: state = BACKUP, mcast_ifn = enp4s0, syncid = 171, id = 0
Jan 10 11:47:55 loadbalancer01 Keepalived_vrrp[107312]: VRRP_Script(CHK_UPTIME) succeeded
Jan 10 11:47:57 loadbalancer01 Keepalived_vrrp[107312]: VRRP_Script(CHK_EXT_GW) succeeded
Jan 10 11:47:57 loadbalancer01 Keepalived_vrrp[107312]: (DMZ) Entering BACKUP STATE
Jan 10 11:47:57 loadbalancer01 Keepalived_vrrp[107312]: VRRP_Group(LBINT) Syncing instances to BACKUP state
Jan 10 11:47:57 loadbalancer01 Keepalived_vrrp[107312]: (LBN) Entering BACKUP STATE
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: (DMZ) Receive advertisement timeout
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: (LBN) Receive advertisement timeout
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: (LBN) Entering MASTER STATE
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: (LBN) setting VIPs.
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp4s0 for 172.128.10.1
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: (LBN) Sending/queueing gratuitous ARPs on enp4s0 for 172.128.10.1
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp4s0 for 172.128.10.1
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp4s0 for 172.128.10.1
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp4s0 for 172.128.10.1
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp4s0 for 172.128.10.1
Jan 10 11:48:01 loadbalancer01 kernel: IPVS: stopping backup sync thread 107313 ...
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: VRRP_Group(LBINT) Syncing instances to MASTER state
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: (DMZ) Entering MASTER STATE
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: (DMZ) setting VIPs.
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: (DMZ) setting E-VIPs.
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.10
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: (DMZ) Sending/queueing gratuitous ARPs on enp1s0 for 172.16.10.10
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.30
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: (DMZ) Sending/queueing gratuitous ARPs on enp1s0 for 172.16.10.30
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.10
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.30
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.10
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.30
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.10
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.30
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.10
Jan 10 11:48:01 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.30
Jan 10 11:48:01 loadbalancer01 kernel: IPVS: sync thread started: state = MASTER, mcast_ifn = enp4s0, syncid = 171, id = 0
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp4s0 for 172.128.10.1
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: (LBN) Sending/queueing gratuitous ARPs on enp4s0 for 172.128.10.1
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp4s0 for 172.128.10.1
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp4s0 for 172.128.10.1
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp4s0 for 172.128.10.1
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp4s0 for 172.128.10.1
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.10
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: (DMZ) Sending/queueing gratuitous ARPs on enp1s0 for 172.16.10.10
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.30
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: (DMZ) Sending/queueing gratuitous ARPs on enp1s0 for 172.16.10.30
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.10
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.30
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.10
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.30
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.10
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.30
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.10
Jan 10 11:48:02 loadbalancer01 Keepalived_vrrp[107312]: Sending gratuitous ARP on enp1s0 for 172.16.10.30

Did keepalived coredump? If so, can you please provide a stacktrace from the coredump, using gdb.

Additional context Add any other context about the problem here.

firewalld and SELinux are disabled. The main problem seems to be that the load balancer can't or won't route the packets back.

I can't see an ARP problem:

C:\Windows\system32>cmd /V /C "set "IP=172.16.10.30" & FOR /L %i in (1,1,2) do @ping -n 1 -w 1000 "!IP!" >NUL & arp -a | findstr /c:"!IP! "
  172.16.10.30          LB-VIP-MAC     dynamisch
  172.16.10.30          LB-VIP-MAC     dynamisch

on the loadbalancer:

Maybe this is related to #1551? Am I missing something?

Kariton commented 3 years ago

I was missing a few simple kernel parameters:

net.ipv4.conf.all.rp_filter = 0
net.ipv4.conf.enp4s0.rp_filter = 0
net.ipv4.conf.enp4s0.accept_local = 1