acassen / keepalived

Keepalived
https://www.keepalived.org
GNU General Public License v2.0
3.98k stars 735 forks source link

Keepalived grouped VIPs: best to advertise only one to smooth failover ? #1933

Closed emper0r closed 3 years ago

emper0r commented 3 years ago

Describe the bug Hi community, we want to know whether it is optimal and necessary to send a separate advertisement for each VIP to the other node, even when the VRRP instances are all in the same sync group.

Keepalived version

2.2.2

Distro (please complete the following information):

Details of any containerisation or hosted service (e.g. AWS) no

Configuration file: the second server has similar conf just change the priority to 100

global_defs {
   notification_email {
     alerts@example.com
   }
   notification_email_from keepalived@example.com
   smtp_server x.x.x.x
   smtp_connect_timeout 30
   router_id fw2
}

vrrp_sync_group vg1 {
   group {
            eth0_vip1
            eth1_vip1
            eth2_vip1
            eth3_vip1
            eth4_vip1
            eth5_vip1
            eth6_vip1
         }
   smtp_alert
   notify_master "/.../primary-backup.sh primary"
   notify_backup "/.../primary-backup.sh backup"
   notify_fault "/.../primary-backup.sh fault"
}

vrrp_instance eth0_vip1 {
    state BACKUP 
    interface eth0
    virtual_router_id 10
    priority 50
    garp_master_delay 1
    advert_int 1
    nopreempt
    unicast_src_ip 9.x.x.12
    unicast_peer {
      9.x.x.11
    }   
    authentication {
        auth_type PASS
        auth_pass pwd
    }
    virtual_ipaddress {
        9.x.x.10/19 brd 9.x.x.255 dev eth0
    }
}

vrrp_instance eth1_vip1 {
    state BACKUP
    interface eth1
    virtual_router_id 30
    priority 50
    garp_master_delay 2
    advert_int 1
    nopreempt
    unicast_src_ip 1.x.x.12
    unicast_peer {
      1.x.x.11
    }
    authentication {
        auth_type PASS
        auth_pass pwd
    }
    virtual_ipaddress {
        1.x.x.1/24 brd 1.x.x.255 dev eth1
    }
}

vrrp_instance eth3_vip1 {
    state BACKUP
    interface eth3
    virtual_router_id 70
    priority 50
    garp_master_delay 2
    advert_int 1
    nopreempt
    unicast_src_ip 3.x.x.132
    unicast_peer {
      3.x.x.131
    }
    authentication {
        auth_type PASS
        auth_pass pwd
    }
    virtual_ipaddress {
        3.x.x.151/25 brd 3.x.x.255 dev eth3
    }
    virtual_ipaddress_excluded {
        3.x.x.152/25 brd 3.x.x.255 dev eth3
    }
}

vrrp_instance eth4_vip1 {
    state BACKUP
    interface eth4
    virtual_router_id 71
    priority 50
    garp_master_delay 2
    advert_int 1
    nopreempt
    authentication {
        auth_type PASS
        auth_pass pwd
    }
    virtual_ipaddress {
        4.x.x.3/28 brd 4.x.x.15 dev eth4
    }
}

vrrp_instance eth5_vip1 {
    state BACKUP
    interface eth5
    virtual_router_id 72
    priority 50
    garp_master_delay 2
    advert_int 1
    nopreempt
    unicast_src_ip 5.x.x.44
    unicast_peer {
      5.x.x.43
    }
    authentication {
        auth_type PASS
        auth_pass pwd
    }
    virtual_ipaddress {
            5.x.x.45/23 brd 5.x.x.255 dev eth5
    }
}

# VRRP instance
vrrp_instance eth6_vip1 {
    state BACKUP
    interface eth6
    virtual_router_id 73
    priority 50
    garp_master_delay 2
    advert_int 1
    nopreempt
    unicast_src_ip 6.x.x.3
    unicast_peer {
      6.x.x.2
    }
    authentication {
        auth_type PASS
        auth_pass pwd
    }
    virtual_ipaddress {
            6.x.x.1/29 brd 6.x.x.7 dev eth6
    }
}

vrrp_instance eth2_vip1 {
    state BACKUP
    interface eth2
    virtual_router_id 50
    priority 50
    garp_master_delay 2
    advert_int 1
    nopreempt
    unicast_src_ip 2.x.x.92
    unicast_peer {
      2.x.x.91
    }
    authentication {
        auth_type PASS
        auth_pass pwd
    }
    virtual_ipaddress {
        2.x.x.68/27 brd 2.x.x.95 dev eth2
    }
    virtual_ipaddress_excluded {
        2.x.x.69/27 brd 2.x.x.95 dev eth2
        2.x.x.70/27 brd 2.x.x.95 dev eth2
        2.x.x.71/27 brd 2.x.x.95 dev eth2
        2.x.x.72/27 brd 2.x.x.95 dev eth2
        2.x.x.73/27 brd 2.x.x.95 dev eth2
        2.x.x.74/27 brd 2.x.x.95 dev eth2
        2.x.x.75/27 brd 2.x.x.95 dev eth2
        2.x.x.76/27 brd 2.x.x.95 dev eth2
        2.x.x.77/27 brd 2.x.x.95 dev eth2
        2.x.x.78/27 brd 2.x.x.95 dev eth2
        2.x.x.79/27 brd 2.x.x.95 dev eth2
        2.x.x.80/27 brd 2.x.x.95 dev eth2
        2.x.x.81/27 brd 2.x.x.95 dev eth2
        2.x.x.82/27 brd 2.x.x.95 dev eth2
        2.x.x.83/27 brd 2.x.x.95 dev eth2
        2.x.x.84/27 brd 2.x.x.95 dev eth2
        2.x.x.85/27 brd 2.x.x.95 dev eth2
        2.x.x.86/27 brd 2.x.x.95 dev eth2
        2.x.x.87/27 brd 2.x.x.95 dev eth2
        2.x.x.89/27 brd 2.x.x.95 dev eth2
        2.x.x.90/27 brd 2.x.x.95 dev eth2
        2.x.x.93/27 brd 2.x.x.95 dev eth2
        2.x.x.94/27 brd 2.x.x.95 dev eth2
    }
}

Notify and track scripts

N/A

System Log entries

N/A

Did keepalived coredump?

no
pqarmitage commented 3 years ago

Just a note about your configuration: you do not need the dev ethX against each of the VIPs/eVIPs - it defaults to the interface of the VRRP instance. Also, rather than brd 9.x.x.255 etc you can specify brd +, so for example 9.x.x.10/19 brd 9.x.x.255 dev eth0 can be simplified to 9.x.x.10/19 brd +

I think your question relates to whether it is reasonable to have the 23 virtual_ipaddress_excluded or would it be better to include those addresses in the virtual_ipaddress block.

RFC5798 (the current VRRP RFC) states:

IPvX Address(es)

   This refers to one or more IPvX addresses associated with the virtual
   router.  The number of addresses included is specified in the "Count
   IP Addr" field.  These fields are used for troubleshooting
   misconfigured routers.  If more than one address is sent, it is
   recommended that all routers be configured to send these addresses in
   the same order to make it easier to do this comparison.

Including the 23 IPv4 addresses will add 92 bytes to the advert packet. Since keepalived pre-constructs the advert packets, rather than building the packet each time an advert is sent, the number of IP addresses in an advert makes no difference to the work keepalived has to do to send the advert. An advert with only 1 IPv4 address is 46 bytes long (rounded up to 60 bytes due to minimum Ethernet frame size), whereas with 24 IPv4 addresses, the advert packet will be 138 bytes long. By default keepalived checks that the IP addresses in a received advert match what it has configured, but if global_defs vrrp_skip_check_adv_addr is set, this check is not made if the advert is received from the same master as the previous advert.

So, the actual additional work that has to be done to handle the larger packet is minimal, and by adding vrrp_skip_check_adv_addr can be minimised even further. It then becomes a trade off between marginal efficiency gain vs easier troubleshooting, especially a year or two down the line when someone else may be investigating problems.

If I were configuring it, I would include all the addresses in the advert.

Roxyrob commented 3 years ago

Hi @pqarmitage, as we also have a configuration like this, I think @emper0r is asking: if the 7 VIPs must fail over and fail back together, does it make sense to send a separate advertisement for each VIP? Would a single advertisement (over one of the more stable interfaces interconnecting the nodes) not be enough for all of them, since in the end all 7 VIPs must reside on the same node for the configuration to be consistent? Instead, it seems that having to manage 7 advertisements for 7 VIPs can make the system less stable.

Also, regarding your interpretation of the question, can you write an example of your suggested solution? The documentation we have seen always shows a single primary IP in "virtual_ipaddress" with all other addresses on the same interface in the "virtual_ipaddress_excluded" section, because virtual_ipaddress is said to be limited to a maximum of 20 IPs — though that is probably a limit from an old keepalived version.

pqarmitage commented 3 years ago

You could have a single VRRP instance with all the VIPs, and you would then need to specify the interfaces for the VIPs that weren't on the same interface as the VRRP instance.

I think the disadvantage of this approach is that there is an increased risk of a split-brain scenario. Suppose the single VRRP instance were using eth0, and the two systems became isolated on eth0. Both systems would then become master. But with individual VRRP instances in a sync group, so long as one of the ethX interfaces were working then you would not get both systems becoming master for all VIPs.

Roxyrob commented 3 years ago

Thanks @pqarmitage, this sounds a good answer but how keepalived decide about failover/failback with many VIPs in a sync group (some isolated some not) ?

pqarmitage commented 3 years ago

The member VRRP instances of a sync group can only transition to master state if all members of the sync group are eligible to become master. To put it another way, if any VRRP instance in a sync group is seeing higher priority adverts, the sync group, and all its members, will remain in backup state.

Roxyrob commented 3 years ago

Thank you @pqarmitage. As asked above can you re-write an example for @emper0r config to move all 23 VIPs from virtual_ipaddress_excluded to virtual_ipaddress block ? It is simple like:

vrrp_instance eth2_vip1 {
    state BACKUP
    interface eth2
    virtual_router_id 50
    priority 50
    garp_master_delay 2
    advert_int 1
    nopreempt
    unicast_src_ip 2.x.x.92
    unicast_peer {
      2.x.x.91
    }
    authentication {
        auth_type PASS
        auth_pass pwd
    }
    virtual_ipaddress {
        2.x.x.68/27 brd 2.x.x.95 dev eth2
        2.x.x.69/27 brd 2.x.x.95 dev eth2
        2.x.x.70/27 brd 2.x.x.95 dev eth2
        2.x.x.71/27 brd 2.x.x.95 dev eth2
        2.x.x.72/27 brd 2.x.x.95 dev eth2
        2.x.x.73/27 brd 2.x.x.95 dev eth2
        2.x.x.74/27 brd 2.x.x.95 dev eth2
        2.x.x.75/27 brd 2.x.x.95 dev eth2
        2.x.x.76/27 brd 2.x.x.95 dev eth2
        2.x.x.77/27 brd 2.x.x.95 dev eth2
        2.x.x.78/27 brd 2.x.x.95 dev eth2
        2.x.x.79/27 brd 2.x.x.95 dev eth2
        2.x.x.80/27 brd 2.x.x.95 dev eth2
        2.x.x.81/27 brd 2.x.x.95 dev eth2
        2.x.x.82/27 brd 2.x.x.95 dev eth2
        2.x.x.83/27 brd 2.x.x.95 dev eth2
        2.x.x.84/27 brd 2.x.x.95 dev eth2
        2.x.x.85/27 brd 2.x.x.95 dev eth2
        2.x.x.86/27 brd 2.x.x.95 dev eth2
        2.x.x.87/27 brd 2.x.x.95 dev eth2
        2.x.x.89/27 brd 2.x.x.95 dev eth2
        2.x.x.90/27 brd 2.x.x.95 dev eth2
        2.x.x.93/27 brd 2.x.x.95 dev eth2
        2.x.x.94/27 brd 2.x.x.95 dev eth2
    }
}

Without drawbacks regarding stability, the aforementioned 20-IP limit, etc.?

pqarmitage commented 3 years ago

I would use the following configuration:

vrrp_instance eth2_vip1 {
    state BACKUP
    interface eth2
    virtual_router_id 50
    priority 50
    garp_master_delay 2
    advert_int 1
    nopreempt
    unicast_src_ip 2.x.x.92
    unicast_peer {
      2.x.x.91
    }
    authentication {
        auth_type PASS
        auth_pass pwd
    }
    virtual_ipaddress {
        2.x.x.68/27 brd +
        2.x.x.69/27 brd +
        2.x.x.70/27 brd +
        2.x.x.71/27 brd +
        2.x.x.72/27 brd +
        2.x.x.73/27 brd +
        2.x.x.74/27 brd +
        2.x.x.75/27 brd +
        2.x.x.76/27 brd +
        2.x.x.77/27 brd +
        2.x.x.78/27 brd +
        2.x.x.79/27 brd +
        2.x.x.80/27 brd +
        2.x.x.81/27 brd +
        2.x.x.82/27 brd +
        2.x.x.83/27 brd +
        2.x.x.84/27 brd +
        2.x.x.85/27 brd +
        2.x.x.86/27 brd +
        2.x.x.87/27 brd +
        2.x.x.89/27 brd +
        2.x.x.90/27 brd +
        2.x.x.93/27 brd +
        2.x.x.94/27 brd +
    }
}

Since the VRRP instance is on eth2, the VIPs will be configured on eth2 by default. Also, I prefer the brd + syntax (there is also brd -), where brd + means the host part of the address (in this case the last 5 bits) is all 1s (this is the same syntax as the ip address add command).

I don't know where the suggestion of a limit of 20 VIPs comes from (it may have been an arbitrary limit a long time ago); the limit is what will fit in 1 packet. With an MTU of 1500 bytes, removing the 20 byte IP header, 8 byte VRRP header, and 8 bytes authentication data, leaves 1464 bytes, or room for 366 IPv4 addresses.

Roxyrob commented 3 years ago

Hi @pqarmitage, Thank you for your suggestions.

pqarmitage commented 3 years ago

I think this discussion is concluded now. If anything further is raised this issue can be reopened.