apache / cloudstack

Apache CloudStack is an opensource Infrastructure as a Service (IaaS) cloud computing platform
https://cloudstack.apache.org/
Apache License 2.0
2.05k stars 1.1k forks source link

IPv6 issues w/ VR in Shared Network / Advanced Zone #6700

Closed s-seitz closed 2 years ago

s-seitz commented 2 years ago
ISSUE TYPE
COMPONENT NAME
VR
CLOUDSTACK VERSION
4.17.0.1
CONFIGURATION

Advanced Networking Zone configured with some Shared Networks on RFC 1918 IPv4 segments plus public /64 IPv6 segments.

OS / ENVIRONMENT

Ubuntu 20.04.5 LTS, Cloudstack components: 4.17.0.1-shapeblue0

SUMMARY

The particular networks are externally preconfigured (VLAN/VID, IPv4 NAT outbound, IPv6 Router Advertisement, additional IPv6 address out of the particular IPv6 segment configured on the router VNIC). After creating a Shared network with "Offering for Shared networks", the connectivity of a newly created VM works as expected. But after "a while" (sorry for being vague), IPv6 connectivity stops working. After investigating this issue, we found that, in addition to the "real" router, the VR had begun to advertise itself as an IPv6 router. So the VM ends up with two default gateways for the same IPv6 prefix. That alone is far from optimal, and to make matters worse, although the VR advertises itself as a router, it doesn't forward IPv6 packets.

STEPS TO REPRODUCE
create network domainid=92ec387a-cb50-4f96-b5f7-f88ad0c57e3b displaynetwork=1 name="v6-403/infra2" displaytext="v6-403/infra2" networkdomain=v6-403.ber1.[REDACTED].berlin networkofferingid=8f53aeb4-d45b-4ce0-82ad-d5b4fe106583 gateway=10.103.0.1 netmask=255.255.0.0 routerip=10.103.1.1 startip=10.103.1.1 endip=10.103.1.255 zoneid=ac4e594b-8855-4fdc-9d0b-4029543ea74b ip6cidr=2a00:12e8:202:1c67::/64 ip6gateway=2a00:12e8:202:1c67::1 routeripv6=2a00:12e8:202:1c67:1::1 startipv6=2a00:12e8:202:1c67:1::1 endipv6=2a00:12e8:202:1c67:1::ff vlan=403
{
  "network": {
    "acltype": "Domain",
    "broadcastdomaintype": "Vlan",
    "broadcasturi": "vlan://403",
    "canusefordeploy": true,
    "cidr": "10.103.0.0/16",
    "created": "2022-09-05T12:55:52+0200",
    "details": {
      "routerip": "10.103.1.1",
      "routeripv6": "2a00:12e8:202:1c67:1::1"
    },
    "displaynetwork": false,
    "displaytext": "v6-403/infra2",
    "dns1": "85.158.0.162",
    "dns2": "85.158.0.163",
    "domain": "infrastructure",
    "domainid": "92ec387a-cb50-4f96-b5f7-f88ad0c57e3b",
    "gateway": "10.103.0.1",
    "hasannotations": false,
    "id": "99a899b4-2f9c-4e36-b726-ff34a23347aa",
    "ip6cidr": "2a00:12e8:202:1c67::/64",
    "ip6gateway": "2a00:12e8:202:1c67::1",
    "ispersistent": false,
    "issystem": false,
    "name": "v6-403/infra2",
    "netmask": "255.255.0.0",
    "networkdomain": "v6-403.ber1.[REDACTED].berlin",
    "networkofferingavailability": "Optional",
    "networkofferingconservemode": true,
    "networkofferingdisplaytext": "Offering for Shared networks",
    "networkofferingid": "8f53aeb4-d45b-4ce0-82ad-d5b4fe106583",
    "networkofferingname": "DefaultSharedNetworkOffering",
    "physicalnetworkid": "f8b33daa-64cd-4200-bfc8-561f86865654",
    "receivedbytes": 0,
    "redundantrouter": false,
    "related": "99a899b4-2f9c-4e36-b726-ff34a23347aa",
    "restartrequired": false,
    "sentbytes": 0,
    "service": [
      {
        "capability": [],
        "name": "UserData",
        "provider": [
          {
            "name": "VirtualRouter"
          }
        ]
      },
      {
        "capability": [
          {
            "canchooseservicecapability": false,
            "name": "DhcpAccrossMultipleSubnets",
            "value": "true"
          }
        ],
        "name": "Dhcp",
        "provider": [
          {
            "name": "VirtualRouter"
          }
        ]
      },
      {
        "capability": [
          {
            "canchooseservicecapability": false,
            "name": "AllowDnsSuffixModification",
            "value": "true"
          }
        ],
        "name": "Dns",
        "provider": [
          {
            "name": "VirtualRouter"
          }
        ]
      }
    ],
    "specifyipranges": true,
    "state": "Setup",
    "strechedl2subnet": false,
    "subdomainaccess": true,
    "tags": [],
    "traffictype": "Guest",
    "type": "Shared",
    "vlan": "403",
    "zoneid": "ac4e594b-8855-4fdc-9d0b-4029543ea74b",
    "zonename": "ber1"
  }
}
EXPECTED RESULTS
The VR on the configured network should not advertise IPv6 routes.
ACTUAL RESULTS
The VR advertises IPv6 routes to itself, which is at best very inconvenient, but in reality simply wrong.
ADDITIONAL INFORMATION

The following excerpts are from two different running VRs: the first one is in the "problematic" state, and the second one was spawned after a Network restart w/ cleanup and w/ live patching. The radvd on the second one has been manually disabled. It didn't start in the first place because /etc/radvd.conf was missing during VR startup. It would have been started after a VR reboot. The networks of both VRs are set up in an identical way; they differ only in name, title, and IPv4 and IPv6 prefix.

Problematic one:

# ip a s
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 1e:00:aa:03:fc:03 brd ff:ff:ff:ff:ff:ff
    altname enp0s3
    altname ens3
    inet 10.101.1.1/16 brd 10.101.255.255 scope global eth0
       valid_lft forever preferred_lft forever
    inet6 2a00:12e8:202:1c65:1c00:aaff:fe03:fc03/64 scope global 
       valid_lft forever preferred_lft forever
    inet6 fe80::1c00:aaff:fe03:fc03/64 scope link 
       valid_lft forever preferred_lft forever
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 0e:00:a9:fe:98:91 brd ff:ff:ff:ff:ff:ff
    altname enp0s4
    altname ens4
    inet 169.254.152.145/16 brd 169.254.255.255 scope global eth1
       valid_lft forever preferred_lft forever
    inet6 fe80::c00:a9ff:fefe:9891/64 scope link 
       valid_lft forever preferred_lft forever

# ip r s
default via 10.101.0.1 dev eth0 
10.101.0.0/16 dev eth0 proto kernel scope link src 10.101.1.1 
169.254.0.0/16 dev eth1 proto kernel scope link src 169.254.152.145 
root@r-35-VM:~# ip -6 r s
2a00:12e8:202:1c65::/64 dev eth0 proto kernel metric 256 pref medium
fe80::/64 dev eth1 proto kernel metric 256 pref medium
fe80::/64 dev eth0 proto kernel metric 256 pref medium

# cat /etc/sysctl.d/99-sysctl.conf 
# Kernel sysctl configuration file
#
# For binary values, 0 is disabled, 1 is enabled.  See sysctl(8) and
# sysctl.conf(5) for more details.
# @VERSION@

# Controls IP packet forwarding
net.ipv4.ip_forward = 1

# Controls source route verification
net.ipv4.conf.default.rp_filter = 0

# Do not accept source routing
net.ipv4.conf.default.accept_source_route = 0

# Respect local interface in ARP interactions
net.ipv4.conf.default.arp_announce = 2
net.ipv4.conf.default.arp_ignore = 2
net.ipv4.conf.all.arp_announce = 2
net.ipv4.conf.all.arp_ignore = 2

# IPSec NETKEY -- avoid bogus redirects
net.ipv4.conf.all.accept_redirects = 0
net.ipv4.conf.default.accept_redirects = 0
net.ipv4.conf.all.send_redirects = 0
net.ipv4.conf.default.send_redirects = 0
net.ipv4.conf.all.secure_redirects = 0
net.ipv4.conf.default.secure_redirects = 0

# Promote secondary ip to be primary if primary IP is removed
net.ipv4.conf.all.promote_secondaries = 1
net.ipv4.conf.default.promote_secondaries = 1

# For smooth transition of the vip address in case of a keepalived failover
net.ipv4.ip_nonlocal_bind = 1

# Controls the System Request debugging functionality of the kernel
kernel.sysrq = 0

# Controls whether core dumps will append the PID to the core filename.
# Useful for debugging multi-threaded applications.
kernel.core_uses_pid = 1

# A better way for the instance to die
kernel.panic = 10
kernel.panic_on_oops = 1
vm.panic_on_oom = 1

# Controls the use of TCP syncookies
net.ipv4.tcp_syncookies = 1

# disable tcp time stamps
net.ipv4.tcp_timestamps = 0

net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_tw_buckets = 1000000
net.core.somaxconn = 65535
net.nf_conntrack_max = 1000000
net.netfilter.nf_conntrack_max = 1000000

# Disable IPv6
net.ipv6.conf.all.disable_ipv6 = 0
net.ipv6.conf.all.forwarding = 1
net.ipv6.conf.all.accept_ra = 1
net.ipv6.conf.all.accept_redirects = 0
net.ipv6.conf.all.autoconf = 0

# Minimum swappiness without disabling it
vm.swappiness=1

# make the kernel more aggressive in reclaiming RAM from the disk and swap caches
vm.vfs_cache_pressure = 200

# try to maintain 'free' memory thereby reducing the size of disk cache, hence reducing swapping.
vm.min_free_kbytes = 20480

# cat /etc/radvd.conf
interface eth0
{
    AdvSendAdvert on;
    MinRtrAdvInterval 5;
    MaxRtrAdvInterval 15;
    prefix 2a00:12e8:202:1c65:1c00:aaff:fe03:fc03/64
    {
        AdvOnLink on;
        AdvAutonomous on;
    };
    RDNSS 2a00:12e8:202:1c00::2
    {
        AdvRDNSSLifetime 30;
    };
    RDNSS 2a00:12e8:202:1c00::3
    {
        AdvRDNSSLifetime 30;
    };

};

# systemctl status radvd
● radvd.service - Router advertisement daemon for IPv6
     Loaded: loaded (/lib/systemd/system/radvd.service; enabled; vendor preset: enabled)
     Active: active (running) since Wed 2022-08-31 01:41:25 UTC; 5 days ago
       Docs: man:radvd(8)
   Main PID: 936 (radvd)
      Tasks: 2 (limit: 228)
     Memory: 508.0K
        CPU: 24.911s
     CGroup: /system.slice/radvd.service
             ├─936 /usr/sbin/radvd --logmethod stderr_clean
             └─937 /usr/sbin/radvd --logmethod stderr_clean

Warning: journal has been rotated since unit was started, output may be incomplete.

# cat /etc/cloudstack/cmdline.json 
{
  "config": {
    "authorized_key": "REDACTED", 
    "baremetalnotificationapikey": "REDACTED", 
    "baremetalnotificationsecuritykey": "REDACTED", 
    "cidrsize": "16", 
    "dhcprange": "10.101.0.1", 
    "dns1": "85.158.0.162", 
    "dns2": "85.158.0.163", 
    "domain": "v6-401.ber1.[REDACTED].berlin", 
    "eth0ip": "10.101.1.1", 
    "eth0ip6": "2a00:12e8:202:1c65:1c00:aaff:fe03:fc03", 
    "eth0ip6prelen": "64", 
    "eth0mask": "255.255.0.0", 
    "eth1ip": "169.254.152.145", 
    "eth1mask": "255.255.0.0", 
    "exposedns": "true", 
    "gateway": "10.101.0.1", 
    "host": "10.4.0.51,10.4.0.52", 
    "ip6dns1": "2a00:12e8:202:1c00::2", 
    "ip6dns2": "2a00:12e8:202:1c00::3", 
    "ip6gateway": "2a00:12e8:202:1c65::1", 
    "name": "r-35-VM", 
    "port": "8080", 
    "redundant_router": "false", 
    "template": "domP", 
    "type": "dhcpsrvr", 
    "useextdns": "true"
  }, 
  "id": "cmdline"
}

After Network restart and cleanup:

# ip a s
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 1e:00:2a:00:02:07 brd ff:ff:ff:ff:ff:ff
    altname enp0s3
    altname ens3
    inet 10.100.1.1/16 brd 10.100.255.255 scope global eth0
       valid_lft forever preferred_lft forever
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 0e:00:a9:fe:9d:e3 brd ff:ff:ff:ff:ff:ff
    altname enp0s4
    altname ens4
    inet 169.254.157.227/16 brd 169.254.255.255 scope global eth1
       valid_lft forever preferred_lft forever

# ip r s
default via 10.100.0.1 dev eth0 
10.100.0.0/16 dev eth0 proto kernel scope link src 10.100.1.1 
169.254.0.0/16 dev eth1 proto kernel scope link src 169.254.157.227 

# ip -6 r s

# cat /etc/sysctl.d/99-sysctl.conf 
# Kernel sysctl configuration file
#
# For binary values, 0 is disabled, 1 is enabled.  See sysctl(8) and
# sysctl.conf(5) for more details.
# @VERSION@

# Controls IP packet forwarding
net.ipv4.ip_forward = 1

# Controls source route verification
net.ipv4.conf.default.rp_filter = 0

# Do not accept source routing
net.ipv4.conf.default.accept_source_route = 0

# Respect local interface in ARP interactions
net.ipv4.conf.default.arp_announce = 2
net.ipv4.conf.default.arp_ignore = 2
net.ipv4.conf.all.arp_announce = 2
net.ipv4.conf.all.arp_ignore = 2

# IPSec NETKEY -- avoid bogus redirects
net.ipv4.conf.all.accept_redirects = 0
net.ipv4.conf.default.accept_redirects = 0
net.ipv4.conf.all.send_redirects = 0
net.ipv4.conf.default.send_redirects = 0
net.ipv4.conf.all.secure_redirects = 0
net.ipv4.conf.default.secure_redirects = 0

# Promote secondary ip to be primary if primary IP is removed
net.ipv4.conf.all.promote_secondaries = 1
net.ipv4.conf.default.promote_secondaries = 1

# For smooth transition of the vip address in case of a keepalived failover
net.ipv4.ip_nonlocal_bind = 1

# Controls the System Request debugging functionality of the kernel
kernel.sysrq = 0

# Controls whether core dumps will append the PID to the core filename.
# Useful for debugging multi-threaded applications.
kernel.core_uses_pid = 1

# A better way for the instance to die
kernel.panic = 10
kernel.panic_on_oops = 1
vm.panic_on_oom = 1

# Controls the use of TCP syncookies
net.ipv4.tcp_syncookies = 1

# disable tcp time stamps
net.ipv4.tcp_timestamps = 0

net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_tw_buckets = 1000000
net.core.somaxconn = 65535
net.nf_conntrack_max = 1000000
net.netfilter.nf_conntrack_max = 1000000

# Disable IPv6
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.all.forwarding = 0
net.ipv6.conf.all.accept_ra = 0
net.ipv6.conf.all.accept_redirects = 0
net.ipv6.conf.all.autoconf = 0

# Minimum swappiness without disabling it
vm.swappiness=1

# make the kernel more aggressive in reclaiming RAM from the disk and swap caches
vm.vfs_cache_pressure = 200

# try to maintain 'free' memory thereby reducing the size of disk cache, hence reducing swapping.
vm.min_free_kbytes = 20480

# cat /etc/radvd.conf
interface eth0
{
    AdvSendAdvert on;
    MinRtrAdvInterval 5;
    MaxRtrAdvInterval 15;
    prefix 2a00:12e8:202:1c64:1c00:2aff:fe00:207/64
    {
        AdvOnLink on;
        AdvAutonomous on;
    };
    RDNSS 2a00:12e8:202:1c00::2
    {
        AdvRDNSSLifetime 30;
    };
    RDNSS 2a00:12e8:202:1c00::3
    {
        AdvRDNSSLifetime 30;
    };

};

# systemctl status radvd ## has been disabled after an unsuccessful start due to an initially missing radvd.conf
● radvd.service - Router advertisement daemon for IPv6
     Loaded: loaded (/lib/systemd/system/radvd.service; disabled; vendor preset: enabled)
     Active: inactive (dead)
       Docs: man:radvd(8)

# cat /etc/cloudstack/cmdline.json 
{
  "config": {
    "authorized_key": "REDACTED", 
    "baremetalnotificationapikey": "REDACTED", 
    "baremetalnotificationsecuritykey": "REDACTED", 
    "cidrsize": "16", 
    "dhcprange": "10.100.0.1", 
    "dns1": "85.158.0.162", 
    "dns2": "85.158.0.163", 
    "domain": "v6-400.ber1.[REDACTED].berlin", 
    "eth0ip": "10.100.1.1", 
    "eth0ip6": "2a00:12e8:202:1c64:1c00:2aff:fe00:207", 
    "eth0ip6prelen": "64", 
    "eth0mask": "255.255.0.0", 
    "eth1ip": "169.254.157.227", 
    "eth1mask": "255.255.0.0", 
    "exposedns": "true", 
    "gateway": "10.100.0.1", 
    "host": "10.4.0.51,10.4.0.52", 
    "ip6dns1": "2a00:12e8:202:1c00::2", 
    "ip6dns2": "2a00:12e8:202:1c00::3", 
    "ip6gateway": "2a00:12e8:202:1c64::1", 
    "name": "r-52-VM", 
    "port": "8080", 
    "redundant_router": "false", 
    "template": "domP", 
    "type": "dhcpsrvr", 
    "useextdns": "true"
  }, 
  "id": "cmdline"
}

Please note that even if radvd were supposed to run (which is not the case for us), its prefix is configured incorrectly: it points to the VR's own EUI-64 address instead of the network prefix.

weizhouapache commented 2 years ago

@s-seitz thanks for reporting this issue.

@shwstppr this looks like a critical issue for 4.17.1.0

shwstppr commented 2 years ago

Fixed with #6706