k3s version v1.21.1+k3s-9fb22ec1-dirty (9fb22ec1), kernel 3.10.0-1160.62.1.F5.1.el7_8.x86_64
This appears to be a custom build of a very old release of K3s, on a custom build of a very old kernel. That's going to be challenging to support. That said, I'm not aware of any issues of this sort with K3s itself. Do you have the pod specs available? What sort of network configuration is being applied here that is causing the system to find duplicate addresses? Are you able to replicate this issue with newer versions of K3s that bundle newer versions of containerd?
@brandond The version of k3s we are using is the upstream version without any changes, including the CRDs; the only difference is that we rebuild it in our own pipelines. And we do not have the liberty to use the newer versions of k3s as of now to replicate this issue.
Please find the pod spec of the virt-launcher pod:
[root@appliance-1 ~]# kubectl describe po virt-launcher-bigip1-1-vsqd4
Name: virt-launcher-bigip1-1-vsqd4
Namespace: default
Priority: 0
Node: appliance-1.chassis.local/1.2.3.4
Start Time: Thu, 09 Jun 2022 11:41:31 +0000
Labels: configby=TPOB-VM
cpumanager=true
guest=bigip1-1
kubevirt.io=virt-launcher
kubevirt.io/created-by=c460cba6-1fb6-4a1a-9a0f-be02824c2b32
name=bigip1
project=default
zone=node1
Annotations: k8s.v1.cni.cncf.io/network-status:
[{
"name": "",
"interface": "eth0",
"ips": [
"1.2.3.4"
],
"mac": "",
"default": true,
"dns": {}
},{
"name": "default/mgmt-conf-bigip1",
"interface": "net1",
"mac": "",
"dns": {}
},{
"name": "default/macvlan-conf-bigip1",
"interface": "net2",
"ips": [
""
],
"mac": "",
"dns": {}
},{
"name": "default/hnet-conf-bigip1",
"interface": "net3",
"ips": [
""
],
"mac": "",
"dns": {}
},{
"name": "default/sriov-net0-bigip1",
"interface": "net4",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net1-bigip1",
"interface": "net5",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net2-bigip1",
"interface": "net6",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net3-bigip1",
"interface": "net7",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net4-bigip1",
"interface": "net8",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net5-bigip1",
"interface": "net9",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net6-bigip1",
"interface": "net10",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net7-bigip1",
"interface": "net11",
"mac": "",
"dns": {}
}]
k8s.v1.cni.cncf.io/networks:
[{"interface":"net1","mac":"","name":"mgmt-conf-bigip1","namespace":"default"},{"interface":"net2","name":"macvlan-conf-b...
k8s.v1.cni.cncf.io/networks-status:
[{
"name": "",
"interface": "eth0",
"ips": [
""
],
"mac": "",
"default": true,
"dns": {}
},{
"name": "default/mgmt-conf-bigip1",
"interface": "net1",
"mac": "",
"dns": {}
},{
"name": "default/macvlan-conf-bigip1",
"interface": "net2",
"ips": [
""
],
"mac": "",
"dns": {}
},{
"name": "default/hnet-conf-bigip1",
"interface": "net3",
"ips": [
""
],
"mac": "",
"dns": {}
},{
"name": "default/sriov-net0-bigip1",
"interface": "net4",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net1-bigip1",
"interface": "net5",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net2-bigip1",
"interface": "net6",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net3-bigip1",
"interface": "net7",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net4-bigip1",
"interface": "net8",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net5-bigip1",
"interface": "net9",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net6-bigip1",
"interface": "net10",
"mac": "",
"dns": {}
},{
"name": "default/sriov-net7-bigip1",
"interface": "net11",
"mac": "",
"dns": {}
}]
kubevirt.io/domain: bigip1-1
Status: Running
IP: 1.2.3.4
IPs:
IP: 1.2.3.4
Controlled By: VirtualMachineInstance/bigip1-1
Init Containers:
kubehelper:
Container ID: containerd://af3972830247df244131a6a6cabcf09759f3a097b35ba97447d8b562d305b49c
Image: localhost:2001/kubehelper:5.9.2
Image ID: localhost:2001/kubehelper@sha256:c974bf57dceb6aaf0ef002e39ab9e0148ce01df7f9e62fbb0ccc4efd6870147f
Port: <none>
Host Port: <none>
State: Terminated
Reason: Completed
Exit Code: 0
Started: Thu, 09 Jun 2022 11:42:04 +0000
Finished: Thu, 09 Jun 2022 11:42:14 +0000
Ready: True
Restart Count: 0
Environment:
TMM_DESCSOCK_SVC_ID: 0
TENANT_ID: 10
KVM_OPERATION: 4
KVM_MEMORY: 12884901888
TENANT_OP: BIGIP
HA_IP: BIGIP
HA_MASK: BIGIP
TENANT_NAME: bigip1
TENANT_TYPE: BIGIP
TENANT_SIZE: 76
VLOG_INFO: sevbound
SLOT_NUM: 1
KVM_CPU_NICE: 0
VMID: bigip1-1
VMCPUS: 4
HTSPLIT: on
F5_HOST_HP_PATH: /var/huge_pages/2048kB/bigip1/1/bigip1
Mounts:
/dev/hugepages from hugepages (rw)
Containers:
compute:
Container ID: containerd://19d734a7bcd359cbe059ae44fc0338f0d8efccfae5ee5a94e3ddee43f5554bde
Image: localhost:2001/kubevirt-appliance/virt-launcher:2.5.2
Image ID: localhost:2001/kubevirt-appliance/virt-launcher@sha256:9ba03734e5135370d3195ddc8d30c25a1d2b98502c3a0a8cdf2275188741fbe1
Port: <none>
Host Port: <none>
Command:
/usr/bin/virt-launcher
--qemu-timeout
5m
--name
bigip1-1
--uid
c460cba6-1fb6-4a1a-9a0f-be02824c2b32
--namespace
default
--kubevirt-share-dir
/var/run/kubevirt
--ephemeral-disk-dir
/var/run/kubevirt-ephemeral-disks
--container-disk-dir
/var/run/kubevirt/container-disks
--grace-period-seconds
195
--hook-sidecars
0
--less-pvc-space-toleration
10
--ovmf-path
/usr/share/OVMF
State: Running
Started: Thu, 09 Jun 2022 11:42:14 +0000
Ready: True
Restart Count: 0
Limits:
devices.kubevirt.io/kvm: 1
devices.kubevirt.io/tun: 1
devices.kubevirt.io/vhost-net: 1
hugepages-2Mi: 12884901888
intel.com/intel_sriov_netdevice0_bigip1: 1
intel.com/intel_sriov_netdevice1_bigip1: 1
intel.com/intel_sriov_netdevice2_bigip1: 1
intel.com/intel_sriov_netdevice3_bigip1: 1
intel.com/intel_sriov_netdevice4_bigip1: 1
intel.com/intel_sriov_netdevice5_bigip1: 1
intel.com/intel_sriov_netdevice6_bigip1: 1
intel.com/intel_sriov_netdevice7_bigip1: 1
Requests:
cpu: 4
devices.kubevirt.io/kvm: 1
devices.kubevirt.io/tun: 1
devices.kubevirt.io/vhost-net: 1
hugepages-2Mi: 12884901888
intel.com/intel_sriov_netdevice0_bigip1: 1
intel.com/intel_sriov_netdevice1_bigip1: 1
intel.com/intel_sriov_netdevice2_bigip1: 1
intel.com/intel_sriov_netdevice3_bigip1: 1
intel.com/intel_sriov_netdevice4_bigip1: 1
intel.com/intel_sriov_netdevice5_bigip1: 1
intel.com/intel_sriov_netdevice6_bigip1: 1
intel.com/intel_sriov_netdevice7_bigip1: 1
memory: 480331648
Environment:
KUBEVIRT_RESOURCE_NAME_hnet-conf-bigip1:
KUBEVIRT_RESOURCE_NAME_sriov-net4-bigip1: intel.com/intel_sriov_netdevice4_bigip1
KUBEVIRT_RESOURCE_NAME_sriov-net5-bigip1: intel.com/intel_sriov_netdevice5_bigip1
KUBEVIRT_RESOURCE_NAME_sriov-net6-bigip1: intel.com/intel_sriov_netdevice6_bigip1
KUBEVIRT_RESOURCE_NAME_sriov-net7-bigip1: intel.com/intel_sriov_netdevice7_bigip1
KUBEVIRT_RESOURCE_NAME_mgmt-conf-bigip1:
KUBEVIRT_RESOURCE_NAME_macvlan-conf-bigip1:
KUBEVIRT_RESOURCE_NAME_sriov-net0-bigip1: intel.com/intel_sriov_netdevice0_bigip1
KUBEVIRT_RESOURCE_NAME_sriov-net1-bigip1: intel.com/intel_sriov_netdevice1_bigip1
KUBEVIRT_RESOURCE_NAME_sriov-net2-bigip1: intel.com/intel_sriov_netdevice2_bigip1
KUBEVIRT_RESOURCE_NAME_sriov-net3-bigip1: intel.com/intel_sriov_netdevice3_bigip1
TMM_DESCSOCK_SVC_ID: 0
TENANT_ID: 10
KVM_OPERATION: 4
KVM_MEMORY: 12884901888
TENANT_OP: BIGIP
HA_IP: BIGIP
HA_MASK: BIGIP
TENANT_NAME: bigip1
TENANT_TYPE: BIGIP
TENANT_SIZE: 76
VLOG_INFO: sevbound
SLOT_NUM: 1
KVM_CPU_NICE: 0
VMID: bigip1-1
VMCPUS: 4
KUBEVIRT_SHARE_DIR: /var/run/kubevirt
HTSPLIT: on
F5_HOST_HP_PATH: /var/huge_pages/2048kB/bigip1/1/bigip1
Mounts:
/dev/hugepages from hugepages (rw)
/dev/vfio/ from dev-vfio (rw)
/sys/bus/pci/ from pci-bus (rw)
/sys/devices/ from pci-devices (rw)
/var/run/kubevirt from virt-share-dir (rw)
/var/run/kubevirt-ephemeral-disks from ephemeral-disks (rw)
/var/run/kubevirt-infra from infra-ready-mount (rw)
/var/run/kubevirt-private/config-map/bigip1-1-configmap from bigip1-1-configmap (ro)
/var/run/kubevirt-private/secret/bigip1-1-secrets from bigip1-1-secrets (ro)
/var/run/kubevirt-private/vmi-disks/bigip1-host-volume from bigip1-host-volume (rw)
/var/run/kubevirt/container-disks from container-disks (rw)
/var/run/kubevirt/hotplug-disks from hotplug-disks (rw)
/var/run/kubevirt/sockets from sockets (rw)
/var/run/libvirt from libvirt-runtime (rw)
Conditions:
Type Status
Initialized True
Ready True
ContainersReady True
PodScheduled True
Volumes:
sockets:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
pci-bus:
Type: HostPath (bare host directory volume)
Path: /sys/bus/pci/
HostPathType:
pci-devices:
Type: HostPath (bare host directory volume)
Path: /sys/devices/
HostPathType:
dev-vfio:
Type: HostPath (bare host directory volume)
Path: /dev/vfio/
HostPathType:
bigip1-host-volume:
Type: HostPath (bare host directory volume)
Path: /var/F5/system/cbip-disks/bigip1
HostPathType: DirectoryOrCreate
bigip1-1-configmap:
Type: ConfigMap (a volume populated by a ConfigMap)
Name: bigip1-1-configmap
Optional: false
bigip1-1-secrets:
Type: Secret (a volume populated by a Secret)
SecretName: bigip1-1-secrets
Optional: false
hugepages:
Type: HostPath (bare host directory volume)
Path: /var/huge_pages/2048kB/bigip1/1
HostPathType:
infra-ready-mount:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
virt-share-dir:
Type: HostPath (bare host directory volume)
Path: /var/run/kubevirt
HostPathType:
virt-bin-share-dir:
Type: HostPath (bare host directory volume)
Path: /var/lib/kubevirt/init/usr/bin
HostPathType:
libvirt-runtime:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
ephemeral-disks:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
container-disks:
Type: HostPath (bare host directory volume)
Path: /var/run/kubevirt/container-disks/c460cba6-1fb6-4a1a-9a0f-be02824c2b32
HostPathType:
hotplug-disks:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
QoS Class: Burstable
Node-Selectors: kubevirt.io/schedulable=true
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning DNSConfigForming 84s (x5877 over 5d2h) kubelet Search Line limits were exceeded, some search paths have been omitted, the applied search line is: default.svc.cluster.local svc.cluster.local cluster.local spk-lab.f5net.com olympus.f5net.com pd.f5net.com
Also please find the net-attach-def for the mgmt-conf:
[root@appliance-1 ~]# kubectl describe net-attach-def mgmt-conf-bigip1
Name: mgmt-conf-bigip1
Namespace: default
Labels: <none>
Annotations: <none>
API Version: k8s.cni.cncf.io/v1
Kind: NetworkAttachmentDefinition
Metadata:
Creation Timestamp: 2022-06-09T11:39:54Z
Generation: 1
Managed Fields:
API Version: k8s.cni.cncf.io/v1
Fields Type: FieldsV1
fieldsV1:
f:spec:
.:
f:config:
Manager: restclient-cpp
Operation: Update
Time: 2022-06-09T11:39:54Z
Resource Version: 1231982
UID: 4a7d1ec7-4aab-40d5-bf38-6decfd706d19
Spec:
Config: { "cniVersion": "0.3.1", "plugins": [ { "type": "macvlan", "master": "mgmt", "mode": "private" }, { "capabilities": { "mac": true }, "type": "tuning" } ] }
Events: <none>
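For reference, the one-line Spec Config above can be pretty-printed on the node. A minimal sketch (assuming jq is installed; this command is not from the original thread):
[root@appliance-1 ~]# kubectl get net-attach-def mgmt-conf-bigip1 -o jsonpath='{.spec.config}' | jq .
{
  "cniVersion": "0.3.1",
  "plugins": [
    {
      "type": "macvlan",
      "master": "mgmt",
      "mode": "private"
    },
    {
      "capabilities": {
        "mac": true
      },
      "type": "tuning"
    }
  ]
}
Note that the plugin chain is just macvlan (master "mgmt", private mode) followed by the tuning plugin, with no IPAM section.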
And the network interface plugin we are using is multus.
Could you please let us know what other logs are required? It would also be really helpful if you could give us some insight into how to resolve this issue.
Thank you!
we do not have the liberty to use the newer versions of k3s
You can't even use the newest 1.21 patch (v1.21.13)? Being stuck on v1.21.1 is going to be pretty rough.
And the network interface plugin we are using is multus.
It's really hard to tell what's going on, but it appears to me like it's the multus interfaces that are causing the problems. I don't see the network referenced in the error (error adding container to network \"mgmt-conf-defaultbip-1\") anywhere in the describe pod output, so I'm guessing it's coming from multus? Since almost none of the components involved here (kube-virt and multus) are core Kubernetes or K3s code, but are rather something that you've deployed and configured on your own, I am afraid that you're going to have to track it down within your environment.
@brandond The pod spec shared above is actually from a working tenant; we do not currently have a setup reproducing the above failure message.
As noted earlier, the log is from the CRI environment (containerd), so we are looking for some help on how to enable tracing and debug this issue further.
Jun 07 11:38:12 appliance-1.chassis.local k3s[6204]: E0607 11:38:12.821024 6204 remote_runtime.go:116] "RunPodSandbox from runtime service failed" err="rpc error: code = Unknown desc = failed to setup network for sandbox "302404d3715d53807421355f5ddb7257aa2f7aa8851e330615ff8c031c5bf0f5": [default/virt-launcher-defaultbip-1-1-ln4jc:mgmt-conf-defaultbip-1]: error adding container to network "mgmt-conf-defaultbip-1": address already in use"
You can try starting K3s with the environment variable CONTAINERD_LOG_LEVEL=debug and then grep the containerd logs for cni, since I suspect that's where your problem lies.
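A minimal sketch of what that could look like under systemd (the containerd log path shown is the k3s default and is an assumption for this customized environment):
[root@appliance-1 ~]# systemctl edit k3s
# add under [Service]:
#   Environment=CONTAINERD_LOG_LEVEL=debug
[root@appliance-1 ~]# systemctl restart k3s
# k3s writes the embedded containerd's log to this file by default;
# grep it for CNI activity around the failing sandbox:
[root@appliance-1 ~]# grep -i cni /var/lib/rancher/k3s/agent/containerd/containerd.log | less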
This repository uses a bot to automatically label issues which have not had any activity (commit/comment/label) for 180 days. This helps us manage the community issues better. If the issue is still relevant, please add a comment to the issue so the bot can remove the label and we know it is still valid. If it is no longer relevant (or possibly fixed in the latest release), the bot will automatically close the issue in 14 days. Thank you for your contributions.
Environmental Info:
K3s Version:
[root@appliance-1 ~]# k3s --version
k3s version v1.21.1+k3s-9fb22ec1-dirty (9fb22ec1)
go version go1.15.2
Node(s) CPU architecture, OS, and Version:
[root@appliance-1 ~]# uname -a
Linux appliance-1.chassis.local 3.10.0-1160.62.1.F5.1.el7_8.x86_64 #1 SMP Tue Apr 12 12:57:28 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux
Cluster Configuration: Single node:
[root@appliance-1 containerd]# kubectl get node -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
appliance-1.chassis.local Ready control-plane,master 13d v1.21.1+k3s-9fb22ec1-dirty 100.75.3.71 CentOS Linux 7 (Core) 3.10.0-1160.62.1.F5.1.el7_8.x86_64 containerd://1.4.4-k3s2
Describe the bug: The VMI instance is not able to come up after the pod and containerd restart. The error looks like it is returned by the containerd CRI bundled with k3s: some cleanup didn't happen properly during pod deletion or the CRI restart, hence this error. This is observed as part of a k3s live upgrade, when the pod and containerd restart during the system reboot.
Steps To Reproduce: Launch a VM and perform multiple k3s live upgrades.
Expected behavior: The vmi instance should be running after the live upgrade
Actual behavior: The vmi instance is unable to come up with the following error:
Jun 07 11:38:12 appliance-1.chassis.local k3s[6204]: E0607 11:38:12.821024 6204 remote_runtime.go:116] "RunPodSandbox from runtime service failed" err="rpc error: code = Unknown desc = failed to setup network for sandbox \"302404d3715d53807421355f5ddb7257aa2f7aa8851e330615ff8c031c5bf0f5\": [default/virt-launcher-defaultbip-1-1-ln4jc:mgmt-conf-defaultbip-1]: error adding container to network \"mgmt-conf-defaultbip-1\": address already in use"
Additional context / logs:
Jun 07 11:38:12 appliance-1.chassis.local k3s[6204]: E0607 11:38:12.821024 6204 remote_runtime.go:116] "RunPodSandbox from runtime service failed" err="rpc error: code = Unknown desc = failed to setup network for sandbox \"302404d3715d53807421355f5ddb7257aa2f7aa8851e330615ff8c031c5bf0f5\": [default/virt-launcher-defaultbip-1-1-ln4jc:mgmt-conf-defaultbip-1]: error adding container to network \"mgmt-conf-defaultbip-1\": address already in use"
Jun 07 11:38:12 appliance-1.chassis.local k3s[6204]: E0607 11:38:12.822001 6204 kuberuntime_sandbox.go:68] "Failed to create sandbox for pod" err="rpc error: code = Unknown desc = failed to setup network for sandbox \"302404d3715d53807421355f5ddb7257aa2f7aa8851e330615ff8c031c5bf0f5\": [default/virt-launcher-defaultbip-1-1-ln4jc:mgmt-conf-defaultbip-1]: error adding container to network \"mgmt-conf-defaultbip-1\": address already in use" pod="default/virt-launcher-defaultbip-1-1-ln4jc"
Jun 07 11:38:12 appliance-1.chassis.local k3s[6204]: E0607 11:38:12.822038 6204 kuberuntime_manager.go:790] "CreatePodSandbox for pod failed" err="rpc error: code = Unknown desc = failed to setup network for sandbox \"302404d3715d53807421355f5ddb7257aa2f7aa8851e330615ff8c031c5bf0f5\": [default/virt-launcher-defaultbip-1-1-ln4jc:mgmt-conf-defaultbip-1]: error adding container to network \"mgmt-conf-defaultbip-1\": address already in use" pod="default/virt-launcher-defaultbip-1-1-ln4jc"
Jun 07 11:38:12 appliance-1.chassis.local k3s[6204]: E0607 11:38:12.822120 6204 pod_workers.go:190] "Error syncing pod, skipping" err="failed to \"CreatePodSandbox\" for \"virt-launcher-defaultbip-1-1-ln4jc_default(38ed781d-b3d5-431c-b4a1-0a5da3185904)\" with CreatePodSandboxError: \"Failed to create sandbox for pod \\"virt-launcher-defaultbip-1-1-ln4jc_default(38ed781d-b3d5-431c-b4a1-0a5da3185904)\\": rpc error: code = Unknown desc = failed to setup network for sandbox \\"302404d3715d53807421355f5ddb7257aa2f7aa8851e330615ff8c031c5bf0f5\\": [default/virt-launcher-defaultbip-1-1-ln4jc:mgmt-conf-defaultbip-1]: error adding container to network \\"mgmt-conf-defaultbip-1\\": address already in use\"" pod="default/virt-launcher-defaultbip-1-1-ln4jc" podUID=38ed781d-b3d5-431c-b4a1-0a5da3185904
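Since the error suggests stale network state surviving the containerd restart, one possible line of investigation (a rough sketch; the cache path shown is the upstream libcni default and may differ in this environment) is to look for leftover CNI caches and macvlan links after a failed start:
# Cached CNI results for sandboxes that may not have been cleaned up:
[root@appliance-1 ~]# ls /var/lib/cni/cache/results/ | grep mgmt-conf
# Leftover macvlan links from old pod sandboxes on the host:
[root@appliance-1 ~]# ip -d link show type macvlan
# Orphaned pod network namespaces:
[root@appliance-1 ~]# ip netns list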
=======================
[root@appliance-1 containerd]# crictl --version crictl version v1.21.0-k3s1