kubeovn / kube-ovn

A Bridge between SDN and Cloud Native (Project under CNCF)
https://kubeovn.github.io/docs/stable/en/
Apache License 2.0

vpc-dns pods Pending #2790

Closed by willzhang 10 months ago

willzhang commented 1 year ago

Expected Behavior

vpc-dns pods Running

Actual Behavior

vpc-dns pods Pending

Steps to Reproduce the Problem

Create a VPC DNS in underlay mode, following:

https://kubeovn.github.io/docs/v1.11.x/advance/vpc-internal-lb/
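
For context, a minimal sketch of the VpcDns custom resource involved; the vpc/subnet names are taken from the pod annotations shown later in this issue, and the exact spec fields are assumptions based on the kube-ovn v1.11 VPC DNS docs:

apiVersion: kubeovn.io/v1
kind: VpcDns
metadata:
  name: test-cjh1
spec:
  vpc: test-vpc-1      # the custom VPC (matches ovn.kubernetes.io/logical_router below)
  subnet: net1         # the subnet inside that VPC (matches ovn.kubernetes.io/logical_switch below)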

First problem: no pods are found with -l app=vpc-dns.

root@node34:~# kubectl -n kube-system get pods -l app=vpc-dns
No resources found in kube-system namespace.
root@node34:~# 

root@node34:~# kubectl -n kube-system get pods --show-labels
NAME                                   READY   STATUS    RESTARTS   AGE     LABELS
coredns-565d847f94-dfrvl               1/1     Running   0          5h35m   k8s-app=kube-dns,pod-template-hash=565d847f94
coredns-565d847f94-tssdh               1/1     Running   0          5h35m   k8s-app=kube-dns,pod-template-hash=565d847f94
etcd-node34                            1/1     Running   6          5h36m   component=etcd,tier=control-plane
kube-apiserver-node34                  1/1     Running   0          5h36m   component=kube-apiserver,tier=control-plane
kube-controller-manager-node34         1/1     Running   0          5h36m   component=kube-controller-manager,tier=control-plane
kube-multus-ds-b46f5                   1/1     Running   0          4h3m    app=multus,controller-revision-hash=5f559b69cc,name=multus,pod-template-generation=1,tier=node
kube-ovn-cni-q487r                     1/1     Running   0          5h36m   app=kube-ovn-cni,component=network,controller-revision-hash=597c7587b6,pod-template-generation=1,type=infra
kube-ovn-controller-5d6bb4c445-zltb9   1/1     Running   0          5h36m   app=kube-ovn-controller,component=network,pod-template-hash=5d6bb4c445,type=infra
kube-ovn-monitor-9754c455c-jp84n       1/1     Running   0          5h36m   app=kube-ovn-monitor,component=network,pod-template-hash=9754c455c,type=infra
kube-ovn-pinger-2wz7l                  1/1     Running   0          5h35m   app=kube-ovn-pinger,component=network,controller-revision-hash=587cb8dc6d,pod-template-generation=1,type=infra
kube-proxy-mgt7d                       1/1     Running   0          5h36m   controller-revision-hash=5f6bcf49c,k8s-app=kube-proxy,pod-template-generation=1
kube-scheduler-node34                  1/1     Running   0          5h36m   component=kube-scheduler,tier=control-plane
ovn-central-6877744858-dlwww           1/1     Running   0          5h36m   app=ovn-central,component=network,ovn-nb-leader=true,ovn-northd-leader=true,ovn-sb-leader=true,pod-template-hash=6877744858,type=infra
ovs-ovn-vrcct                          1/1     Running   0          5h36m   app=ovs,component=network,controller-revision-hash=8695955b8f,pod-template-generation=1,type=infra
vpc-dns-test-cjh1-78ffdd9dcd-rgnr7     0/1     Pending   0          59m     k8s-app=vpc-dns-test-cjh1,pod-template-hash=78ffdd9dcd
vpc-dns-test-cjh1-78ffdd9dcd-tk9z8     0/1     Pending   0          59m     k8s-app=vpc-dns-test-cjh1,pod-template-hash=78ffdd9dcd
vpc-dns-test-cjh1-8bdc58f66-6xxfh      1/1     Running   0          59m     k8s-app=vpc-dns-test-cjh1,pod-template-hash=8bdc58f66
root@node34:~# 
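As the LABELS column above shows, the generated pods carry k8s-app=vpc-dns-test-cjh1 rather than app=vpc-dns, so a selector matching what the controller actually sets does find them (names taken from the output above):

kubectl -n kube-system get pods -l k8s-app=vpc-dns-test-cjh1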

Second problem: some vpc-dns pods stay Pending when the cluster has only one node.

root@node34:~# kubectl get nodes -o wide
NAME     STATUS   ROLES           AGE     VERSION   INTERNAL-IP     EXTERNAL-IP   OS-IMAGE           KERNEL-VERSION      CONTAINER-RUNTIME
node34   Ready    control-plane   5h35m   v1.25.6   192.168.72.34   <none>        Ubuntu 22.04 LTS   5.15.0-71-generic   containerd://1.6.19

root@node34:~# kubectl -n kube-system  get pods |grep vpc-dns
vpc-dns-test-cjh1-78ffdd9dcd-rgnr7     0/1     Pending   0          58m
vpc-dns-test-cjh1-78ffdd9dcd-tk9z8     0/1     Pending   0          58m
vpc-dns-test-cjh1-8bdc58f66-6xxfh      1/1     Running   0          58m
root@node34:~# kubectl -n  kube-system describe pods vpc-dns-test-cjh1-78ffdd9dcd-rgnr7
Name:                 vpc-dns-test-cjh1-78ffdd9dcd-rgnr7
Namespace:            kube-system
Priority:             2000000000
Priority Class Name:  system-cluster-critical
Service Account:      vpc-dns
Node:                 <none>
Labels:               k8s-app=vpc-dns-test-cjh1
                      pod-template-hash=78ffdd9dcd
Annotations:          deployment.kubernetes.io/revision: 1
                      k8s.v1.cni.cncf.io/networks: default/ovn-nad
                      ovn-nad.default.ovn.kubernetes.io/allocated: true
                      ovn-nad.default.ovn.kubernetes.io/cidr: 100.64.0.0/10
                      ovn-nad.default.ovn.kubernetes.io/gateway: 100.64.0.1
                      ovn-nad.default.ovn.kubernetes.io/ip_address: 100.64.0.14
                      ovn-nad.default.ovn.kubernetes.io/logical_router: ovn-cluster
                      ovn-nad.default.ovn.kubernetes.io/logical_switch: ovn-default
                      ovn-nad.default.ovn.kubernetes.io/mac_address: 00:00:00:31:C6:0E
                      ovn-nad.default.ovn.kubernetes.io/pod_nic_type: veth-pair
                      ovn.kubernetes.io/allocated: true
                      ovn.kubernetes.io/cidr: 10.0.1.0/24
                      ovn.kubernetes.io/gateway: 10.0.1.254
                      ovn.kubernetes.io/ip_address: 10.0.1.14
                      ovn.kubernetes.io/logical_router: test-vpc-1
                      ovn.kubernetes.io/logical_switch: net1
                      ovn.kubernetes.io/mac_address: 00:00:00:77:F2:AE
                      ovn.kubernetes.io/pod_nic_type: veth-pair
Status:               Pending
IP:                   
IPs:                  <none>
Controlled By:        ReplicaSet/vpc-dns-test-cjh1-78ffdd9dcd
Init Containers:
  init-route:
    Image:      kubeovn/vpc-nat-gateway:v1.11.0
    Port:       <none>
    Host Port:  <none>
    Command:
      sh
      -c
      ip route add ${KUBERNETES_SERVICE_HOST} via 100.64.0.1 dev net1;ip route add 114.114.114.114 via 100.64.0.1 dev net1;ip route add 8.8.8.8 via 100.64.0.1 dev net1;
    Environment:  <none>
    Mounts:
      /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-pd9pd (ro)
Containers:
  coredns:
    Image:       registry.k8s.io/coredns/coredns:v1.9.3
    Ports:       53/UDP, 53/TCP, 9153/TCP
    Host Ports:  0/UDP, 0/TCP, 0/TCP
    Args:
      -conf
      /etc/coredns/Corefile
    Limits:
      memory:  170Mi
    Requests:
      cpu:        100m
      memory:     70Mi
    Environment:  <none>
    Mounts:
      /etc/coredns from config-volume (ro)
      /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-pd9pd (ro)
Conditions:
  Type           Status
  PodScheduled   False 
Volumes:
  config-volume:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      vpc-dns-corefile
    Optional:  false
  kube-api-access-pd9pd:
    Type:                    Projected (a volume that contains injected data from multiple sources)
    TokenExpirationSeconds:  3607
    ConfigMapName:           kube-root-ca.crt
    ConfigMapOptional:       <nil>
    DownwardAPI:             true
QoS Class:                   Burstable
Node-Selectors:              kubernetes.io/os=linux
Tolerations:                 CriticalAddonsOnly op=Exists
                             node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
                             node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
  Type     Reason            Age                   From               Message
  ----     ------            ----                  ----               -------
  Warning  FailedScheduling  3m41s (x13 over 59m)  default-scheduler  0/1 nodes are available: 1 node(s) didn't match pod anti-affinity rules. preemption: 0/1 nodes are available: 1 node(s) didn't match pod anti-affinity rules.

Maybe it should use preferredDuringSchedulingIgnoredDuringExecution instead of the hard requirement (a sketch follows the deployment YAML below):

root@node34:~# kubectl -n kube-system get deployment.apps/vpc-dns-test-cjh1 -o yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  annotations:
    deployment.kubernetes.io/revision: "2"
  creationTimestamp: "2023-05-11T06:07:23Z"
  generation: 3
  labels:
    ovn.kubernetes.io/vpc-dns: "true"
  name: vpc-dns-test-cjh1
  namespace: kube-system
  resourceVersion: "31010"
  uid: 83257efb-5745-4409-a3c8-394b4d2d0b5b
spec:
  progressDeadlineSeconds: 600
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      k8s-app: vpc-dns-test-cjh1
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 1
    type: RollingUpdate
  template:
    metadata:
      annotations:
        deployment.kubernetes.io/revision: "1"
        k8s.v1.cni.cncf.io/networks: default/ovn-nad
        ovn-nad.default.ovn.kubernetes.io/logical_switch: ovn-default
        ovn.kubernetes.io/logical_switch: net1
      creationTimestamp: null
      labels:
        k8s-app: vpc-dns-test-cjh1
    spec:
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: k8s-app
                operator: In
                values:
                - vpc-dns-test-cjh1
            topologyKey: kubernetes.io/hostname
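
A minimal sketch of the soft (preferred) variant suggested above, using the standard Kubernetes pod-anti-affinity schema; whether the kube-ovn controller would preserve such a manual edit to the generated Deployment is an open question:

spec:
  template:
    spec:
      affinity:
        podAntiAffinity:
          # "preferred" spreads replicas across nodes when possible,
          # but still schedules them when only one node exists
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              labelSelector:
                matchExpressions:
                - key: k8s-app
                  operator: In
                  values:
                  - vpc-dns-test-cjh1
              topologyKey: kubernetes.io/hostname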

Additional Info

bobz965 commented 1 year ago

You only have one node. Currently, by design, at least two nodes are required.

willzhang commented 1 year ago

You only have one node. Currently, by design, at least two nodes are required.

Leaving pods Pending on a single node isn't very friendly.

bobz965 commented 1 year ago

You only have one node. Currently, by design, at least two nodes are required.

Leaving pods Pending on a single node isn't very friendly.

A single node looks more like a test environment. Can you try changing the deployment's replicas and see whether the pods come up?

willzhang commented 1 year ago

You only have one node. Currently, by design, at least two nodes are required.

Leaving pods Pending on a single node isn't very friendly.

A single node looks more like a test environment. Can you try changing the deployment's replicas and see whether the pods come up?

A single node isn't necessarily a test environment; it depends on the scenario. Changing the deployment's replica count to 1 does work; it's just that hardcoding a requirement of two nodes before the pods can run doesn't feel right.
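
For example, with the standard scale command (note the kube-ovn controller may reconcile the Deployment back to its generated replica count):

kubectl -n kube-system scale deployment vpc-dns-test-cjh1 --replicas=1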

bobz965 commented 1 year ago

You only have one node. Currently, by design, at least two nodes are required.

Leaving pods Pending on a single node isn't very friendly.

A single node looks more like a test environment. Can you try changing the deployment's replicas and see whether the pods come up?

Yes, that works; it's just that requiring two nodes before the pods can run doesn't feel great.

Are you interested in submitting a PR to support specifying replicas?
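
Sketch of the idea, as a hypothetical replicas field on the VpcDns spec (this field does not exist in v1.11; it only illustrates what the proposed PR would add):

apiVersion: kubeovn.io/v1
kind: VpcDns
metadata:
  name: test-cjh1
spec:
  vpc: test-vpc-1
  subnet: net1
  replicas: 1   # hypothetical field: the subject of the proposed PR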

github-actions[bot] commented 11 months ago

Issues go stale after 60d of inactivity. Please comment or re-open the issue if you are still interested in getting this issue fixed.