aws / karpenter-provider-aws

Karpenter is a Kubernetes Node Autoscaler built for flexibility, performance, and simplicity.
https://karpenter.sh
Apache License 2.0

Empty node didn't get deleted after 15h #6593

Open · WxFang opened this issue 3 months ago

WxFang commented 3 months ago

Description

Observed Behavior: Nodes have been running for 15h without actual workloads; only daemonset pods are running on them.

Expected Behavior: Karpenter deletes the underutilized nodes.

Reproduction Steps (Please include YAML):

Nodepool Spec

➜  ~ kubectl get nodepool.karpenter.sh gpu-g4-pcwa -o yaml
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
  annotations:
    karpenter.sh/nodepool-hash: "6118002417219636067"
    karpenter.sh/nodepool-hash-version: v2
  creationTimestamp: "2024-07-20T07:01:12Z"
  generation: 1
  name: gpu-g4-pcwa
  resourceVersion: "27752860942"
  uid: 3a31a124-3648-4bd9-90e2-bd0a1b796d2e
spec:
  disruption:
    budgets:
    - nodes: 50%
    consolidationPolicy: WhenUnderutilized
    expireAfter: Never
  limits:
    cpu: "600"
    nvidia.com/gpu: "150"
  template:
    spec:
      nodeClassRef:
        name: gpu-g4-pcwa
      requirements:
      - key: node.kubernetes.io/instance-type
        operator: In
        values:
        - g4dn.xlarge
      - key: nvidia.com/gpu
        operator: In
        values:
        - "true"
      - key: karpenter.sh/capacity-type
        operator: In
        values:
        - on-demand
      - key: nodepool
        operator: In
        values:
        - gpu-g4-pcwa
      - key: kubernetes.io/arch
        operator: In
        values:
        - amd64
      - key: kubernetes.io/os
        operator: In
        values:
        - linux
      taints:
      - effect: NoSchedule
        key: gpu-g4-pcwa
        value: "true"
      - effect: NoSchedule
        key: nvidia.com/gpu
        value: "true"
status:
  resources:
    cpu: "320"
    ephemeral-storage: 8387623360Ki
    memory: 1285524648Ki
    nvidia.com/gpu: "80"
    pods: "2320"

Nodeclaim Spec

➜  ~ kubectl describe nodeclaim gpu-g4-pcwa-xstrf
Name:         gpu-g4-pcwa-xstrf
Namespace:
Labels:       karpenter.k8s.aws/instance-category=g
              karpenter.k8s.aws/instance-cpu=4
              karpenter.k8s.aws/instance-cpu-manufacturer=intel
              karpenter.k8s.aws/instance-ebs-bandwidth=3500
              karpenter.k8s.aws/instance-encryption-in-transit-supported=true
              karpenter.k8s.aws/instance-family=g4dn
              karpenter.k8s.aws/instance-generation=4
              karpenter.k8s.aws/instance-gpu-count=1
              karpenter.k8s.aws/instance-gpu-manufacturer=nvidia
              karpenter.k8s.aws/instance-gpu-memory=16384
              karpenter.k8s.aws/instance-gpu-name=t4
              karpenter.k8s.aws/instance-hypervisor=nitro
              karpenter.k8s.aws/instance-local-nvme=125
              karpenter.k8s.aws/instance-memory=16384
              karpenter.k8s.aws/instance-network-bandwidth=5000
              karpenter.k8s.aws/instance-size=xlarge
              karpenter.sh/capacity-type=on-demand
              karpenter.sh/nodepool=gpu-g4-pcwa
              kubernetes.io/arch=amd64
              kubernetes.io/os=linux
              node.kubernetes.io/instance-type=g4dn.xlarge
              nodepool=gpu-g4-pcwa
              nvidia.com/gpu=true
              topology.k8s.aws/zone-id=usw2-az3
              topology.kubernetes.io/region=us-west-2
              topology.kubernetes.io/zone=us-west-2c
Annotations:  karpenter.k8s.aws/ec2nodeclass-hash: 15846237518603617190
              karpenter.k8s.aws/ec2nodeclass-hash-version: v2
              karpenter.k8s.aws/tagged: true
              karpenter.sh/nodepool-hash: 6118002417219636067
              karpenter.sh/nodepool-hash-version: v2
API Version:  karpenter.sh/v1beta1
Kind:         NodeClaim
Metadata:
  Creation Timestamp:  2024-07-25T22:08:21Z
  Finalizers:
    karpenter.sh/termination
  Generate Name:  gpu-g4-pcwa-
  Generation:     1
  Owner References:
    API Version:           karpenter.sh/v1beta1
    Block Owner Deletion:  true
    Kind:                  NodePool
    Name:                  gpu-g4-pcwa
    UID:                   3a31a124-3648-4bd9-90e2-bd0a1b796d2e
  Resource Version:        27752750172
  UID:                     d51a0aef-25d5-4063-88ce-cbf7bbb913c1
Spec:
  Node Class Ref:
    Name:  gpu-g4-pcwa
  Requirements:
    Key:       kubernetes.io/arch
    Operator:  In
    Values:
      amd64
    Key:       kubernetes.io/os
    Operator:  In
    Values:
      linux
    Key:       karpenter.sh/nodepool
    Operator:  In
    Values:
      gpu-g4-pcwa
    Key:       node.kubernetes.io/instance-type
    Operator:  In
    Values:
      g4dn.xlarge
    Key:       nvidia.com/gpu
    Operator:  In
    Values:
      true
    Key:       karpenter.sh/capacity-type
    Operator:  In
    Values:
      on-demand
    Key:       nodepool
    Operator:  In
    Values:
      gpu-g4-pcwa
  Resources:
    Requests:
      Cpu:             3650m
      Memory:          11268435456
      nvidia.com/gpu:  1
      Pods:            7
  Taints:
    Effect:  NoSchedule
    Key:     gpu-g4-pcwa
    Value:   true
    Effect:  NoSchedule
    Key:     nvidia.com/gpu
    Value:   true
Status:
  Allocatable:
    Cpu:                        3920m
    Ephemeral - Storage:        89Gi
    Memory:                     14481Mi
    nvidia.com/gpu:             1
    Pods:                       29
    vpc.amazonaws.com/pod-eni:  39
  Capacity:
    Cpu:                        4
    Ephemeral - Storage:        100Gi
    Memory:                     15155Mi
    nvidia.com/gpu:             1
    Pods:                       29
    vpc.amazonaws.com/pod-eni:  39
  Conditions:
    Last Transition Time:  2024-07-25T22:11:29Z
    Message:
    Reason:                Initialized
    Status:                True
    Type:                  Initialized
    Last Transition Time:  2024-07-25T22:08:23Z
    Message:
    Reason:                Launched
    Status:                True
    Type:                  Launched
    Last Transition Time:  2024-07-25T22:11:29Z
    Message:
    Reason:                Ready
    Status:                True
    Type:                  Ready
    Last Transition Time:  2024-07-25T22:11:00Z
    Message:
    Reason:                Registered
    Status:                True
    Type:                  Registered
  Image ID:                <redacted>
  Node Name:               ip-172-31-140-231.us-west-2.compute.internal
  Provider ID:             <redacted>
Events:                    <none>
[Screenshot attached: 2024-07-26, 11:15 AM]

Versions:

WxFang commented 3 months ago

@jonathan-innis Thanks for joining the meeting, Jonathan! :) This ticket provides more details on the unconsolidated empty nodes I mentioned in today's meeting.

engedaam commented 3 months ago

Do you have any PDBs or pods with do-not-evict annotations? Can you provide any Karpenter logs?
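
For example, something along these lines should surface them (assuming the v1beta1 karpenter.sh/do-not-disrupt annotation, and that Karpenter runs as a deployment named karpenter in the karpenter namespace; adjust to your install):

# List PodDisruptionBudgets in all namespaces
kubectl get pdb -A

# Find pods annotated to block disruption
kubectl get pods -A -o json | jq -r '
  .items[]
  | select(.metadata.annotations["karpenter.sh/do-not-disrupt"] == "true")
  | "\(.metadata.namespace)/\(.metadata.name)"'

# Controller logs (deployment name and namespace are assumptions)
kubectl logs -n karpenter deployment/karpenter --tail=500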

njtran commented 3 months ago

Please share events for the node as well
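
For example, using the node name from the report above (the same selector with involvedObject.kind=NodeClaim works for the NodeClaim):

kubectl get events -A --field-selector involvedObject.kind=Node,involvedObject.name=ip-172-31-140-231.us-west-2.compute.internal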

WxFang commented 3 months ago

No PDBs for these daemonsets, and I didn't find any node events. Karpenter manages 3k+ nodes in this cluster, and I see hundreds of nodes like this: empty except for daemonsets, with no events on the nodes or NodeClaims. The scheduling queue looks fine (< 100) and the scheduling simulation is running. There are no related log entries. It seems the cluster is too big for Karpenter to scan every node. These nodes normally do get consolidated after several hours, but we obviously expect much faster turnaround.
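
For reference, a rough sketch of how I count these nodes: it lists every node whose running pods are all DaemonSet-owned (assumes jq is available; truly empty nodes with zero pods would need a separate diff against kubectl get nodes):

kubectl get pods -A -o json --field-selector status.phase=Running | jq -r '
  [ .items[]
    | {node: .spec.nodeName,
       ds: ([.metadata.ownerReferences[]? | select(.kind == "DaemonSet")] | length > 0)} ]
  | group_by(.node)[]           # one group of pods per node
  | select(all(.[]; .ds))       # keep nodes where every pod is DaemonSet-owned
  | .[0].node'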

The original nodes were deleted. Here is a new one.

➜  ~ kubectl describe node ip-172-18-138-50.us-west-2.compute.internal
Name:               ip-172-18-138-50.us-west-2.compute.internal
Roles:              <none>
Labels:             beta.kubernetes.io/arch=amd64
                    beta.kubernetes.io/instance-type=c7i.48xlarge
                    beta.kubernetes.io/os=linux
                    failure-domain.beta.kubernetes.io/region=us-west-2
                    failure-domain.beta.kubernetes.io/zone=us-west-2b
                    k8s.io/cloud-provider-aws=5f7d5c3f339ac7f902cf53fa00268999
                    karpenter.k8s.aws/instance-category=c
                    karpenter.k8s.aws/instance-cpu=192
                    karpenter.k8s.aws/instance-cpu-manufacturer=intel
                    karpenter.k8s.aws/instance-ebs-bandwidth=40000
                    karpenter.k8s.aws/instance-encryption-in-transit-supported=true
                    karpenter.k8s.aws/instance-family=c7i
                    karpenter.k8s.aws/instance-generation=7
                    karpenter.k8s.aws/instance-hypervisor=nitro
                    karpenter.k8s.aws/instance-memory=393216
                    karpenter.k8s.aws/instance-network-bandwidth=50000
                    karpenter.k8s.aws/instance-size=48xlarge
                    karpenter.sh/capacity-type=on-demand
                    karpenter.sh/initialized=true
                    karpenter.sh/nodepool=compute
                    karpenter.sh/registered=true
                    kubernetes.io/arch=amd64
                    kubernetes.io/hostname=ip-172-18-138-50.us-west-2.compute.internal
                    kubernetes.io/os=linux
                    node.kubernetes.io/instance-type=c7i.48xlarge
                    nodepool=compute
                    topology.ebs.csi.aws.com/zone=us-west-2b
                    topology.k8s.aws/zone-id=usw2-az1
                    topology.kubernetes.io/region=us-west-2
                    topology.kubernetes.io/zone=us-west-2b
Annotations:        alpha.kubernetes.io/provided-node-ip: 172.18.138.50
                    csi.volume.kubernetes.io/nodeid: {"ebs.csi.aws.com":"i-0b9ff61d337e5ade8"}
                    karpenter.k8s.aws/ec2nodeclass-hash: 7017015924687006381
                    karpenter.k8s.aws/ec2nodeclass-hash-version: v2
                    karpenter.sh/nodepool-hash: 5453664062116956107
                    karpenter.sh/nodepool-hash-version: v2
                    node.alpha.kubernetes.io/ttl: 300
                    volumes.kubernetes.io/controller-managed-attach-detach: true
CreationTimestamp:  Fri, 26 Jul 2024 18:14:37 -0700
Taints:             compute=true:NoSchedule
Unschedulable:      false
Lease:
  HolderIdentity:  ip-172-18-138-50.us-west-2.compute.internal
  AcquireTime:     <unset>
  RenewTime:       Tue, 30 Jul 2024 10:26:42 -0700
Conditions:
  Type             Status  LastHeartbeatTime                 LastTransitionTime                Reason                       Message
  ----             ------  -----------------                 ------------------                ------                       -------
  MemoryPressure   False   Tue, 30 Jul 2024 10:21:59 -0700   Fri, 26 Jul 2024 18:14:36 -0700   KubeletHasSufficientMemory   kubelet has sufficient memory available
  DiskPressure     False   Tue, 30 Jul 2024 10:21:59 -0700   Fri, 26 Jul 2024 18:14:36 -0700   KubeletHasNoDiskPressure     kubelet has no disk pressure
  PIDPressure      False   Tue, 30 Jul 2024 10:21:59 -0700   Fri, 26 Jul 2024 18:14:36 -0700   KubeletHasSufficientPID      kubelet has sufficient PID available
  Ready            True    Tue, 30 Jul 2024 10:21:59 -0700   Fri, 26 Jul 2024 18:14:55 -0700   KubeletReady                 kubelet is posting ready status
Addresses:
  InternalIP:   172.18.138.50
  InternalDNS:  ip-172-18-138-50.us-west-2.compute.internal
  Hostname:     ip-172-18-138-50.us-west-2.compute.internal
Capacity:
  cpu:                192
  ephemeral-storage:  104845292Ki
  hugepages-1Gi:      0
  hugepages-2Mi:      0
  memory:             389937584Ki
  pods:               737
Allocatable:
  cpu:                191450m
  ephemeral-storage:  95551679124
  hugepages-1Gi:      0
  hugepages-2Mi:      0
  memory:             381272496Ki
  pods:               737
Non-terminated Pods:          (6 in total)
  Namespace                   Name                                                   CPU Requests  CPU Limits  Memory Requests  Memory Limits  Age
  ---------                   ----                                                   ------------  ----------  ---------------  -------------  ---
  kube-system                 aws-node-cglsb                                         50m (0%)      0 (0%)      0 (0%)           0 (0%)         3d16h
  kube-system                 ebs-csi-node-b88ld                                     30m (0%)      0 (0%)      120Mi (0%)       768Mi (0%)     3d16h
  kube-system                 kube-proxy-fs9bm                                       100m (0%)     0 (0%)      0 (0%)           0 (0%)         3d16h
  logging                     vector-w7gqf                                           100m (0%)     3 (1%)      256Mi (0%)       1Gi (0%)       3d16h
  services                    jaeger-service-agent-daemonset-b8qbr                   0 (0%)        0 (0%)      0 (0%)           0 (0%)         3d16h
  services                    otel-traceagent-opentelemetry-collector-agent-cmmn4    0 (0%)        0 (0%)      0 (0%)           0 (0%)         3d16h
Allocated resources:
  (Total limits may be over 100 percent, i.e., overcommitted.)
  Resource           Requests    Limits
  --------           --------    ------
  cpu                280m (0%)   3 (1%)
  memory             376Mi (0%)  1792Mi (0%)
  ephemeral-storage  0 (0%)      0 (0%)
  hugepages-1Gi      0 (0%)      0 (0%)
  hugepages-2Mi      0 (0%)      0 (0%)
Events:              <none>
The following pipeline dumps the metadata of every running pod on the node; all of them are owned by DaemonSets:

➜  ~ kubectl get -A pod --field-selector spec.nodeName=ip-172-18-138-50.us-west-2.compute.internal | grep Running | awk '{print "kubectl get pod -n " $1 " " $2 " -o json"}' | bash | jq '.metadata'
{
  "annotations": {
    "artifact.spinnaker.io/location": "kube-system",
    "artifact.spinnaker.io/name": "aws-node",
    "artifact.spinnaker.io/type": "kubernetes/daemonSet",
    "artifact.spinnaker.io/version": "",
    "moniker.spinnaker.io/application": "aws-cni",
    "moniker.spinnaker.io/cluster": "daemonSet aws-node",
    "prometheus.io/instance": "default",
    "prometheus.io/port": "61678",
    "prometheus.io/scrape": "true"
  },
  "creationTimestamp": "2024-07-27T01:14:37Z",
  "generateName": "aws-node-",
  "labels": {
    "app.kubernetes.io/instance": "aws-vpc-cni",
    "app.kubernetes.io/managed-by": "spinnaker",
    "app.kubernetes.io/name": "aws-node",
    "apple_usr_app_name": "aws-node",
    "controller-revision-hash": "64cf77bcd8",
    "k8s-app": "aws-node",
    "pod-template-generation": "15"
  },
  "name": "aws-node-cglsb",
  "namespace": "kube-system",
  "ownerReferences": [
    {
      "apiVersion": "apps/v1",
      "blockOwnerDeletion": true,
      "controller": true,
      "kind": "DaemonSet",
      "name": "aws-node",
      "uid": "2e207144-33f5-40f7-bd18-3d2e38607785"
    }
  ],
  "resourceVersion": "15242243124",
  "uid": "b6609dc8-6723-4450-a255-8f6c771b460e"
}
{
  "creationTimestamp": "2024-07-27T01:14:37Z",
  "generateName": "ebs-csi-node-",
  "labels": {
    "app": "ebs-csi-node",
    "app.kubernetes.io/component": "csi-driver",
    "app.kubernetes.io/instance": "aws-ebs-csi-driver",
    "app.kubernetes.io/managed-by": "Helm",
    "app.kubernetes.io/name": "aws-ebs-csi-driver",
    "app.kubernetes.io/version": "1.28.0",
    "controller-revision-hash": "7f445f486",
    "helm.sh/chart": "aws-ebs-csi-driver-2.28.1",
    "pod-template-generation": "11"
  },
  "name": "ebs-csi-node-b88ld",
  "namespace": "kube-system",
  "ownerReferences": [
    {
      "apiVersion": "apps/v1",
      "blockOwnerDeletion": true,
      "controller": true,
      "kind": "DaemonSet",
      "name": "ebs-csi-node",
      "uid": "44c1bf19-64dd-44a3-a4dc-3d650935d4b6"
    }
  ],
  "resourceVersion": "15242238710",
  "uid": "5e035eb3-683e-4ca2-8ea6-e876eed24253"
}
{
  "annotations": {
    "artifact.spinnaker.io/location": "kube-system",
    "artifact.spinnaker.io/name": "kube-proxy",
    "artifact.spinnaker.io/type": "kubernetes/daemonSet",
    "artifact.spinnaker.io/version": "",
    "moniker.spinnaker.io/application": "kube-proxy",
    "moniker.spinnaker.io/cluster": "daemonSet kube-proxy",
    "prometheus.io/instance": "default",
    "prometheus.io/port": "10249",
    "prometheus.io/scrape": "true"
  },
  "creationTimestamp": "2024-07-27T01:14:37Z",
  "generateName": "kube-proxy-",
  "labels": {
    "app.kubernetes.io/managed-by": "spinnaker",
    "app.kubernetes.io/name": "kube-proxy",
    "apple_usr_app_name": "kube-proxy",
    "controller-revision-hash": "5dd844d6cd",
    "k8s-app": "kube-proxy",
    "pod-template-generation": "7"
  },
  "name": "kube-proxy-fs9bm",
  "namespace": "kube-system",
  "ownerReferences": [
    {
      "apiVersion": "apps/v1",
      "blockOwnerDeletion": true,
      "controller": true,
      "kind": "DaemonSet",
      "name": "kube-proxy",
      "uid": "81fc1247-81e7-4882-aae1-57900c696952"
    }
  ],
  "resourceVersion": "15242236239",
  "uid": "d371f8c2-a3d6-4c40-b987-eaaf8f5e253f"
}
{
  "annotations": {
    "cluster-autoscaler.kubernetes.io/safe-to-evict": "true",
    "prometheus.io/port": "9090",
    "prometheus.io/scrape": "true"
  },
  "creationTimestamp": "2024-07-27T01:14:37Z",
  "generateName": "vector-",
  "labels": {
    "app": "vector",
    "app.kubernetes.io/component": "agent",
    "app.kubernetes.io/instance": "vector",
    "app.kubernetes.io/name": "vector",
    "controller-revision-hash": "978979dcd",
    "pod-template-generation": "2",
    "vector.dev/exclude": "false"
  },
  "name": "vector-w7gqf",
  "namespace": "logging",
  "ownerReferences": [
    {
      "apiVersion": "apps/v1",
      "blockOwnerDeletion": true,
      "controller": true,
      "kind": "DaemonSet",
      "name": "vector",
      "uid": "93a9193c-0856-4a51-b904-fd51ef0cf340"
    }
  ],
  "resourceVersion": "15242261286",
  "uid": "05ffc310-52ee-49e4-ae26-363ecd3c7ca5"
}
{
  "annotations": {
    "linkerd.io/inject": "disabled",
    "prometheus.io/port": "14271",
    "prometheus.io/scrape": "true",
    "sidecar.istio.io/inject": "false"
  },
  "creationTimestamp": "2024-07-27T01:14:37Z",
  "generateName": "jaeger-service-agent-daemonset-",
  "labels": {
    "app": "jaeger",
    "app.kubernetes.io/component": "agent",
    "app.kubernetes.io/instance": "jaeger-service",
    "app.kubernetes.io/managed-by": "jaeger-operator",
    "app.kubernetes.io/name": "jaeger-service-agent",
    "app.kubernetes.io/part-of": "jaeger",
    "apple_usr_app_id": "monitoring",
    "apple_usr_app_name": "jaeger",
    "controller-revision-hash": "86755ddb5",
    "pod-template-generation": "3"
  },
  "name": "jaeger-service-agent-daemonset-b8qbr",
  "namespace": "services",
  "ownerReferences": [
    {
      "apiVersion": "apps/v1",
      "blockOwnerDeletion": true,
      "controller": true,
      "kind": "DaemonSet",
      "name": "jaeger-service-agent-daemonset",
      "uid": "a131462d-145e-49c1-9461-dca6e297e985"
    }
  ],
  "resourceVersion": "15242236317",
  "uid": "9edca4d3-6462-4c8c-bc47-1c7b6b0d621e"
}
{
  "annotations": {
    "artifact.spinnaker.io/location": "services",
    "artifact.spinnaker.io/name": "otel-traceagent-opentelemetry-collector-agent",
    "artifact.spinnaker.io/type": "kubernetes/daemonSet",
    "artifact.spinnaker.io/version": "",
    "checksum/config": "4dcf49020200023fb25d1b73d42a42305493af3ba364678cd24b0917f4356260",
    "moniker.spinnaker.io/application": "opentelemetry-collector",
    "moniker.spinnaker.io/cluster": "daemonSet otel-traceagent-opentelemetry-collector-agent",
    "prometheus.io/port": "8888",
    "prometheus.io/scrape": "true"
  },
  "creationTimestamp": "2024-07-27T01:14:37Z",
  "generateName": "otel-traceagent-opentelemetry-collector-agent-",
  "labels": {
    "app": "otel-traceagent",
    "app.kubernetes.io/instance": "otel-traceagent",
    "app.kubernetes.io/managed-by": "spinnaker",
    "app.kubernetes.io/name": "opentelemetry-collector",
    "component": "agent-collector",
    "controller-revision-hash": "866cd4858f",
    "pod-template-generation": "9",
    "serviceSelector": "otel-traceagent"
  },
  "name": "otel-traceagent-opentelemetry-collector-agent-cmmn4",
  "namespace": "services",
  "ownerReferences": [
    {
      "apiVersion": "apps/v1",
      "blockOwnerDeletion": true,
      "controller": true,
      "kind": "DaemonSet",
      "name": "otel-traceagent-opentelemetry-collector-agent",
      "uid": "5893ff74-b3d7-4672-a820-d35e144d4d2d"
    }
  ],
  "resourceVersion": "15242236463",
  "uid": "2ef661b7-cea7-47ba-bd15-c832af8e1552"
}