YevhenLodovyi opened this issue 7 months ago
What is the instance type of the node that is finally created? Can you search for the log line that says `"message":"initialized nodeclaim"`? What are the `cpu` and `memory` values from that line?
Here is the log (Karpenter recreates the node since it is unused):
{"level":"INFO","time":"2024-04-15T15:22:08.722Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-vqs8n","provider-id":"aws:///eu-west-1b/i-0f4c920baa69adb22","node":"ip-10-50-60-191.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:22:43.788Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-vhb92","provider-id":"aws:///eu-west-1b/i-0aedf7e199d02b99c","node":"ip-10-50-42-90.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:23:29.650Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-zl275","provider-id":"aws:///eu-west-1a/i-0691ddc4f2b328ee9","node":"ip-10-50-24-213.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919624Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:24:07.022Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-zg2mc","provider-id":"aws:///eu-west-1b/i-0f14add06a95b358e","node":"ip-10-50-62-117.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:24:49.475Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-524jt","provider-id":"aws:///eu-west-1b/i-03c18b6fac9ec4f24","node":"ip-10-50-55-252.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:25:24.434Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-t6wmw","provider-id":"aws:///eu-west-1c/i-05afe3059876ffbae","node":"ip-10-50-78-36.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:26:03.199Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-b726z","provider-id":"aws:///eu-west-1b/i-0afd37e0ea042f8bc","node":"ip-10-50-55-186.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:26:45.368Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-fqqdl","provider-id":"aws:///eu-west-1b/i-09f93e7d7cc430b0c","node":"ip-10-50-53-147.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:27:18.853Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-sflpq","provider-id":"aws:///eu-west-1b/i-088030c9309fb4fd1","node":"ip-10-50-57-122.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919624Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:27:58.014Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-9jq8s","provider-id":"aws:///eu-west-1a/i-0ddadd6ac4d5e1c35","node":"ip-10-50-31-93.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:28:38.328Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-nnlgf","provider-id":"aws:///eu-west-1a/i-012c53d6958240fbe","node":"ip-10-50-13-107.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:29:15.801Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-56vn4","provider-id":"aws:///eu-west-1c/i-08724af183d994141","node":"ip-10-50-85-121.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919624Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:29:50.303Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-bxbrf","provider-id":"aws:///eu-west-1b/i-0ab5a126dac5b5df0","node":"ip-10-50-52-171.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919624Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:30:26.178Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-4jwbj","provider-id":"aws:///eu-west-1b/i-03f1b9705ef986281","node":"ip-10-50-45-129.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:31:03.851Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-wlscp","provider-id":"aws:///eu-west-1b/i-062d451a79ce0cc57","node":"ip-10-50-33-115.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:31:41.420Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-n4fgd","provider-id":"aws:///eu-west-1a/i-04385c88d48b7d025","node":"ip-10-50-28-61.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
{"level":"INFO","time":"2024-04-15T15:32:20.947Z","logger":"controller.nodeclaim.lifecycle","message":"initialized nodeclaim","commit":"17dd42b","nodeclaim":"ondemand-default-g74m9","provider-id":"aws:///eu-west-1b/i-0cc8722a809d49d12","node":"ip-10-50-46-41.eu-west-1.compute.internal","allocatable":{"cpu":"3920m","ephemeral-storage":"47233297124","hugepages-1Gi":"0","hugepages-2Mi":"0","memory":"6919632Ki","pods":"110","vpc.amazonaws.com/pod-eni":"18"}}
Allocatable:
  cpu:                3920m
  ephemeral-storage:  47233297124
  hugepages-1Gi:      0
  hugepages-2Mi:      0
  memory:             6919632Ki
  pods:               110
So we have 6919632Ki and we need 6828Mi. As far as I understand, 6919632Ki is 6757.453125Mi, which is not enough.
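For clarity, here is that unit conversion spelled out (plain arithmetic, nothing Karpenter-specific):

```python
# Convert the kubelet-reported allocatable memory from Ki to Mi (1Mi = 1024Ki)
# and compare it with the ~6828Mi that the pending pod placement needs.
allocatable_ki = 6919632
allocatable_mi = allocatable_ki / 1024
print(f"{allocatable_mi:.3f}Mi")        # 6757.453Mi
print(allocatable_mi >= 6828)           # False -> the pod cannot fit on this node
```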
Would you mind sharing your `EC2NodeClass` configuration as well? Can you also do a `kubectl describe pod <karpenter_pod>` on your karpenter pod? Wondering if `VM_MEMORY_OVERHEAD_PERCENT` has some different value set that could be causing an issue here.
@jigisha620 sure, here you are:
It is quite a strange issue, to be honest: I have ~1k pods and only one pod configuration is affected... Also, I can only reproduce the issue from time to time (I am talking about a static test env where only one pod is pending); in some cases Karpenter starts a bigger instance type and all is OK.
```yaml
apiVersion: karpenter.k8s.aws/v1beta1
kind: EC2NodeClass
metadata:
  name: al2
  namespace: karpenter
spec:
  # Required, resolves a default ami and userdata
  amiFamily: AL2
  # Required, discovers subnets to attach to instances
  subnetSelectorTerms:
    - tags:
        karpenter.sh/discovery: <reducted>
  # Required, discovers security groups to attach to instances
  securityGroupSelectorTerms:
    - tags:
        karpenter.sh/discovery: <reducted>
  # Optional, IAM role to use for the node identity.
  role: <reducted>
  # Optional, configures IMDS for the instance
  metadataOptions:
    httpEndpoint: enabled
    httpProtocolIPv6: disabled
    httpPutResponseHopLimit: 2
    httpTokens: required
  # Optional, configures storage devices for the instance
  blockDeviceMappings:
    - deviceName: /dev/xvda
      ebs:
        volumeSize: 50Gi
        volumeType: gp3
        encrypted: true
        kmsKeyID: <reducted>
        deleteOnTermination: true
  # Optional, discovers amis to override the amiFamily's default amis
  amiSelectorTerms:
    - name: amazon-eks-node-1.28-v20240213
  # Optional, overrides autogenerated userdata with a merge semantic
  userData: |
    <reducted>
```
❯ k -n monitoring describe pod curl
Name: curl
Namespace: monitoring
Priority: 0
Service Account: default
Node: <none>
Labels: app=karpenter
owner=yevhen
security.istio.io/tlsMode=istio
service.istio.io/canonical-name=karpenter
service.istio.io/canonical-revision=latest
Annotations: istio.io/rev: default
kubectl.kubernetes.io/default-container: grpcurl1
kubectl.kubernetes.io/default-logs-container: grpcurl1
prometheus.io/path: /stats/prometheus
prometheus.io/port: 15020
prometheus.io/scrape: true
sidecar.istio.io/proxyCPU: 300m
sidecar.istio.io/proxyCPULimit: 300m
sidecar.istio.io/proxyMemory: 300Mi
sidecar.istio.io/proxyMemoryLimit: 300Mi
sidecar.istio.io/status:
{"initContainers":["istio-init"],"containers":["istio-proxy"],"volumes":["workload-socket","credential-socket","workload-certs","istio-env...
Status: Pending
IP:
IPs: <none>
Init Containers:
istio-init:
Image: <reducted>.dkr.ecr.eu-west-1.amazonaws.com/istio/proxyv2:1.19.3
Port: <none>
Host Port: <none>
Args:
istio-iptables
-p
15001
-z
15006
-u
1337
-m
REDIRECT
-i
*
-x
169.254.169.254/32
-b
*
-d
15090,15021,15020
--log_output_level=default:info
Limits:
cpu: 300m
memory: 300Mi
Requests:
cpu: 300m
memory: 300Mi
Environment:
ISTIO_META_DNS_AUTO_ALLOCATE: true
ISTIO_META_DNS_CAPTURE: true
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-bzx84 (ro)
Containers:
istio-proxy:
Image: <reducted>.dkr.ecr.eu-west-1.amazonaws.com/istio/proxyv2:1.19.3
Port: 15090/TCP
Host Port: 0/TCP
Args:
proxy
sidecar
--domain
$(POD_NAMESPACE).svc.cluster.local
--proxyLogLevel=warning
--proxyComponentLogLevel=misc:error
--log_output_level=default:info
Limits:
cpu: 300m
memory: 300Mi
Requests:
cpu: 300m
memory: 300Mi
Readiness: http-get http://:15021/healthz/ready delay=1s timeout=3s period=2s #success=1 #failure=30
Environment:
JWT_POLICY: third-party-jwt
PILOT_CERT_PROVIDER: istiod
CA_ADDR: istiod.istio-system.svc:15012
POD_NAME: curl (v1:metadata.name)
POD_NAMESPACE: monitoring (v1:metadata.namespace)
INSTANCE_IP: (v1:status.podIP)
SERVICE_ACCOUNT: (v1:spec.serviceAccountName)
HOST_IP: (v1:status.hostIP)
ISTIO_CPU_LIMIT: 1 (limits.cpu)
PROXY_CONFIG: {"proxyMetadata":{"ISTIO_META_DNS_AUTO_ALLOCATE":"true","ISTIO_META_DNS_CAPTURE":"true"},"holdApplicationUntilProxyStarts":true,"proxyHeaders":{"metadataExchangeHeaders":{"mode":"IN_MESH"}}}
ISTIO_META_POD_PORTS: [
]
ISTIO_META_APP_CONTAINERS: grpcurl1,grpcurl
GOMEMLIMIT: 314572800 (limits.memory)
GOMAXPROCS: 1 (limits.cpu)
ISTIO_META_CLUSTER_ID: Kubernetes
ISTIO_META_NODE_NAME: (v1:spec.nodeName)
ISTIO_META_INTERCEPTION_MODE: REDIRECT
ISTIO_META_WORKLOAD_NAME: curl
ISTIO_META_OWNER: kubernetes://apis/v1/namespaces/monitoring/pods/curl
ISTIO_META_MESH_ID: cluster.local
TRUST_DOMAIN: cluster.local
ISTIO_META_DNS_AUTO_ALLOCATE: true
ISTIO_META_DNS_CAPTURE: true
Mounts:
/etc/istio/pod from istio-podinfo (rw)
/etc/istio/proxy from istio-envoy (rw)
/var/lib/istio/data from istio-data (rw)
/var/run/secrets/credential-uds from credential-socket (rw)
/var/run/secrets/istio from istiod-ca-cert (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-bzx84 (ro)
/var/run/secrets/tokens from istio-token (rw)
/var/run/secrets/workload-spiffe-credentials from workload-certs (rw)
/var/run/secrets/workload-spiffe-uds from workload-socket (rw)
grpcurl1:
Image: fullstorydev/grpcurl:latest-alpine
Port: <none>
Host Port: <none>
Command:
/bin/sh
-c
sleep infinity
Limits:
memory: 6Gi
Requests:
cpu: 1260m
memory: 6Gi
Environment: <none>
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-bzx84 (ro)
grpcurl:
Image: fullstorydev/grpcurl:latest-alpine
Port: <none>
Host Port: <none>
Command:
/bin/sh
-c
sleep infinity
Limits:
memory: 128Mi
Requests:
cpu: 50m
memory: 128Mi
Environment: <none>
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-bzx84 (ro)
Conditions:
Type Status
PodScheduled False
Volumes:
workload-socket:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
credential-socket:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
workload-certs:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
istio-envoy:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium: Memory
SizeLimit: <unset>
istio-data:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
istio-podinfo:
Type: DownwardAPI (a volume populated by information about the pod)
Items:
metadata.labels -> labels
metadata.annotations -> annotations
istio-token:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 43200
istiod-ca-cert:
Type: ConfigMap (a volume populated by a ConfigMap)
Name: istio-ca-root-cert
Optional: false
kube-api-access-bzx84:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
QoS Class: Burstable
Node-Selectors: <none>
Tolerations: karpenter/lifecycle=ondemand:NoSchedule
node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning FailedScheduling 12s default-scheduler 0/5 nodes are available: 1 node(s) had untolerated taint {eks.amazonaws.com/compute-type: fargate}, 2 Insufficient cpu, 2 Insufficient memory, 2 node(s) had untolerated taint {karpenter/lifecycle: spot}. preemption: 0/5 nodes are available: 2 No preemption victims found for incoming pod, 3 Preemption is not helpful for scheduling..
Normal Nominated 10s karpenter Pod should schedule on: nodeclaim/ondemand-default-6jh2r
@YevhenLodovyi Thanks for sharing this information.
> Can you also do a `kubectl describe pod <karpenter_pod>` on your karpenter pod? Wondering if `VM_MEMORY_OVERHEAD_PERCENT` has some different value set that could be causing an issue here.

By this I meant: could you describe your karpenter pods or the karpenter deployment to check whether the value for `VM_MEMORY_OVERHEAD_PERCENT` was set to something different when Karpenter was installed?
❯ k -n karpenter describe pod karpenter-5f4b874cc8-25wjd | grep VM_MEMORY_OVERHEAD_PERCENT
VM_MEMORY_OVERHEAD_PERCENT: 0.075
I use the default helm chart and do not have any customisation:
```yaml
karpenter:
  serviceAccount:
    create: false
    name: karpenter
  settings:
    featureGates:
      spotToSpotConsolidation: true
```
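For context, my rough understanding (a sketch, not Karpenter's exact code) is that Karpenter estimates a node's allocatable memory before launch by discounting the EC2-advertised capacity by `VM_MEMORY_OVERHEAD_PERCENT` and then subtracting the `kubeReserved`/`systemReserved` values from the NodePool plus an eviction threshold; the 100Mi eviction value below is an assumption. If that estimate lands above the allocatable the kubelet actually reports, a pod can "fit" on paper but never fit on the real node:

```python
MI = 1024            # Ki per Mi
GI = 1024 * MI       # Ki per Gi

advertised_ki = 8 * GI                  # c6a.xlarge / m6a.large advertise 8 GiB
overhead = 0.075                        # VM_MEMORY_OVERHEAD_PERCENT from the grep above

# Discount the advertised capacity, then subtract the NodePool reservations
# (205Mi systemReserved, 384Mi kubeReserved) and an assumed 100Mi eviction threshold.
estimated_allocatable_ki = advertised_ki * (1 - overhead) - (205 + 384 + 100) * MI

actual_allocatable_ki = 6919632         # from the "initialized nodeclaim" log lines

print(f"estimated ~ {estimated_allocatable_ki / MI:.0f}Mi")  # ~6889Mi, enough for ~6828Mi
print(f"actual    = {actual_allocatable_ki / MI:.0f}Mi")     # ~6757Mi, not enough
```

If the numbers line up this way, it would explain the loop: the node is launched, the pod still does not fit, the node sits unused and is deprovisioned, and Karpenter tries again.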
/assign @jigisha620
Hey @YevhenLodovyi Were you able to resolve the problem that you were facing?
> Were you able to resolve the problem that you were facing?

I changed the resources configuration for my app deployment to work around the issue. It is not a fix, it is just a workaround :)
I tried to reproduce the issue with the configurations that you have shared. I did not see this issue occur even once. From the logs that you shared earlier I could figure out that Karpenter tried to provision a `c6a.xlarge` instance type and kept retrying, whereas I always got an `m6a.large` instance type while trying to reproduce this issue.
Yes, as I mentioned, it is not easy to reproduce, and I have no idea why. I managed to reproduce the issue in two EKS clusters 3 times (I made ~10 attempts).
The interesting fact is that I had a static cluster, no redeployments, nothing, and sometimes I hit this issue and sometimes not... It looks like there is an unknown external factor :(
I was able to reproduce this behaviour in 4 different clusters after facing it for the first time. It looks like Karpenter is not considering pods that are created by DaemonSets, since it occurred when a DaemonSet was scheduled with a high CPU request.
Versions:
Karpenter: 0.36.2
Kubernetes: v1.29.4
These were the steps to reproduce it:
First, deploy a DaemonSet with high CPU/memory requests declared. In my case I configured these values:
```yaml
resources:
  limits:
    cpu: 1600m
    memory: 1Gi
  requests:
    cpu: 1200m
    memory: 600Mi
```
Then, I created a deployment with three replicas and these resource requests/limits:
```yaml
resources:
  limits:
    cpu: "2"
    memory: 5Gi
  requests:
    cpu: "1"
    memory: 3Gi
```
After deploying these objects, Karpenter started creating nodes endlessly and the deployment showed lots of nodeclaim creation events, while the pods stayed in Pending status. All those nodes were of the same type and could not fit both the DaemonSet pods and the deployment pods.
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Nominated 12m karpenter Pod should schedule on: nodeclaim/<reducted>-j5zld
Normal Nominated 10m karpenter Pod should schedule on: nodeclaim/<reducted>-jt9mh
Normal Nominated 8m2s karpenter Pod should schedule on: nodeclaim/<reducted>-l4q82
Warning FailedScheduling 6m49s default-scheduler 0/21 nodes are available: 3 Insufficient cpu, 3 node(s) had untolerated taint {node.group.name: <reducted>}, 6 node(s) didn't match Pod's node affinity/selector, 9 node(s) had untolerated taint {node.group.name: <reducted>}. preemption: 0/21 nodes are available: 18 Preemption is not helpful for scheduling, 3 No preemption victims found for incoming pod.
Warning FailedScheduling 6m33s (x2 over 6m41s) default-scheduler 0/20 nodes are available: 2 Insufficient cpu, 3 node(s) had untolerated taint {node.group.name: <reducted>}, 6 node(s) didn't match Pod's node affinity/selector, 9 node(s) had untolerated taint {node.group.name: <reducted>}. preemption: 0/20 nodes are available: 18 Preemption is not helpful for scheduling, 2 No preemption victims found for incoming pod.
Normal Nominated 6m2s karpenter Pod should schedule on: nodeclaim/<reducted>-tk9hs
Warning FailedScheduling 6m default-scheduler 0/20 nodes are available: 1 Insufficient cpu, 1 node(s) had untolerated taint {node.kubernetes.io/not-ready: }, 3 node(s) had untolerated taint {node.group.name: <reducted>}, 6 node(s) didn't match Pod's node affinity/selector, 9 node(s) had untolerated taint {node.group.name: <reducted>}. preemption: 0/20 nodes are available: 1 No preemption victims found for incoming pod, 19 Preemption is not helpful for scheduling.
Warning FailedScheduling 5m50s default-scheduler 0/20 nodes are available: 2 node(s) had untolerated taint {node.kubernetes.io/not-ready: }, 3 node(s) had untolerated taint {node.group.name: <reducted>}, 6 node(s) didn't match Pod's node affinity/selector, 9 node(s) had untolerated taint {node.group.name: <reducted>}. preemption: 0/20 nodes are available: 20 Preemption is not helpful for scheduling.
Normal Nominated 4m2s karpenter Pod should schedule on: nodeclaim/<reducted>-r9rwp
Normal Nominated 112s karpenter Pod should schedule on: nodeclaim/<reducted>-bbfvc
Analyzing why Karpenter was not able to schedule the pods, it was clear that the combined CPU requests of the DaemonSet pod and a deployment pod exceeded the CPU available on the node: 1200m + 1000m = 2200m, against 1930m allocatable.
Capacity:
  cpu:                2
  ephemeral-storage:  62902252Ki
  hugepages-1Gi:      0
  hugepages-2Mi:      0
  memory:             7936952Ki
  pods:               29
Allocatable:
  cpu:                1930m
  ephemeral-storage:  56896973524
  hugepages-1Gi:      0
  hugepages-2Mi:      0
  memory:             7246776Ki
  pods:               29
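A quick sanity check of that math with the numbers above (plain arithmetic, nothing more):

```python
# One DaemonSet pod plus one deployment replica on the 2-vCPU node described above.
allocatable_cpu_m = 1930      # from the Allocatable section
daemonset_cpu_m = 1200        # DaemonSet pod CPU request
deployment_cpu_m = 1000       # deployment pod CPU request

total_m = daemonset_cpu_m + deployment_cpu_m
print(f"{total_m}m requested vs {allocatable_cpu_m}m allocatable -> "
      f"{'does not fit' if total_m > allocatable_cpu_m else 'fits'}")
```

So once the DaemonSet pod lands on the node, the deployment pod can never schedule there, which is consistent with Karpenter not accounting for the DaemonSet pod when picking the instance size.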
Might be related to this bug: https://github.com/kubernetes-sigs/karpenter/issues/1337
The Kubernetes project currently lacks enough contributors to adequately respond to all issues.
This bot triages un-triaged issues according to the following rules:
- After a period of inactivity, `lifecycle/stale` is applied
- After a further period of inactivity once `lifecycle/stale` was applied, `lifecycle/rotten` is applied
- After a further period of inactivity once `lifecycle/rotten` was applied, the issue is closed

You can:
- Mark this issue as fresh with `/remove-lifecycle stale`
- Close this issue with `/close`

Please send feedback to sig-contributor-experience at kubernetes/community.
/lifecycle stale
/remove-lifecycle stale
Description
Hi,
Observed Behavior:
I have faced an issue where Karpenter creates a node that cannot fit the pod; as a result the pod cannot be scheduled and Karpenter recreates the nodes endlessly.
here is pod configuration:
here is daemonset configuration:
ram:
cpu:
karpenter calculates the same resources:
here is node information:
Expected Behavior:
karpenter creates node that can fit the pod
Reproduction Steps (Please include YAML):
Pod spec
### Pod

```yaml
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: karpenter
    owner: yevhen
  name: curl
  annotations:
    sidecar.istio.io/proxyCPU: "300m"
    sidecar.istio.io/proxyCPULimit: "300m"
    sidecar.istio.io/proxyMemory: "300Mi"
    sidecar.istio.io/proxyMemoryLimit: "300Mi"
spec:
  containers:
  - command:
    - /bin/sh
    - -c
    - sleep infinity
    image: fullstorydev/grpcurl:latest-alpine
    name: grpcurl1
    securityContext:
      runAsUser: 1000
      runAsGroup: 1000
    resources:
      limits:
        memory: 6Gi
      requests:
        cpu: 1260m
        memory: 6Gi
  - command:
    - /bin/sh
    - -c
    - sleep infinity
    image: fullstorydev/grpcurl:latest-alpine
    name: grpcurl
    securityContext:
      runAsUser: 1000
      runAsGroup: 1000
    resources:
      limits:
        memory: 128Mi
      requests:
        cpu: 50m
        memory: 128Mi
  tolerations:
  - effect: NoSchedule
    key: karpenter/lifecycle
    operator: Equal
    value: ondemand
```

Node pool spec
### NodePool

```yaml
# Source: karpenter/templates/nodepool.yaml
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
  name: ondemand-default
  namespace: karpenter
spec:
  # Template section that describes how to template out NodeClaim resources that Karpenter will provision
  # Karpenter will consider this template to be the minimum requirements needed to provision a Node using this NodePool
  # It will overlay this NodePool with Pods that need to schedule to further constrain the NodeClaims
  # Karpenter will provision to launch new Nodes for the cluster
  template:
    metadata:
      # Labels are arbitrary key-values that are applied to all nodes
      labels:
        node.kubernetes.io/lifecycle: ondemand
    spec:
      # References the Cloud Provider's NodeClass resource, see your cloud provider specific documentation
      nodeClassRef:
        name: al2
      # Provisioned nodes will have these taints
      # Taints may prevent pods from scheduling if they are not tolerated by the pod.
      taints:
        - key: karpenter/lifecycle
          effect: NoSchedule
          value: ondemand
      # Requirements that constrain the parameters of provisioned nodes.
      # These requirements are combined with pod.spec.topologySpreadConstraints, pod.spec.affinity.nodeAffinity, pod.spec.affinity.podAffinity, and pod.spec.nodeSelector rules.
      # Operators { In, NotIn, Exists, DoesNotExist, Gt, and Lt } are supported.
      # https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#operators
      requirements:
        - key: "karpenter.k8s.aws/instance-category"
          operator: In
          values: ["c", "m", "r"]
        - key: "karpenter.k8s.aws/instance-cpu"
          operator: In
          values:
            - "2"
            - "4"
        - key: "karpenter.k8s.aws/instance-hypervisor"
          operator: In
          values: ["nitro"]
        - key: "karpenter.k8s.aws/instance-generation"
          operator: In
          values: ["6"]
        - key: "topology.kubernetes.io/zone"
          operator: In
          values: ["eu-west-1a", "eu-west-1b", "eu-west-1c"]
        - key: "kubernetes.io/arch"
          operator: In
          values: ["amd64"]
        - key: "karpenter.sh/capacity-type"
          operator: In
          values: ["on-demand"]
      # Karpenter provides the ability to specify a few additional Kubelet args.
      # These are all optional and provide support for additional customization and use cases.
      kubelet:
        systemReserved:
          cpu: 100m
          memory: 205Mi
          ephemeral-storage: 1Gi
        kubeReserved:
          cpu: 100m
          memory: 384Mi
          ephemeral-storage: 1Gi
        imageGCHighThresholdPercent: 70
        imageGCLowThresholdPercent: 50
  # Disruption section which describes the ways in which Karpenter can disrupt and replace Nodes
  # Configuration in this section constrains how aggressive Karpenter can be with performing operations
  # like rolling Nodes due to them hitting their maximum lifetime (expiry) or scaling down nodes to reduce cluster cost
  disruption:
    # Describes which types of Nodes Karpenter should consider for consolidation
    # If using 'WhenUnderutilized', Karpenter will consider all nodes for consolidation and attempt to remove or replace Nodes when it discovers that the Node is underutilized and could be changed to reduce cost
    # If using `WhenEmpty`, Karpenter will only consider nodes for consolidation that contain no workload pods
    consolidationPolicy: WhenUnderutilized
    # The amount of time a Node can live on the cluster before being removed
    # Avoiding long-running Nodes helps to reduce security vulnerabilities as well as to reduce the chance of issues that can plague Nodes with long uptimes such as file fragmentation or memory leaks from system processes
    # You can choose to disable expiration entirely by setting the string value 'Never' here
    expireAfter: Never
  # Resource limits constrain the total size of the cluster.
  # Limits prevent Karpenter from creating new instances once the limit is exceeded.
  limits:
    cpu: 40
    memory: 160Gi
  # Priority given to the NodePool when the scheduler considers which NodePool
  # to select. Higher weights indicate higher priority when comparing NodePools.
  # Specifying no weight is equivalent to specifying a weight of 0.
  weight: 10
```

Versions:
Kubernetes Version (`kubectl version`):