kubernetes-sigs / karpenter

Karpenter is a Kubernetes Node Autoscaler built for flexibility, performance, and simplicity.
Apache License 2.0

Improve Unit Test Performance #1434

Open · ellistarn opened this issue 1 month ago

ellistarn commented 1 month ago

Description

What problem are you trying to solve?

Our unit tests currently take ~11m to run:

❯ ginkgo --json-report=report.json ./pkg/...  
...
❯ cat report.json | jq -r '
    [
        .[].SpecReports.[]?
        | select( .State | match("passed") )
        | select( .LeafNodeType | match("It"))
        | {
            Context: ( .ContainerHierarchyTexts | join("/")),
            Test: .LeafNodeText,
            Duration: (.RunTime * .000000001)
        }
    ]
    | sort_by( .Duration )
    | reverse
' | head -n 1000
[
  {
    "Context": "Simulate Scheduling",
    "Test": "should allow multiple replace operations to happen successively",
    "Duration": 32.523894042
  },
  {
    "Context": "Node Resource Level",
    "Test": "should nominate the node until the nomination time passes",
    "Duration": 21.033782709
  },
  {
    "Context": "Emptiness/Metrics",
    "Test": "should correctly report eligible nodes",
    "Duration": 16.133723833
  },
  {
    "Context": "Drift/Metrics",
    "Test": "should correctly report eligible nodes",
    "Duration": 16.115581041000002
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should not evict a new pod with the same name using the old pod's eviction queue key",
    "Duration": 12.290470666000001
  },
  {
    "Context": "Consolidation/Parallelization",
    "Test": "should not consolidate a node that is launched for pods on a deleting node",
    "Duration": 11.220440708
  },
  {
    "Context": "Cluster State Sync",
    "Test": "should consider the cluster state synced when nodes register provider id",
    "Duration": 11.005609917000001
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should not race if deleting nodes in parallel",
    "Duration": 10.97944225
  },
  {
    "Context": "Drift/Drift",
    "Test": "should disrupt all empty drifted nodes in parallel",
    "Duration": 9.930346958000001
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should only allow 3 nodes to be deleted in single node consolidation delete",
    "Duration": 8.820141083000001
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should not mark multi node consolidated if all candidates can't be disrupted due to budgets with many nodepools",
    "Duration": 8.760382583
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should not mark single node consolidated if all candidates can't be disrupted due to budgets with many nodepools",
    "Duration": 8.75175025
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should not mark empty node consolidated if all candidates can't be disrupted due to budgets with many nodepools",
    "Duration": 8.736871333
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should allow no nodes from each nodePool to be deleted",
    "Duration": 8.7070925
  },
  {
    "Context": "Drift/Budgets",
    "Test": "should allow 2 nodes from each nodePool to be deleted",
    "Duration": 8.703012792
  },
  {
    "Context": "Emptiness/Budgets",
    "Test": "should allow all nodes from each nodePool to be deleted",
    "Duration": 8.696840542
  },
  {
    "Context": "Emptiness/Budgets",
    "Test": "should allow 2 nodes from each nodePool to be deleted",
    "Duration": 8.687430959
  },
  {
    "Context": "Drift/Budgets",
    "Test": "should allow all nodes from each nodePool to be deleted",
    "Duration": 8.680630833
  },
  {
    "Context": "Consolidation/Metrics",
    "Test": "should correctly report eligible nodes",
    "Duration": 8.562009208000001
  },
  {
    "Context": "Drift/Budgets",
    "Test": "should allow no empty nodes to be disrupted",
    "Duration": 8.286191500000001
  },
  {
    "Context": "Drift/Budgets",
    "Test": "should disrupt 3 nodes, taking into account commands in progress",
    "Duration": 8.284387542000001
  },
  {
    "Context": "Emptiness/Budgets",
    "Test": "should allow no empty nodes to be disrupted",
    "Duration": 8.2681325
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should not mark empty node consolidated if the candidates can't be disrupted due to budgets with one nodepool",
    "Duration": 8.267685333000001
  },
  {
    "Context": "Emptiness/Budgets",
    "Test": "should allow all empty nodes to be disrupted",
    "Duration": 8.265484
  },
  {
    "Context": "Emptiness/Budgets",
    "Test": "should only allow 3 empty nodes to be disrupted",
    "Duration": 8.262080000000001
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should allow no empty nodes to be disrupted",
    "Duration": 8.260982791
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should not mark single node consolidated if the candidates can't be disrupted due to budgets with one nodepool",
    "Duration": 8.257949208000001
  },
  {
    "Context": "Drift/Budgets",
    "Test": "should allow all empty nodes to be disrupted",
    "Duration": 8.249083417000001
  },
  {
    "Context": "Drift/Budgets",
    "Test": "should only allow 3 empty nodes to be disrupted",
    "Duration": 8.246726125
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should not mark multi node consolidated if the candidates can't be disrupted due to budgets with one nodepool",
    "Duration": 8.232516375000001
  },
  {
    "Context": "Consolidation/Delete",
    "Test": "does not consolidate nodes with pods with blocking PDBs when the NodePool's TerminationGracePeriod is not nil",
    "Duration": 8.153741041
  },
  {
    "Context": "Consolidation/Delete",
    "Test": "does not consolidate nodes with karpenter.sh/do-not-disrupt on pods when the NodePool's TerminationGracePeriod is not nil",
    "Duration": 8.143890417
  },
  {
    "Context": "Consolidation/Topology Consideration",
    "Test": "won't delete node if it would violate pod anti-affinity",
    "Duration": 8.133962792
  },
  {
    "Context": "Drift/Drift",
    "Test": "should drift one non-empty node at a time, starting with the earliest drift",
    "Duration": 8.128929084000001
  },
  {
    "Context": "Drift/Drift",
    "Test": "should continue to the next drifted node if the first cannot reschedule all pods",
    "Duration": 8.126853625
  },
  {
    "Context": "Drift/Drift",
    "Test": "can replace drifted nodes with multiple nodes",
    "Duration": 8.12449675
  },
  {
    "Context": "Metrics",
    "Test": "should fire metrics for single node delete disruption",
    "Duration": 8.120533458
  },
  {
    "Context": "Consolidation/Delete",
    "Test": "won't delete node if it would require pods to schedule on an uninitialized node",
    "Duration": 8.110203292000001
  },
  {
    "Context": "Consolidation/Replace",
    "Test": "cannot replace spot with spot if it is part of the 15 cheapest instance types.",
    "Duration": 8.099352042000001
  },
  {
    "Context": "Metrics",
    "Test": "should fire metrics for single node replace disruption",
    "Duration": 8.098215583
  },
  {
    "Context": "Consolidation/Replace",
    "Test": "won't replace node if any spot replacement is more expensive",
    "Duration": 8.091043500000001
  },
  {
    "Context": "Consolidation/Replace",
    "Test": "cannot replace spot with spot if the spotToSpotConsolidation is disabled",
    "Duration": 8.090226334
  },
  {
    "Context": "Metrics",
    "Test": "should fire metrics for single node empty disruption",
    "Duration": 8.089243
  },
  {
    "Context": "Drift/Drift",
    "Test": "can replace drifted nodes",
    "Duration": 8.088351417
  },
  {
    "Context": "Drift/Drift",
    "Test": "should untaint nodes when drift replacement fails",
    "Duration": 8.087380417
  },
  {
    "Context": "Consolidation/Replace",
    "Test": "won't replace on-demand node if on-demand replacement is more expensive",
    "Duration": 8.08724175
  },
  {
    "Context": "Consolidation/Empty",
    "Test": "will not consider a node with a terminating StatefulSet pod as empty",
    "Duration": 8.085433375000001
  },
  {
    "Context": "Consolidation/Replace/Consolidation should fail if filterByPrice breaks the minimum requirement from the NodePools.",
    "Test": "if the candidate is on-demand node",
    "Duration": 8.083859667
  },
  {
    "Context": "Consolidation/Events",
    "Test": "should fire an event for ConsolidationDisabled when the NodePool has consolidateAfter set to 'Never'",
    "Duration": 8.083319709000001
  },
  {
    "Context": "Consolidation/Replace/Consolidation should fail if filterByPrice breaks the minimum requirement from the NodePools.",
    "Test": "if the candidate is spot node",
    "Duration": 8.082361
  },
  {
    "Context": "Consolidation/Replace",
    "Test": "cannot replace spot with spot if less than minimum InstanceTypes flexibility",
    "Duration": 8.08082825
  },
  {
    "Context": "Disruption Taints",
    "Test": "should remove taints from NodeClaims that were left tainted from a previous disruption action",
    "Duration": 8.079282958
  },
  {
    "Context": "Emptiness/Events",
    "Test": "should fire an event for ConsolidationDisabled when the NodePool has consolidateAfter set to 'Never'",
    "Duration": 8.074173083
  },
  {
    "Context": "Emptiness/Emptiness",
    "Test": "can delete empty nodes",
    "Duration": 8.073918500000001
  },
  {
    "Context": "Drift/Drift",
    "Test": "should delete nodes with the karpenter.sh/do-not-disrupt annotation set to false",
    "Duration": 8.068663625000001
  },
  {
    "Context": "Drift/Drift",
    "Test": "can delete drifted nodes",
    "Duration": 8.065728375
  },
  {
    "Context": "Consolidation/Events",
    "Test": "should not fire an event for ConsolidationDisabled when the NodePool has consolidation set to WhenEmpty",
    "Duration": 8.063702459
  },
  {
    "Context": "Cluster State Sync",
    "Test": "should consider the cluster state synced when nodes don't have provider id",
    "Duration": 5.98844975
  },
  {
    "Context": "Cluster State Sync",
    "Test": "shouldn't consider the cluster state synced if a nodeclaim isn't tracked",
    "Duration": 5.956088
  },
  {
    "Context": "Termination",
    "Test": "should delete the node and the CloudProvider NodeClaim when NodeClaim deletion is triggered",
    "Duration": 5.803826333
  },
  {
    "Context": "Termination",
    "Test": "should delete multiple Nodes if multiple Nodes map to the NodeClaim",
    "Duration": 5.6863575840000005
  },
  {
    "Context": "Cluster State Sync",
    "Test": "should consider the cluster state synced when a combination of nodeclaims and node are tracked",
    "Duration": 5.524504
  },
  {
    "Context": "Cluster State Sync",
    "Test": "shouldn't consider the cluster state synced if a nodeclaim hasn't resolved its provider id",
    "Duration": 5.065551958
  },
  {
    "Context": "Cluster State Sync",
    "Test": "should consider the cluster state synced when all nodeclaims are tracked",
    "Duration": 5.025458583000001
  },
  {
    "Context": "NodeClaimController/Termination failure",
    "Test": "should detect issues with a node that is stuck deleting due to a PDB",
    "Duration": 4.937743083
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should delete nodes",
    "Duration": 4.674330917000001
  },
  {
    "Context": "Registration",
    "Test": "should match the nodeClaim to the Node when the Node comes online",
    "Duration": 4.653033875
  },
  {
    "Context": "Cluster State Sync",
    "Test": "should consider the cluster state synced when all nodes are tracked",
    "Duration": 4.340660833
  },
  {
    "Context": "Cluster State Sync",
    "Test": "shouldn't consider the cluster state synced if a node isn't tracked",
    "Duration": 4.321930292
  },
  {
    "Context": "Cluster State Sync",
    "Test": "should consider the cluster state synced when the representation of nodes is the same",
    "Duration": 4.304416208
  },
  {
    "Context": "Initialization",
    "Test": "should consider the node to be initialized once all the resources are registered",
    "Duration": 4.220292459
  },
  {
    "Context": "Liveness",
    "Test": "should delete the nodeClaim when the Node hasn't registered past the registration ttl",
    "Duration": 4.2164779590000006
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should preemptively delete pods to satisfy their terminationGracePeriodSeconds",
    "Duration": 3.440397667
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should evict pods in order",
    "Duration": 3.4390829590000003
  },
  {
    "Context": "Termination",
    "Test": "should annotate the node if the NodeClaim has a terminationGracePeriod",
    "Duration": 3.4262987920000003
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should exclude nodes from load balancers when terminating",
    "Duration": 3.414806
  },
  {
    "Context": "Termination",
    "Test": "should not change the annotation if the NodeClaim has a terminationGracePeriod and the annotation already exists",
    "Duration": 3.4083171670000003
  },
  {
    "Context": "Termination",
    "Test": "should not delete the NodeClaim until all the Nodes are removed",
    "Duration": 3.3879076660000003
  },
  {
    "Context": "Termination",
    "Test": "should not annotate the node if the NodeClaim has no terminationGracePeriod",
    "Duration": 3.343226875
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should delete nodeclaims associated with nodes",
    "Duration": 3.2216667500000002
  },
  {
    "Context": "Registration",
    "Test": "should not re-sync the startupTaints to the Node when the startupTaints are removed",
    "Duration": 3.198619334
  },
  {
    "Context": "Initialization",
    "Test": "should not consider the Node to be initialized when all requested resources aren't registered",
    "Duration": 3.19543875
  },
  {
    "Context": "Initialization",
    "Test": "should consider the Node to be initialized once the startupTaints are removed",
    "Duration": 3.1818172500000004
  },
  {
    "Context": "Liveness",
    "Test": "should delete the NodeClaim when the NodeClaim hasn't launched past the registration ttl",
    "Duration": 3.178453459
  },
  {
    "Context": "Initialization",
    "Test": "should consider the nodeClaim initialized when all initialization conditions are met",
    "Duration": 3.1659988340000003
  },
  {
    "Context": "Initialization",
    "Test": "should add the initialization label to the node when the nodeClaim is initialized",
    "Duration": 3.1546763340000004
  },
  {
    "Context": "Initialization",
    "Test": "should consider the Node to be initialized once the ephemeralTaints are removed",
    "Duration": 3.1488386250000002
  },
  {
    "Context": "Launch",
    "Test": "should delete the nodeclaim if InsufficientCapacity is returned from the cloudprovider",
    "Duration": 3.1304977920000003
  },
  {
    "Context": "Rate Limiting",
    "Test": "should allow many events over time due to smoothed rate limiting",
    "Duration": 3.0058456660000004
  },
  {
    "Context": "Dedupe",
    "Test": "should allow the dedupe timeout to be overridden",
    "Duration": 3.001569667
  },
  {
    "Context": "Instance Type Selection",
    "Test": "should schedule on an instance with enough resources",
    "Duration": 2.739760625
  },
  {
    "Context": "GarbageCollection",
    "Test": "should delete the NodeClaim when the Node is there in a NotReady state and the instance is gone",
    "Duration": 2.669538417
  },
  {
    "Context": "Disruption",
    "Test": "should set multiple disruption conditions simultaneously",
    "Duration": 2.5856023340000003
  },
  {
    "Context": "Consolidation/Node Lifetime Consideration",
    "Test": "should consider node lifetime remaining when calculating disruption cost",
    "Duration": 2.5134506670000003
  },
  {
    "Context": "NodeUtils",
    "Test": "should return nodeClaim for node which has the same provider ID",
    "Duration": 2.497725334
  },
  {
    "Context": "Topology/Pod Affinity/Anti-Affinity",
    "Test": "should handle multiple dependent affinities",
    "Duration": 2.441003375
  },
  {
    "Context": "Topology/Combined Hostname, Zonal, and Capacity Type Topology",
    "Test": "should spread pods while respecting all constraints",
    "Duration": 2.3707257090000002
  },
  {
    "Context": "Simulate Scheduling",
    "Test": "should allow pods on deleting nodes to reschedule to uninitialized nodes",
    "Duration": 2.3068076250000003
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should wait for pods to terminate",
    "Duration": 2.263126417
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should not evict pods that tolerate karpenter disruption taint with equal operator",
    "Duration": 2.260168416
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should only delete pods when their terminationGracePeriodSeconds is less than the the node's remaining terminationGracePeriod",
    "Duration": 2.253497083
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should not delete nodes until all pods are deleted",
    "Duration": 2.2506315420000003
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should evict non-critical pods first",
    "Duration": 2.245968625
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should fail to evict pods that violate a PDB",
    "Duration": 2.238352792
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should delete nodes with no underlying instance even if not fully drained",
    "Duration": 2.235897542
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should evict pods that tolerate the node.kubernetes.io/unschedulable taint",
    "Duration": 2.23280675
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should not evict pods that tolerate karpenter disruption taint with exists operator",
    "Duration": 2.232103458
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should not delete nodes with no underlying instance if the node is still Ready",
    "Duration": 2.224313125
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should not evict static pods",
    "Duration": 2.223662542
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should delete nodes that have pods without an ownerRef",
    "Duration": 2.223279792
  },
  {
    "Context": "Termination/Metrics",
    "Test": "should fire the nodesTerminated counter metric when deleting nodes",
    "Duration": 2.221668333
  },
  {
    "Context": "Termination/Metrics",
    "Test": "should fire the terminationSummary metric when deleting nodes",
    "Duration": 2.22094025
  },
  {
    "Context": "Termination/Reconciliation",
    "Test": "should delete nodes with terminal pods",
    "Duration": 2.217918875
  },
  {
    "Context": "Termination",
    "Test": "should requeue reconciliation if cloudProvider Get returns an error other than NodeClaimNotFoundError",
    "Duration": 2.183665667
  },
  {
    "Context": "Termination",
    "Test": "should not remove the finalizer and terminate the NodeClaim if the cloudProvider instance is still around",
    "Duration": 2.159486792
  },
  {
    "Context": "Registration",
    "Test": "should sync the labels to the Node when the Node comes online",
    "Duration": 2.150637708
  },
  {
    "Context": "Registration",
    "Test": "should sync the startupTaints to the Node when the Node comes online",
    "Duration": 2.150029416
  },
  {
    "Context": "Registration",
    "Test": "should sync the taints to the Node when the Node comes online",
    "Duration": 2.1413073750000002
  },
  {
    "Context": "Initialization",
    "Test": "should not consider the Node to be initialized when the status of the Node is NotReady",
    "Duration": 2.140985416
  },
  {
    "Context": "Registration",
    "Test": "should sync the karpenter.sh/registered label to the Node and remove the karpenter.sh/unregistered taint when the Node comes online",
    "Duration": 2.136108875
  },
  {
    "Context": "Initialization",
    "Test": "should not consider the Node to be initialized when all startupTaints aren't removed",
    "Duration": 2.1346815830000003
  },
  {
    "Context": "Registration",
    "Test": "should add the owner reference to the Node when the Node comes online",
    "Duration": 2.133870167
  },
  {
    "Context": "Scheduling/VolumeUsage",
    "Test": "should launch nodes for pods with ephemeral volume using the newest storage class",
    "Duration": 2.133077959
  },
  {
    "Context": "Registration",
    "Test": "should sync the annotations to the Node when the Node comes online",
    "Duration": 2.129349583
  },
  {
    "Context": "Initialization",
    "Test": "should not consider the Node to be initialized when all ephemeralTaints aren't removed",
    "Duration": 2.128560959
  },
  {
    "Context": "Liveness",
    "Test": "shouldn't delete the nodeClaim when the node has registered past the registration ttl",
    "Duration": 2.124958834
  },
  {
    "Context": "Queue/Reconcile",
    "Test": "should keep nodes tainted when replacements haven't finished initialization",
    "Duration": 2.116940875
  },
  {
    "Context": "Taints",
    "Test": "should taint nodes with NodePool taints",
    "Duration": 2.1108731250000003
  },
  {
    "Context": "Registration",
    "Test": "should fail registration if the karpenter.sh/unregistered taint is not present on the node and the node isn't labeled as registered",
    "Duration": 2.104076959
  },
  {
    "Context": "Provisioning",
    "Test": "should provision nodes",
    "Duration": 2.101663625
  },
  {
    "Context": "Node Resource Level",
    "Test": "should not count pods not bound to nodes",
    "Duration": 2.0983097500000003
  },
  {
    "Context": "Counter",
    "Test": "should set well-known resource to zero when no nodes exist in the cluster",
    "Duration": 2.069204208
  },
  {
    "Context": "Static Drift Hash",
    "Test": "should update the drift hash when NodePool static field is updated",
    "Duration": 2.0657980840000003
  },
  {
    "Context": "TerminationUtils",
    "Test": "should not call cloudProvider Delete if the status condition is already Terminating",
    "Duration": 2.0647611660000003
  },
  {
    "Context": "Counter",
    "Test": "should set the NodePoolValidationSucceeded status condition to true if nodePool healthy checks succeed",
    "Duration": 2.053624833
  },
  {
    "Context": "Readiness",
    "Test": "should have status condition on nodePool as not ready when nodeClass does not exist",
    "Duration": 2.052427583
  },
  {
    "Context": "Consolidation/Delete",
    "Test": "should consider initialized nodes before uninitialized nodes",
    "Duration": 2.043118542
  },
  {
    "Context": "Metrics",
    "Test": "should update the nodepool limit metrics",
    "Duration": 2.0374397920000002
  },
  {
    "Context": "CEL/Validation/Disruption",
    "Test": "should fail on negative expireAfter",
    "Duration": 2.0188456670000003
  },
  {
    "Context": "CEL/Validation/Disruption",
    "Test": "should fail on negative expireAfter",
    "Duration": 2.018241416
  },
  {
    "Context": "Scheduling/Existing Nodes",
    "Test": "should order initialized nodes for scheduling uninitialized nodes",
    "Duration": 1.7812367500000001
  },
  {
    "Context": "Termination",
    "Test": "should not delete nodes without provider ids if the NodeClaim hasn't been launched yet",
    "Duration": 1.2637877080000002
  },
  {
    "Context": "Termination",
    "Test": "should not call Delete() on the CloudProvider if the NodeClaim hasn't been launched yet",
    "Duration": 1.2131474580000001
  },
  {
    "Context": "Emptiness",
    "Test": "should remove the status condition from NodeClaims that have a StatefulSet pod in terminating state",
    "Duration": 1.211594459
  },
  {
    "Context": "Termination/Metrics",
    "Test": "should update the eviction queueDepth metric when reconciling pods",
    "Duration": 1.1370082910000001
  },
  {
    "Context": "Finalizer",
    "Test": "should add the finalizer if it doesn't exist",
    "Duration": 1.105011459
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should allow 2 nodes from each nodePool to be deleted",
    "Duration": 1.099006667
  },
  {
    "Context": "Launch",
    "Test": "should launch an instance when a new NodeClaim is created",
    "Duration": 1.093106541
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should allow all nodes from each nodePool to be deleted",
    "Duration": 1.0911595
  },
  {
    "Context": "Launch",
    "Test": "should add the Launched status condition after creating the NodeClaim",
    "Duration": 1.08493075
  },
  {
    "Context": "DaemonSet Controller",
    "Test": "should update daemonsetCache with the newest created pod",
    "Duration": 1.034196041
  },
  {
    "Context": "Launch",
    "Test": "should requeue with no error if NodeClassNotReady is returned from the cloudprovider",
    "Duration": 1.032859167
  },
  {
    "Context": "Scheduling/In-Flight Nodes",
    "Test": "should order initialized nodes for scheduling uninitialized nodes when all other nodes are inflight",
    "Duration": 0.9783280000000001
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should only allow 3 nodes to be deleted in multi node consolidation delete",
    "Duration": 0.700453834
  },
  {
    "Context": "Scheduling/Binpacking",
    "Test": "should pack small and large pods together",
    "Duration": 0.6829495
  },
  {
    "Context": "Scheduling/In-Flight Nodes",
    "Test": "should pack in-flight nodes before launching new nodes",
    "Duration": 0.6771475410000001
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should only allow 3 empty nodes to be disrupted",
    "Duration": 0.655712708
  },
  {
    "Context": "Consolidation/Multi-NodeClaim/can merge 3 nodes into 1",
    "Test": "if the candidate is spot node",
    "Duration": 0.6473607920000001
  },
  {
    "Context": "Consolidation/Topology Consideration",
    "Test": "can replace node maintaining zonal topology spread",
    "Duration": 0.6405065830000001
  },
  {
    "Context": "Consolidation/Multi-NodeClaim/can merge 3 nodes into 1",
    "Test": "if the candidate is on-demand node",
    "Duration": 0.6336046670000001
  },
  {
    "Context": "Consolidation/Replace/can replace nodes, considers karpenter.sh/do-not-disrupt on pods",
    "Test": "if the candidate is spot node",
    "Duration": 0.6329147500000001
  },
  {
    "Context": "Consolidation/Budgets",
    "Test": "should allow all empty nodes to be disrupted",
    "Duration": 0.630311084
  },
  {
    "Context": "Consolidation/Multi-NodeClaim",
    "Test": "can merge 3 nodes into 1 if the candidates have both spot and on-demand",
    "Duration": 0.629087166
  },
  {
    "Context": "Scheduling/Binpacking",
    "Test": "should create new nodes when a node is at capacity",
    "Duration": 0.626008875
  },
  {
    "Context": "Consolidation/Replace/can replace nodes, considers karpenter.sh/do-not-disrupt on nodes",
    "Test": "if the candidate is on-demand node",
    "Duration": 0.606644708
  },
  {
    "Context": "Consolidation/Replace/can replace nodes, considers karpenter.sh/do-not-disrupt on pods",
    "Test": "if the candidate is on-demand node",
    "Duration": 0.6031961250000001
  },
  {
    "Context": "Consolidation/Replace/can replace nodes, considers karpenter.sh/do-not-disrupt on nodes",
    "Test": "if the candidate is spot node",
    "Duration": 0.601649584
  },
  {
    "Context": "Scheduling/VolumeUsage",
    "Test": "should launch a single node if all pods use the same PVC",
    "Duration": 0.5997259580000001
  },
  {
    "Context": "Consolidation/TTL",
    "Test": "should not delete node if pods schedule with karpenter.sh/do-not-disrupt during the TTL wait",
    "Duration": 0.595742
  },
  {
    "Context": "Consolidation/TTL",
    "Test": "should not delete node if pods schedule with a blocking PDB during the TTL wait",
    "Duration": 0.593221208
  },
  {
    "Context": "Disruption Taints",
    "Test": "should add and remove taints from NodeClaims that fail to disrupt",
    "Duration": 0.5929566660000001
  },
  {
    "Context": "Consolidation/Parallelization",
    "Test": "should schedule an additional node when receiving pending pods while consolidating",
    "Duration": 0.589504667
  },
  {
    "Context": "Consolidation/Replace",
    "Test": "spot to spot consolidation should order the instance types by price before enforcing minimum flexibility.",
    "Duration": 0.587170375
  },
  {
    "Context": "Consolidation/Replace",
    "Test": "spot to spot consolidation should consider the default for truncation if minimum number of instanceTypeOptions from minValues in requirement is less than 15.",
    "Duration": 0.578029792
  },
  {
    "Context": "Consolidation/Replace/can replace nodes, considers PDB policy",
    "Test": "if the candidate is spot node",
    "Duration": 0.577755291
  },
  {
    "Context": "Consolidation/Replace/can replace nodes, considers PDB policy",
    "Test": "if the candidate is on-demand node",
    "Duration": 0.574503667
  },
  {
    "Context": "Consolidation/Replace/can replace node",
    "Test": "if the candidate is spot node",
    "Duration": 0.5740897500000001
  },
  {
    "Context": "Simulate Scheduling",
    "Test": "can replace node with a local PV (ignoring hostname affinity)",
    "Duration": 0.5734051250000001
  },
  {
    "Context": "Consolidation/Replace/can replace nodes, PDB namespace must match",
    "Test": "if the candidate is spot node",
    "Duration": 0.572772709
  },
  {
    "Context": "Consolidation/Replace/can replace nodes if another nodePool returns no instance types",
    "Test": "if the candidate is on-demand node",
    "Duration": 0.571311334
  },
  {
    "Context": "Metrics",
    "Test": "should fire metrics for multi-node replace disruption",
    "Duration": 0.571134
  },
  {
    "Context": "Node Resource Level",
    "Test": "should maintain a correct count of resource usage as pods are deleted/added",
    "Duration": 0.569504833
  },
  {
    "Context": "Consolidation/TTL",
    "Test": "should not replace node if a pod schedules with a blocking PDB during the TTL wait",
    "Duration": 0.567194792
  },
  {
    "Context": "Consolidation/TTL",
    "Test": "should not replace node if a pod schedules with karpenter.sh/do-not-disrupt during the TTL wait",
    "Duration": 0.566428708
  },
  {
    "Context": "Consolidation/Replace/can replace nodes, PDB namespace must match",
    "Test": "if the candidate is on-demand node",
    "Duration": 0.565981458
  },
  {
    "Context": "Consolidation/Replace/can replace node",
    "Test": "if the candidate is on-demand node",
    "Duration": 0.564417417
  },
  {
    "Context": "Consolidation/Replace/can replace nodes if another nodePool returns no instance types",
    "Test": "if the candidate is spot node",
    "Duration": 0.560647542
  },
  {
    "Context": "Metrics",
    "Test": "should fire metrics for multi-node delete disruption",
    "Duration": 0.5498057080000001
  },
  {
    "Context": "Consolidation/Replace",
    "Test": "spot to spot consolidation should consider the max of default and minimum number of instanceTypeOptions from minValues in requirement for truncation if minimum number of instanceTypeOptions from minValues in requirement is greater than 15.",
    "Duration": 0.526836166
  },
  {
    "Context": "Metrics",
    "Test": "should fire metrics for multi-node empty disruption",
    "Duration": 0.5257260420000001
  },
  {
    "Context": "Consolidation/Delete",
    "Test": "can delete nodes",
    "Duration": 0.5246613330000001
  },
  {
    "Context": "Data Races",
    "Test": "should ensure that calling Synced() is valid while making updates to NodeClaims",
    "Duration": 0.5210924170000001
  },
  {
    "Context": "Consolidation/Delete",
    "Test": "can delete nodes, considers karpenter.sh/do-not-disrupt on pods",
    "Duration": 0.519534833
  },
  {
    "Context": "Consolidation/Multi-NodeClaim/won't merge 2 nodes into 1 of the same type",
    "Test": "if the candidate is spot node",
    "Duration": 0.516923083
  },
  {
    "Context": "Consolidation/Multi-NodeClaim/won't merge 2 nodes into 1 of the same type",
    "Test": "if the candidate is on-demand node",
    "Duration": 0.5150926250000001
  },
  {
    "Context": "Consolidation/Delete",
    "Test": "can delete nodes if another nodePool has no node template",
    "Duration": 0.515030583
  },
  {
    "Context": "Consolidation/Delete",
    "Test": "can delete nodes, evicts pods without an ownerRef",
    "Duration": 0.5126138330000001
  },
  {
    "Context": "Consolidation/Delete",
    "Test": "can delete nodes, considers karpenter.sh/do-not-disrupt on nodes",
    "Duration": 0.511253791
  },
  {
    "Context": "Consolidation/Delete",
    "Test": "can delete nodes, considers PDB",
    "Duration": 0.508330417
  },
  {
    "Context": "Consolidation/Delete",
    "Test": "can delete nodes with a permanently pending pod",
    "Duration": 0.507839542
k8s-ci-robot commented 1 month ago

This issue is currently awaiting triage.

If Karpenter contributors determine this is a relevant issue, they will accept it by applying the triage/accepted label and providing further guidance.

The triage/accepted label can be added by org members by writing /triage accepted in a comment.

Instructions for interacting with me using PR comments are available [here](https://git.k8s.io/community/contributors/guide/pull-requests.md). If you have questions or suggestions related to my behavior, please file an issue against the [kubernetes-sigs/prow](https://github.com/kubernetes-sigs/prow/issues/new?title=Prow%20issue:) repository.
ellistarn commented 1 month ago

Looks like this is the next most interesting suite:

❯ ginkgo --json-report=report.json ./pkg/controllers/state           
Running Suite: Controllers/State - /Users/etarn/go/src/github.com/kubernetes-sigs/karpenter/pkg/controllers/state
=================================================================================================================
Random Seed: 1721259627

Will run 50 of 50 specs
••••••••••••••••••••••••••••••••••••••••••••••••••

Ran 50 of 50 Specs in 82.274 seconds
SUCCESS! -- 50 Passed | 0 Failed | 0 Pending | 0 Skipped
PASS

Ginkgo ran 1 suite in 1m27.219692709s
Test Suite Passed
❯ cat report.json | jq -r '
    [
        .[].SpecReports.[]?
        | select( .State | match("passed") )
        | select( .LeafNodeType | match("It"))
        | {
            Context: ( .ContainerHierarchyTexts | join("/")),
            Test: .LeafNodeText,
            Duration: (.RunTime * .000000001)
        }
    ]
    | sort_by( .Duration )
    | reverse
' | head -n 999
[
  {
    "Context": "Node Resource Level",
    "Test": "should nominate the node until the nomination time passes",
    "Duration": 21.0333535
  },
  {
    "Context": "Cluster State Sync",
    "Test": "should consider the cluster state synced when nodes register provider id",
    "Duration": 10.97940325
  },
  {
    "Context": "Cluster State Sync",
    "Test": "should consider the cluster state synced when nodes don't have provider id",
    "Duration": 6.0300515830000005
  },
  {
    "Context": "Cluster State Sync",
    "Test": "shouldn't consider the cluster state synced if a nodeclaim isn't tracked",
    "Duration": 5.881686209000001
  },
  {
    "Context": "Cluster State Sync",
    "Test": "should consider the cluster state synced when a combination of nodeclaims and node are tracked",
    "Duration": 5.5259662910000005
  },
  {
    "Context": "Cluster State Sync",
    "Test": "shouldn't consider the cluster state synced if a nodeclaim hasn't resolved its provider id",
    "Duration": 5.143172209
  },
  {
    "Context": "Cluster State Sync",
    "Test": "should consider the cluster state synced when all nodeclaims are tracked",
    "Duration": 5.0917059590000004
  },
  {
    "Context": "Cluster State Sync",
    "Test": "should consider the cluster state synced when the representation of nodes is the same",
    "Duration": 4.420585583
  },
  {
    "Context": "Cluster State Sync",
    "Test": "shouldn't consider the cluster state synced if a node isn't tracked",
    "Duration": 4.31708725
  },
  {
    "Context": "Cluster State Sync",
    "Test": "should consider the cluster state synced when all nodes are tracked",
    "Duration": 4.3018652500000005
  },
  {
    "Context": "Pod Anti-Affinity",
    "Test": "should track pods with required anti-affinity",
    "Duration": 2.097221375
  },
  {
    "Context": "DaemonSet Controller",
    "Test": "should update daemonsetCache with the newest created pod",
    "Duration": 1.044328042
  },
  {
    "Context": "Node Resource Level",
    "Test": "should maintain a correct count of resource usage as pods are deleted/added",
    "Duration": 0.584069625
  },
  {
    "Context": "Data Races",
    "Test": "should ensure that calling Synced() is valid while making updates to NodeClaims",
    "Duration": 0.5499530420000001
  },
  {
    "Context": "Data Races",
    "Test": "should ensure that calling Synced() is valid while making updates to Nodes",
    "Duration": 0.41159662500000005
  },
  {
    "Context": "Volume Usage/Limits",
    "Test": "should maintain the volume usage state when receiving NodeClaim updates",
    "Duration": 0.08291237500000001
  },
  {
    "Context": "Volume Usage/Limits",
    "Test": "should ignore the volume usage limits breach if the pod update is for an already tracked pod",
    "Duration": 0.08225320800000001
  },
  {
    "Context": "Volume Usage/Limits",
    "Test": "should hydrate the volume usage on a Node update",
    "Duration": 0.07980158300000001
  },
  {
    "Context": "HostPort Usage",
    "Test": "should maintain the host port usage state when receiving NodeClaim updates",
    "Duration": 0.07773129100000001
  },
  {
    "Context": "HostPort Usage",
    "Test": "should hydrate the HostPort usage on a Node update",
    "Duration": 0.059204375000000004
  },
  {
    "Context": "HostPort Usage",
    "Test": "should ignore the host port usage conflict if the pod update is for an already tracked pod",
    "Duration": 0.05657450000000001
  },
  {
    "Context": "Node Resource Level",
    "Test": "should track pods correctly if we miss events or they are consolidated",
    "Duration": 0.035602834
  },
  {
    "Context": "Taints/Managed",
    "Test": "should consider ephemeral taints on a managed node after the node is initialized",
    "Duration": 0.034931542
  },
  {
    "Context": "Node Resource Level",
    "Test": "should handle a node changing from no providerID to registering a providerID",
    "Duration": 0.033315625
  },
  {
    "Context": "Node Resource Level",
    "Test": "should subtract requests if the pod is deleted",
    "Duration": 0.031978
  },
  {
    "Context": "Taints/Managed",
    "Test": "should consider startup taints on a managed node after the node is initialized",
    "Duration": 0.031303792000000004
  },
  {
    "Context": "Node Resource Level",
    "Test": "should not add requests if the pod is terminal",
    "Duration": 0.031153792000000003
  },
  {
    "Context": "DaemonSet Controller",
    "Test": "should delete daemonset in cache when daemonset is deleted",
    "Duration": 0.030477708000000003
  },
  {
    "Context": "Node Resource Level",
    "Test": "should track daemonset requested resources separately",
    "Duration": 0.029702000000000003
  },
  {
    "Context": "Node Resource Level",
    "Test": "should count new pods bound to nodes",
    "Duration": 0.029567417000000002
  },
  {
    "Context": "Node Resource Level",
    "Test": "should not count pods not bound to nodes",
    "Duration": 0.028627209
  },
  {
    "Context": "Node Resource Level",
    "Test": "should stop tracking nodes that are deleted",
    "Duration": 0.028294958000000002
  },
  {
    "Context": "Node Resource Level",
    "Test": "should count existing pods bound to nodes",
    "Duration": 0.027639416000000003
  },
  {
    "Context": "Pod Anti-Affinity",
    "Test": "should stop tracking pods with required anti-affinity if the pod is deleted",
    "Duration": 0.027246959
  },
  {
    "Context": "Pod Anti-Affinity",
    "Test": "should not track pods with preferred anti-affinity",
    "Duration": 0.027089750000000003
  },
  {
    "Context": "Node Resource Level",
    "Test": "should mark node for deletion when nodeclaim is deleted",
    "Duration": 0.026867041
  },
  {
    "Context": "Cluster State Sync",
    "Test": "shouldn't consider the cluster state synced if a nodeclaim without a providerID is deleted",
    "Duration": 0.025000209000000002
  },
  {
    "Context": "Taints/Managed",
    "Test": "should consider startup taints on a managed node that isn't initialized",
    "Duration": 0.024672125000000003
  },
  {
    "Context": "Pod Anti-Affinity",
    "Test": "should handle events out of order",
    "Duration": 0.024533959
  },
  {
    "Context": "Taints/Unmanaged",
    "Test": "should consider ephemeral taints on an unmanaged node after the node is initialized",
    "Duration": 0.024231000000000003
  },
  {
    "Context": "Node Deletion",
    "Test": "should not leak a state node when the NodeClaim and Node names match",
    "Duration": 0.024045125
  },
  {
    "Context": "Taints/Managed",
    "Test": "should not consider ephemeral taints on a managed node that isn't initialized",
    "Duration": 0.023652041000000002
  },
  {
    "Context": "Consolidated State",
    "Test": "should cause consolidation state to change when a NodePool is updated",
    "Duration": 0.021376542000000002
  },
  {
    "Context": "Node Resource Level",
    "Test": "should mark node for deletion when node is deleted",
    "Duration": 0.020735333
  },
  {
    "Context": "DaemonSet Controller",
    "Test": "should update daemonsetCache when daemonset pod is created",
    "Duration": 0.019265541
  },
  {
    "Context": "Taints/Unmanaged",
    "Test": "should consider ephemeral taints on an unmanaged node that isn't initialized",
    "Duration": 0.018855291
  },
  {
    "Context": "Consolidated State",
    "Test": "should update the consolidated value when setting consolidation",
    "Duration": 0.0185365
  },
  {
    "Context": "Cluster State Sync",
    "Test": "shouldn't consider the cluster state synced if a nodeclaim is added manually with UpdateNodeClaim",
    "Duration": 0.018498917
  },
  {
    "Context": "DaemonSet Controller",
    "Test": "should not update daemonsetCache when daemonset pod is not present",
    "Duration": 0.017861375000000002
  },
  {
    "Context": "Consolidated State",
    "Test": "should update the consolidated value when consolidation timeout (5m) has passed and state hasn't changed",
    "Duration": 0.016985167000000002
  }
]
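
Both runs point at the same shape of problem: the Cluster State Sync specs each take 4–11s, and "should nominate the node until the nomination time passes" alone is ~21s. While iterating on fixes, a couple of standard ginkgo v2 flags may help narrow things down (sketched here without having verified that these suites are safe to run in parallel):

# Re-run only the dominant context while working on a fix
❯ ginkgo --focus "Cluster State Sync" ./pkg/controllers/state

# See how much of the wall clock is serial waiting by running specs in parallel
❯ ginkgo -p --json-report=report.json ./pkg/controllers/state

If the specs share package-level state, the parallel run may surface ordering assumptions before it saves any time, but it is a cheap way to separate "slow because of real work" from "slow because of waiting".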