Open miteshbsjat opened 3 days ago
It's an interesting point and I'm not 100% sure if this has been solved in recent versions. Which KEDA version are you using?
I used KEDA version 2.13.1 initially, then switched to 2.14.
$ git diff
diff --git a/controllers/keda/scaledjob_controller.go b/controllers/keda/scaledjob_controller.go
index 98c1ce87..1c09f232 100755
--- a/controllers/keda/scaledjob_controller.go
+++ b/controllers/keda/scaledjob_controller.go
@@ -279,11 +279,22 @@ func (r *ScaledJobReconciler) deletePreviousVersionScaleJobs(ctx context.Context
return "Cannot get list of Jobs owned by this scaledJob", err
}
+ generation := strconv.FormatInt(scaledJob.GetGeneration(), 10)
if len(jobs.Items) > 0 {
logger.Info("RolloutStrategy: immediate, Deleting jobs owned by the previous version of the scaledJob", "numJobsToDelete", len(jobs.Items))
}
for _, job := range jobs.Items {
job := job
+ jobGenerationStr, label_present := job.GetLabels()["scaledjob-generation"]
+ if label_present {
+ logger.Info("scaledjob-generation label present", "generation", generation,
+ "jobGeneration", jobGenerationStr, "job.Generation", job.Generation)
+ jobGeneration, _ := strconv.ParseInt(jobGenerationStr, 10, 64)
+ if jobGeneration >= scaledJob.GetGeneration() {
+ logger.Info("Not deleting current generation jobs")
+ continue
+ }
+ }
propagationPolicy := metav1.DeletePropagationBackground
if scaledJob.Spec.Rollout.PropagationPolicy == "foreground" {
diff --git a/pkg/scaling/executor/scale_jobs.go b/pkg/scaling/executor/scale_jobs.go
index 52f7ea37..1fcce707 100644
--- a/pkg/scaling/executor/scale_jobs.go
+++ b/pkg/scaling/executor/scale_jobs.go
@@ -126,12 +126,14 @@ func (e *scaleExecutor) generateJobs(logger logr.Logger, scaledJob *kedav1alpha1
}
scaledJob.Spec.JobTargetRef.Template.Labels["scaledjob.keda.sh/name"] = scaledJob.GetName()
+ scaledJobGeneration := strconv.FormatInt(scaledJob.GetGeneration(), 10)
labels := map[string]string{
"app.kubernetes.io/name": scaledJob.GetName(),
"app.kubernetes.io/version": version.Version,
"app.kubernetes.io/part-of": scaledJob.GetName(),
"app.kubernetes.io/managed-by": "keda-operator",
"scaledjob.keda.sh/name": scaledJob.GetName(),
+ "scaledjob-generation": scaledJobGeneration,
}
for key, value := range scaledJob.ObjectMeta.Labels {
labels[key] = value
2024-05-31T07:22:38Z INFO scaleexecutor Scaling Jobs {"scaledJob.Name": "rabbitmq-consumer", "scaledJob.Namespace": "default", "Number of running Jobs": 2}
2024-05-31T07:22:38Z INFO scaleexecutor Scaling Jobs {"scaledJob.Name": "rabbitmq-consumer", "scaledJob.Namespace": "default", "Number of pending Jobs ": 0}
2024-05-31T07:22:38Z INFO scaleexecutor Creating jobs {"scaledJob.Name": "rabbitmq-consumer", "scaledJob.Namespace": "default", "Effective number of max jobs": 0}
2024-05-31T07:22:38Z INFO scaleexecutor Creating jobs {"scaledJob.Name": "rabbitmq-consumer", "scaledJob.Namespace": "default", "Number of jobs": 0}
2024-05-31T07:22:38Z INFO scaleexecutor Created jobs {"scaledJob.Name": "rabbitmq-consumer", "scaledJob.Namespace": "default", "Number of jobs": 0}
2024-05-31T07:22:39Z INFO Reconciling ScaledJob {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"rabbitmq-consumer","namespace":"default"}, "namespace": "default", "name": "rabbitmq-consumer", "reconcileID": "ecde4b22-0cbe-4a6d-a77a-780242fccb8e"}
2024-05-31T07:22:39Z INFO RolloutStrategy: immediate, Deleting jobs owned by the previous version of the scaledJob {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"rabbitmq-consumer","namespace":"default"}, "namespace": "default", "name": "rabbitmq-consumer", "reconcileID": "ecde4b22-0cbe-4a6d-a77a-780242fccb8e", "numJobsToDelete": 2}
2024-05-31T07:22:39Z INFO scaledjob-generation label present {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"rabbitmq-consumer","namespace":"default"}, "namespace": "default", "name": "rabbitmq-consumer", "reconcileID": "ecde4b22-0cbe-4a6d-a77a-780242fccb8e", "generation": "3", "jobGeneration": "2", "job.Generation": 1}
2024-05-31T07:22:39Z INFO scaledjob-generation label present {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"rabbitmq-consumer","namespace":"default"}, "namespace": "default", "name": "rabbitmq-consumer", "reconcileID": "ecde4b22-0cbe-4a6d-a77a-780242fccb8e", "generation": "3", "jobGeneration": "2", "job.Generation": 1}
2024-05-31T07:22:39Z INFO Initializing Scaling logic according to ScaledJob Specification {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"rabbitmq-consumer","namespace":"default"}, "namespace": "default", "name": "rabbitmq-consumer", "reconcileID": "ecde4b22-0cbe-4a6d-a77a-780242fccb8e"}
2024-05-31T07:22:39Z INFO scaleexecutor Scaling Jobs {"scaledJob.Name": "rabbitmq-consumer", "scaledJob.Namespace": "default", "Number of running Jobs": 0}
2024-05-31T07:22:39Z INFO scaleexecutor Scaling Jobs {"scaledJob.Name": "rabbitmq-consumer", "scaledJob.Namespace": "default", "Number of pending Jobs ": 0}
2024-05-31T07:22:39Z INFO scaleexecutor Creating jobs {"scaledJob.Name": "rabbitmq-consumer", "scaledJob.Namespace": "default", "Effective number of max jobs": 3}
The `scaledjob-generation` label matches `ScaledJob.Generation`:
2024-05-31T07:25:06Z INFO scaledjob-generation label present {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"rabbitmq-consumer","namespace":"default"}, "namespace": "default", "name": "rabbitmq-consumer", "reconcileID": "cd260534-acab-420b-8e28-2bd184303b99", "generation": "3", "jobGeneration": "3", "job.Generation": 1}
2024-05-31T07:25:06Z INFO Not deleting current generation jobs {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"rabbitmq-consumer","namespace":"default"}, "namespace": "default", "name": "rabbitmq-consumer", "reconcileID": "cd260534-acab-420b-8e28-2bd184303b99"}
2024-05-31T07:25:06Z INFO scaledjob-generation label present {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"rabbitmq-consumer","namespace":"default"}, "namespace": "default", "name": "rabbitmq-consumer", "reconcileID": "cd260534-acab-420b-8e28-2bd184303b99", "generation": "3", "jobGeneration": "3", "job.Generation": 1}
2024-05-31T07:25:06Z INFO Not deleting current generation jobs {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"rabbitmq-consumer","namespace":"default"}, "namespace": "default", "name": "rabbitmq-consumer", "reconcileID": "cd260534-acab-420b-8e28-2bd184303b99"}
2024-05-31T07:25:06Z INFO scaledjob-generation label present {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"rabbitmq-consumer","namespace":"default"}, "namespace": "default", "name": "rabbitmq-consumer", "reconcileID": "cd260534-acab-420b-8e28-2bd184303b99", "generation": "3", "jobGeneration": "3", "job.Generation": 1}
2024-05-31T07:25:06Z INFO Not deleting current generation jobs {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"rabbitmq-consumer","namespace":"default"}, "namespace": "default", "name": "rabbitmq-consumer", "reconcileID": "cd260534-acab-420b-8e28-2bd184303b99"}
2024-05-31T07:25:06Z INFO Initializing Scaling logic according to ScaledJob Specification {"controller": "scaledjob", "controllerGroup": "keda.sh", "controllerKind": "ScaledJob", "ScaledJob": {"name":"rabbitmq-consumer","namespace":"default"}, "namespace": "default", "name": "rabbitmq-consumer", "reconcileID": "cd260534-acab-420b-8e28-2bd184303b99"}
2024-05-31T07:25:06Z INFO scaleexecutor Scaling Jobs {"scaledJob.Name": "rabbitmq-consumer", "scaledJob.Namespace": "default", "Number of running Jobs": 3}
2024-05-31T07:25:06Z INFO scaleexecutor Scaling Jobs {"scaledJob.Name": "rabbitmq-consumer", "scaledJob.Namespace": "default", "Number of pending Jobs ": 0}
2024-05-31T07:25:08Z INFO cert-rotation CA certs are injected to webhooks
See the `AGE` field:
$ kubectl get pods
NAME READY STATUS RESTARTS AGE
rabbitmq-0 1/1 Running 1 (91m ago) 16d
rabbitmq-consumer-6jldg-zrjn9 1/1 Running 0 3m13s
rabbitmq-consumer-dhnr7-d29db 1/1 Running 0 3m13s
rabbitmq-consumer-n92jb-84kz8 1/1 Running 0 3m13s
rabbitmq-publish-wmcdf 0/1 Completed 0 5m36s
However, when I forked the repository and tried to implement the above logic in the current code (2.15+), I discovered that similar logic has already been implemented in the v2.15 `scaledjob_controller.go`.
KEDA is deleting `Jobs` created by `ScaledJobs` with rollout strategy `default` on startup. Rollout strategy `default` is supposed to terminate existing Jobs whenever a ScaledJob is updated, but KEDA appears to incorrectly interpret the reconciliation triggered by initial controller watch establishment as an update, causing `Jobs` with the current configuration to be terminated prematurely. This causes artificial job delays for users, especially if KEDA restarts frequently while Jobs are in progress.
A possible solution is to modify KEDA to distinguish between ScaledJob modifications and controller startup.