iterative / terraform-provider-iterative

☁️ Terraform plugin for machine learning workloads: spot instance recovery & auto-termination | AWS, GCP, Azure, Kubernetes
https://registry.terraform.io/providers/iterative/iterative/latest/docs
Apache License 2.0
287 stars 27 forks source link

Fix k8s tag propagation and controller-uid detection #656

Closed sjawhar closed 1 year ago

sjawhar commented 1 year ago

Purpose

Approach

Logs

This is what kubectl describe job shows on master:

Without custom labels/tags:

Labels:         controller-uid=5cf76b42-cbc8-484a-b496-6b867182e523
                job-name=tpi-3aw4vu6287ot1-2biu93iu-1tw3q6af
Annotations:    batch.kubernetes.io/job-tracking: 

With labels:

Labels:         app.kubernetes.io/instance=jenkins
Annotations:    app.kubernetes.io/instance: jenkins
                batch.kubernetes.io/job-tracking: 
0x2b3bfa0 commented 1 year ago

Thank you very much, @sjawhar!

Additionally, would you like to git apply also the following patch to get rid of task.Tags forever?

From e80d27247fde2e116920244fb1e133c442f87f3e Mon Sep 17 00:00:00 2001
From: Helio Machado <0x2b3bfa0+git@googlemail.com>
Date: Mon, 5 Sep 2022 23:19:52 +0000
Subject: [PATCH] Remove deprecated Task.Tags

---
 cmd/create/create.go                          |  2 ++
 task/aws/task.go                              |  2 +-
 .../resource_virtual_machine_scale_set.go     |  2 --
 task/az/task.go                               | 33 ++++++++++---------
 task/common/values.go                         |  1 -
 task/gcp/task.go                              |  2 +-
 6 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/cmd/create/create.go b/cmd/create/create.go
index 9a9217d..d236a89 100644
--- a/cmd/create/create.go
+++ b/cmd/create/create.go
@@ -69,6 +69,8 @@ func (o *Options) Run(cmd *cobra.Command, args []string, cloud *common.Cloud) er
        }
    }

+   cloud.Tags = o.Tags
+
    script := o.Script
    if !strings.HasPrefix(script, "#!") {
        script = "#!/bin/sh\n" + script
diff --git a/task/aws/task.go b/task/aws/task.go
index e312692..9de5df9 100644
--- a/task/aws/task.go
+++ b/task/aws/task.go
@@ -23,7 +23,7 @@ func List(ctx context.Context, cloud common.Cloud) ([]common.Identifier, error)
 }

 func New(ctx context.Context, cloud common.Cloud, identifier common.Identifier, task common.Task) (*Task, error) {
-   client, err := client.New(ctx, cloud, task.Tags)
+   client, err := client.New(ctx, cloud, cloud.Tags)
    if err != nil {
        return nil, err
    }
diff --git a/task/az/resources/resource_virtual_machine_scale_set.go b/task/az/resources/resource_virtual_machine_scale_set.go
index 11669cd..c50d528 100644
--- a/task/az/resources/resource_virtual_machine_scale_set.go
+++ b/task/az/resources/resource_virtual_machine_scale_set.go
@@ -28,7 +28,6 @@ func NewVirtualMachineScaleSet(client *client.Client, identifier common.Identifi
    v.Attributes.Size = task.Size
    v.Attributes.Environment = task.Environment
    v.Attributes.Firewall = task.Firewall
-   v.Attributes.Tags = task.Tags
    v.Attributes.Parallelism = &task.Parallelism
    v.Attributes.Spot = float64(task.Spot)
    v.Dependencies.ResourceGroup = resourceGroup
@@ -46,7 +45,6 @@ type VirtualMachineScaleSet struct {
        Size        common.Size
        Environment common.Environment
        Firewall    common.Firewall
-       Tags        map[string]string
        Parallelism *uint16
        Spot        float64
        Addresses   []net.IP
diff --git a/task/az/task.go b/task/az/task.go
index acd0799..304d813 100644
--- a/task/az/task.go
+++ b/task/az/task.go
@@ -23,7 +23,7 @@ func List(ctx context.Context, cloud common.Cloud) ([]common.Identifier, error)
 }

 func New(ctx context.Context, cloud common.Cloud, identifier common.Identifier, task common.Task) (*Task, error) {
-   client, err := client.New(ctx, cloud, task.Tags)
+   client, err := client.New(ctx, cloud, cloud.Tags)
    if err != nil {
        return nil, err
    }
@@ -137,10 +137,10 @@ func (t *Task) Create(ctx context.Context) error {
    }}
    if t.Attributes.Environment.Directory != "" {
        steps = append(steps, common.Step{
-       Description: "Uploading Directory...",
-       Action: func(ctx context.Context) error {
+           Description: "Uploading Directory...",
+           Action: func(ctx context.Context) error {
                return t.Push(ctx, t.Attributes.Environment.Directory)
-       },
+           },
        })
    }
    steps = append(steps, common.Step{
@@ -201,8 +201,8 @@ func (t *Task) Delete(ctx context.Context) error {
    if t.Read(ctx) == nil {
        if t.Attributes.Environment.DirectoryOut != "" {
            steps = []common.Step{{
-               Description:    "Downloading Directory...",
-               Action: func(ctx context.Context)error {
+               Description: "Downloading Directory...",
+               Action: func(ctx context.Context) error {
                    err := t.Pull(ctx, t.Attributes.Environment.Directory, t.Attributes.Environment.DirectoryOut)
                    if err != nil && err != common.NotFoundError {
                        return err
@@ -211,7 +211,7 @@ func (t *Task) Delete(ctx context.Context) error {
                },
            }, {
                Description: "Emptying Bucket...",
-               Action: func(ctx context.Context)error {
+               Action: func(ctx context.Context) error {
                    err := machine.Delete(ctx, t.DataSources.Credentials.Resource["RCLONE_REMOTE"])
                    if err != nil && err != common.NotFoundError {
                        return err
@@ -219,28 +219,29 @@ func (t *Task) Delete(ctx context.Context) error {
                    return nil
                },
            }}
-       }}
+       }
+   }
    steps = append(steps, []common.Step{{
        Description: "Deleting VirtualMachineScaleSet...",
-       Action: t.Resources.VirtualMachineScaleSet.Delete,
-   },{
+       Action:      t.Resources.VirtualMachineScaleSet.Delete,
+   }, {
        Description: "Deleting Subnet...",
-       Action: t.Resources.Subnet.Delete,
+       Action:      t.Resources.Subnet.Delete,
    }, {
        Description: "Deleting SecurityGroup...",
-       Action: t.Resources.SecurityGroup.Delete,
+       Action:      t.Resources.SecurityGroup.Delete,
    }, {
        Description: "Deleting VirtualNetwork...",
-       Action: t.Resources.VirtualNetwork.Delete,
+       Action:      t.Resources.VirtualNetwork.Delete,
    }, {
        Description: "Deleting BlobContainer...",
-       Action: t.Resources.BlobContainer.Delete,
+       Action:      t.Resources.BlobContainer.Delete,
    }, {
        Description: "Deleting StorageAccount...",
-       Action: t.Resources.StorageAccount.Delete,
+       Action:      t.Resources.StorageAccount.Delete,
    }, {
        Description: "Deleting ResourceGroup...",
-       Action: t.Resources.ResourceGroup.Delete,
+       Action:      t.Resources.ResourceGroup.Delete,
    }}...)
    if err := common.RunSteps(ctx, steps); err != nil {
        return err
diff --git a/task/common/values.go b/task/common/values.go
index 5006c23..7c0199b 100644
--- a/task/common/values.go
+++ b/task/common/values.go
@@ -50,7 +50,6 @@ type Task struct {
    PermissionSet string
    Spot          Spot
    Parallelism   uint16
-   Tags          map[string]string // Deprecated

    Addresses []net.IP
    Status    Status
diff --git a/task/gcp/task.go b/task/gcp/task.go
index d91e44b..687fcd6 100644
--- a/task/gcp/task.go
+++ b/task/gcp/task.go
@@ -23,7 +23,7 @@ func List(ctx context.Context, cloud common.Cloud) ([]common.Identifier, error)
 }

 func New(ctx context.Context, cloud common.Cloud, identifier common.Identifier, task common.Task) (*Task, error) {
-   client, err := client.New(ctx, cloud, task.Tags)
+   client, err := client.New(ctx, cloud, cloud.Tags)
    if err != nil {
        return nil, err
    }
-- 
2.30.2
0x2b3bfa0 commented 1 year ago

Finally tests are passing! 😅