aws-ia / terraform-aws-eks-blueprints

Configure and deploy complete EKS clusters.
https://aws-ia.github.io/terraform-aws-eks-blueprints/
Apache License 2.0

Karpenter helm chart always wants to update due to password token change. #1970

Closed · jamesmaccoll closed this issue 2 months ago

jamesmaccoll commented 3 months ago

Description

Whenever I run terraform apply, I get the following:

  ~ update in-place

Terraform will perform the following actions:

  # module.eks_blueprints_addons.module.karpenter.helm_release.this[0] will be updated in-place
  ~ resource "helm_release" "this" {
        id                         = "karpenter"
        name                       = "karpenter"
      ~ repository_password        = (sensitive value)
        # (30 unchanged attributes hidden)

        # (9 unchanged blocks hidden)
    }

Plan: 0 to add, 1 to change, 0 to destroy.

I've seen the previous conversation here: https://github.com/aws-ia/terraform-aws-eks-blueprints/issues/1686, so I can't work out why I'm still getting this perpetual diff when my code looks 'right'.


Versions

Reproduction Code [Required]

################################################################################
# providers
################################################################################
terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "5.53.0"
    }
    helm = {
      source  = "hashicorp/helm"
      version = ">= 2.13.2"
    }
    kubectl = {
      source  = "alekc/kubectl"
      version = "2.0.4"
    }
    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = "2.30"
    }
    time = {
      source  = "hashicorp/time"
      version = "0.11.1"
    }
    tls = {
      source  = "hashicorp/tls"
      version = "4.0.5"
    }
  }
}

provider "aws" {
  region = "us-east-1"
  alias  = "us_east_1"
}

provider "kubernetes" {
  host                   = module.eks_cluster.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks_cluster.cluster_certificate_authority_data)

  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed
    args = ["eks", "get-token", "--cluster-name", module.eks_cluster.cluster_name]
  }
}

provider "kubectl" { # used by kubectl_manifests
  apply_retry_count      = 5
  host                   = module.eks_cluster.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks_cluster.cluster_certificate_authority_data)
  load_config_file       = false

  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed
    args = ["eks", "get-token", "--cluster-name", module.eks_cluster.cluster_name]
  }
}

provider "helm" {
  kubernetes {
    host                   = module.eks_cluster.cluster_endpoint
    cluster_ca_certificate = base64decode(module.eks_cluster.cluster_certificate_authority_data)

    exec {
      api_version = "client.authentication.k8s.io/v1beta1"
      command     = "aws"
      # This requires the awscli to be installed locally where Terraform is executed
      args = ["eks", "get-token", "--cluster-name", module.eks_cluster.cluster_name]
    }
  }
}

# This aliased provider is intended to prevent the Karpenter chart from constantly
# regenerating Helm releases due to repository_password being re-read on every apply
provider "helm" {
  alias = "karpenter"
  kubernetes {
    host                   = module.eks_cluster.cluster_endpoint
    cluster_ca_certificate = base64decode(module.eks_cluster.cluster_certificate_authority_data)

    exec {
      api_version = "client.authentication.k8s.io/v1beta1"
      command     = "aws"
      # This requires the awscli to be installed locally where Terraform is executed
      args = ["eks", "get-token", "--cluster-name", module.eks_cluster.cluster_name]
    }
  }

  registry {
    url      = "oci://public.ecr.aws"
    username = data.aws_ecrpublic_authorization_token.token.user_name
    password = data.aws_ecrpublic_authorization_token.token.password
  }
}
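
################################################################################
# ECR Public auth token (assumed declaration, not shown in the original snippet)
################################################################################
# The configuration above references data.aws_ecrpublic_authorization_token.token;
# its declaration would typically look like this. ECR Public authorization tokens
# can only be issued from us-east-1, hence the aliased provider.
data "aws_ecrpublic_authorization_token" "token" {
  provider = aws.us_east_1
}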

################################################################################
# EKS Cluster
################################################################################
module "eks_cluster" {
  source  = "terraform-aws-modules/eks/aws"
  version = "20.14.0"

  access_entries                           = var.access_entries
  authentication_mode                      = "API"
  create_cloudwatch_log_group              = true
  cluster_name                             = var.eks_cluster_name
  cluster_version                          = var.eks_cluster_version
  cluster_endpoint_public_access           = true
  cluster_endpoint_private_access          = true
  control_plane_subnet_ids                 = local.private_subnets
  enable_cluster_creator_admin_permissions = false
  node_security_group_tags = {
    "karpenter.sh/discovery"                        = var.eks_cluster_name # required by karpenter
    "kubernetes.io/cluster/${var.eks_cluster_name}" = null                 # required by lb_controller
  }
  subnet_ids = local.eks_subnets
  vpc_id     = local.vpc_id

  cluster_addons = {

    # not compatible with eks 1.30 yet
    # aws eks describe-addon-versions --kubernetes-version 1.30 --addon-name adot
    # https://github.com/aws/containers-roadmap/issues/2365
    #
    #    adot = {
    #      addon_version = var.adot_addon_version
    #    }

    coredns = {
      addon_version = var.coredns_addon_version
      configuration_values = jsonencode({
        computeType = "fargate"
        resources = {
          limits = {
            cpu    = "0.25"
            memory = "256M"
          }
          requests = {
            cpu    = "0.25"
            memory = "256M"
          }
        }
      })
    }

    eks-pod-identity-agent = {
      addon_version = var.eks_pod_identity_agent_addon_version
      configuration_values = jsonencode({
        resources = {
          limits = {
            cpu    = "0.25"
            memory = "256M"
          }
          requests = {
            cpu    = "0.25"
            memory = "256M"
          }
        }
      })
    }

    kube-proxy = {
      addon_version = var.kube_proxy_addon_version
      configuration_values = jsonencode({
        resources = {
          limits = {
            cpu    = "0.25"
            memory = "256M"
          }
          requests = {
            cpu    = "0.25"
            memory = "256M"
          }
        }
      })
    }

    vpc-cni = {
      addon_version  = var.vpc_cni_addon_version
      before_compute = true
      configuration_values = jsonencode({
        env = {
          # Reference docs https://docs.aws.amazon.com/eks/latest/userguide/cni-increase-ip-addresses.html
          ENABLE_PREFIX_DELEGATION = "true"
          WARM_PREFIX_TARGET       = "1"
        }
      })
    }
  }

  # Fargate profiles use the cluster primary security group so these are not utilized by fargate-based services
  create_cluster_security_group = true
  create_node_security_group    = true

  fargate_profiles = {

    external-dns = {
      subnet_ids = local.private_subnets
      selectors = [
        { namespace = "external-dns" }
      ]
    }

    external-secrets = {
      subnet_ids = local.private_subnets
      selectors = [
        { namespace = "external-secrets" }
      ]
    }

    karpenter = {
      subnet_ids = local.private_subnets
      selectors = [
        { namespace = "karpenter" }
      ]
    }

    kube-system = {
      subnet_ids = local.private_subnets
      selectors = [
        { namespace = "kube-system" }
      ]
    }
  }

  cluster_timeouts = {
    create = "30m"
    update = "30m"
    delete = "10m"
  }

  tags = merge(local.tags, {
    # NOTE - if creating multiple security groups with this module, only tag the
    # security group that Karpenter should utilize with the following tag
    # (i.e. - at most, only one security group should have this tag in our account)
    "karpenter.sh/discovery" = var.eks_cluster_name
  })

  depends_on = [
    aws_iam_service_linked_role.spot,
    module.iam_role_eks_break_glass
  ]
}

################################################################################
# EKS Addons
################################################################################
module "eks_blueprints_addons" {
  source  = "aws-ia/eks-blueprints-addons/aws"
  version = "1.16.3"

  cluster_endpoint  = module.eks_cluster.cluster_endpoint
  cluster_name      = module.eks_cluster.cluster_name
  cluster_version   = module.eks_cluster.cluster_version
  oidc_provider_arn = module.eks_cluster.oidc_provider_arn

  # karpenter
  enable_karpenter = true
  karpenter = {
    chart_version       = "0.37.0"
    repository_username = data.aws_ecrpublic_authorization_token.token.user_name
    repository_password = data.aws_ecrpublic_authorization_token.token.password
    set = [
      {
        name  = "max_history"
        value = "5"
      }
    ]
  }
  karpenter_enable_instance_profile_creation = true
  karpenter_enable_spot_termination          = true
  karpenter_node = {
    iam_role_additional_policies = {
      # to allow SSM exec into node
      "AmazonSSMManagedInstanceCore" = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore",
      # Use static name so that it matches what is defined in `karpenter.yaml` example manifest
      #iam_role_use_name_prefix = false
    }
  }
  karpenter_sqs = true

  # metrics server
  enable_metrics_server = true
  metrics_server = {
    chart_version = "3.12.1"
    set = [
      {
        name  = "max_history"
        value = "5"
      }
    ]
  }

  # lb controller
  enable_aws_load_balancer_controller = true
  aws_load_balancer_controller = {
    chart_version = "1.8.1"
    set = [
      {
        name  = "replicaCount"
        value = "3"
      },
      {
        name  = "clusterName"
        value = module.eks_cluster.cluster_name
      },
      {
        name  = "vpcId"
        value = local.vpc_id
      },
      {
        name  = "enableServiceMutatorWebhook"
        value = "false"
      },
      {
        name  = "podDisruptionBudget.maxUnavailable"
        value = 1
      },
      {
        name  = "max_history"
        value = "5"
      }
    ]
  }

  # external dns controller
  enable_external_dns = true
  external_dns_route53_zone_arns = [
    data.terraform_remote_state.bm-infra.outputs.private_hosted_zone_arn,
    data.terraform_remote_state.bm-infra.outputs.public_hosted_zone_arn
  ]
  external_dns = {
    #chart_version = "1.14.5"
    set = [
      {
        name  = "namespace"
        value = "external-dns"
      },
      {
        name  = "max_history"
        value = "5"
      }
    ]
  }

  # external secrets controller
  enable_external_secrets = true
  external_secrets = {
    chart_version = "0.9.19"
    set = [
      {
        name  = "namespace"
        value = "external-secrets"
      },
      {
        name  = "max_history"
        value = "5"
      },

    ]
  }

  depends_on = [
    module.eks_cluster
  ]
}

resource "aws_eks_access_entry" "karpenter" {
  cluster_name  = module.eks_cluster.cluster_name
  principal_arn = module.eks_blueprints_addons.karpenter.node_iam_role_arn
  type          = "EC2_LINUX"
}

Expected behavior

A clean terraform plan/apply with no pending changes.

Actual behavior

The Karpenter helm_release shows an in-place update on every plan/apply due to repository_password, as shown above.

jamesmaccoll commented 3 months ago

anyone?

bryantbiggs commented 2 months ago

This is not related to Karpenter or the Helm chart - it's the nature of the aws_ecrpublic_authorization_token data source: on every invocation it requests a new set of credentials.
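
A minimal sketch of one way to quiet the diff while staying with Terraform, assuming anonymous pulls from oci://public.ecr.aws are acceptable for your rate limits (ECR Public permits unauthenticated pulls): stop passing the ephemeral token into the addon configuration, so nothing in the plan depends on a value that changes on every run.

# Sketch only: Karpenter addon without the ephemeral ECR Public credentials, so the
# plan no longer references aws_ecrpublic_authorization_token.
module "eks_blueprints_addons" {
  source  = "aws-ia/eks-blueprints-addons/aws"
  version = "1.16.3"

  cluster_endpoint  = module.eks_cluster.cluster_endpoint
  cluster_name      = module.eks_cluster.cluster_name
  cluster_version   = module.eks_cluster.cluster_version
  oidc_provider_arn = module.eks_cluster.oidc_provider_arn

  enable_karpenter = true
  karpenter = {
    chart_version = "0.37.0"
    # repository_username / repository_password intentionally omitted (assumption:
    # anonymous pulls are acceptable), so no ephemeral value appears in the plan
  }
}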

Instead of Terraform, I would suggest looking at more Kubernetes-friendly application deployment tools such as ArgoCD or FluxCD - Terraform is well suited for infrastructure, but not for application deployment.