UrbanOS-Examples / common

Name says it all
Apache License 2.0
0 stars 0 forks source link

Update terraform-aws-eks module #405

Closed ScottMillard closed 5 years ago

ScottMillard commented 5 years ago

We forked terraform-aws-eks a long time ago. Determine what changes were made in the fork and whether they are still needed if we update to the latest version. Either merge in the forked repo changes or change common to use the public repo.

Acceptance criteria of spike:

Spike is to help prep for these ultimate goals:

Tech note:

ManApart commented 5 years ago

Basic summary of our changes against the last time we merged upstream into our fork:

diff --git a/data.tf b/data.tf
index d4b5106..8a6d8c7 100644
--- a/data.tf
+++ b/data.tf
@@ -18,7 +18,7 @@ data "aws_iam_policy_document" "workers_assume_role_policy" {
 data "aws_ami" "eks_worker" {
   filter {
     name   = "name"
-    values = ["amazon-eks-node-${var.cluster_version}-v*"]
+    values = ["amazon-eks-node-${var.cluster_version}-v${var.ami_version}"]
   }

   most_recent = true
diff --git a/local.tf b/local.tf
index 5597cdd..4c7e062 100644
--- a/local.tf
+++ b/local.tf
@@ -15,6 +15,8 @@ locals {
     asg_desired_capacity          = "1"                             # Desired worker capacity in the autoscaling group.
     asg_max_size                  = "3"                             # Maximum worker capacity in the autoscaling group.
     asg_min_size                  = "1"                             # Minimum worker capacity in the autoscaling group.
+    asg_rolling_update_max_batch_size = "1" # Specifies the maximum number of instances that will be updated at once during an autoscaling group rolling update.
+    asg_rolling_update_min_instances_in_service = "0" # Specifies the minimum number of instances that must be in service within the autoscaling group during a rolling update.
     instance_type                 = "m4.large"                      # Size of the workers instances.
     spot_price                    = ""                              # Cost of spot instance.
     placement_tenancy             = ""                              # The tenancy of the instance. Valid values are "default" or "dedicated".
diff --git a/outputs.tf b/outputs.tf
index 12041d9..3cb2def 100644
--- a/outputs.tf
+++ b/outputs.tf
@@ -39,14 +39,11 @@ output "kubeconfig" {
   value       = "${data.template_file.kubeconfig.rendered}"
 }

-output "workers_asg_arns" {
-  description = "IDs of the autoscaling groups containing workers."
-  value       = "${concat(aws_autoscaling_group.workers.*.arn, aws_autoscaling_group.workers_launch_template.*.arn)}"
-}
-
 output "workers_asg_names" {
   description = "Names of the autoscaling groups containing workers."
-  value       = "${concat(aws_autoscaling_group.workers.*.id, aws_autoscaling_group.workers_launch_template.*.id)}"
+  value       = [
+    "${data.template_file.workers_names.*.rendered}"
+  ]
 }

 output "worker_security_group_id" {
diff --git a/variables.tf b/variables.tf
index 1937585..6712004 100644
--- a/variables.tf
+++ b/variables.tf
@@ -12,6 +12,11 @@ variable "cluster_version" {
   default     = "1.11"
 }

+variable "ami_version" {
+  description = "Version for the EKS-optimized worker AMI."
+  default     = "*"
+}
+
 variable "config_output_path" {
   description = "Where to save the Kubectl config file (if `write_kubeconfig = true`). Should end in a forward slash `/` ."
   default     = "./"
diff --git a/workers.tf b/workers.tf
index 0c7acd4..1e3cb5b 100644
--- a/workers.tf
+++ b/workers.tf
@@ -1,29 +1,45 @@
-# Worker Groups using Launch Configurations
-
-resource "aws_autoscaling_group" "workers" {
-  name_prefix           = "${aws_eks_cluster.this.name}-${lookup(var.worker_groups[count.index], "name", count.index)}"
-  desired_capacity      = "${lookup(var.worker_groups[count.index], "asg_desired_capacity", local.workers_group_defaults["asg_desired_capacity"])}"
-  max_size              = "${lookup(var.worker_groups[count.index], "asg_max_size", local.workers_group_defaults["asg_max_size"])}"
-  min_size              = "${lookup(var.worker_groups[count.index], "asg_min_size", local.workers_group_defaults["asg_min_size"])}"
-  target_group_arns     = ["${compact(split(",", coalesce(lookup(var.worker_groups[count.index], "target_group_arns", ""), local.workers_group_defaults["target_group_arns"])))}"]
-  launch_configuration  = "${element(aws_launch_configuration.workers.*.id, count.index)}"
-  vpc_zone_identifier   = ["${split(",", coalesce(lookup(var.worker_groups[count.index], "subnets", ""), local.workers_group_defaults["subnets"]))}"]
-  protect_from_scale_in = "${lookup(var.worker_groups[count.index], "protect_from_scale_in", local.workers_group_defaults["protect_from_scale_in"])}"
-  suspended_processes   = ["${compact(split(",", coalesce(lookup(var.worker_groups[count.index], "suspended_processes", ""), local.workers_group_defaults["suspended_processes"])))}"]
-  count                 = "${var.worker_group_count}"
-
-  tags = ["${concat(
-    list(
-      map("key", "Name", "value", "${aws_eks_cluster.this.name}-${lookup(var.worker_groups[count.index], "name", count.index)}-eks_asg", "propagate_at_launch", true),
-      map("key", "kubernetes.io/cluster/${aws_eks_cluster.this.name}", "value", "owned", "propagate_at_launch", true),
-      map("key", "k8s.io/cluster-autoscaler/${lookup(var.worker_groups[count.index], "autoscaling_enabled", local.workers_group_defaults["autoscaling_enabled"]) == 1 ? "enabled" : "disabled"  }", "value", "true", "propagate_at_launch", false)
-    ),
-    local.asg_tags)
-  }"]
-
-  lifecycle {
-    ignore_changes = ["desired_capacity"]
-  }
+resource "aws_cloudformation_stack" "workers" {
+  count         = "${var.worker_group_count}"
+  name          = "${replace("${aws_eks_cluster.this.name}-${lookup(var.worker_groups[count.index], "name", count.index)}", "/[^-a-zA-Z0-9]/", "-")}"
+  template_body = <<EOF
+---
+AWSTemplateFormatVersion: "2010-09-09"
+Description: Terraform-managed CF Stack for Auto-Scaling Group
+Resources:
+  AutoScalingGroup:
+    Type: AWS::AutoScaling::AutoScalingGroup
+    Properties:
+      AutoScalingGroupName: ${aws_eks_cluster.this.name}-${lookup(var.worker_groups[count.index], "name", count.index)}
+      VPCZoneIdentifier: ${jsonencode(split(",", coalesce(lookup(var.worker_groups[count.index], "subnets", ""), join(",", var.subnets))))}
+      LaunchConfigurationName: ${element(aws_launch_configuration.workers.*.id, count.index)}
+      MinSize: ${lookup(var.worker_groups[count.index], "asg_min_size",lookup(local.workers_group_defaults, "asg_min_size"))}
+      MaxSize: ${lookup(var.worker_groups[count.index], "asg_max_size",lookup(local.workers_group_defaults, "asg_max_size"))}
+      Tags: ${jsonencode(concat(
+        list(
+          map("Key", "Name", "Value", "${aws_eks_cluster.this.name}-${lookup(var.worker_groups[count.index], "name", count.index)}-eks_asg", "PropagateAtLaunch", "True"),
+          map("Key", "kubernetes.io/cluster/${aws_eks_cluster.this.name}", "Value", "owned", "PropagateAtLaunch", "True"),
+        ),
+        local.asg_tags))}
+    UpdatePolicy:
+      AutoScalingRollingUpdate:
+        MaxBatchSize: ${lookup(var.worker_groups[count.index], "asg_rolling_update_max_batch_size",lookup(local.workers_group_defaults, "asg_rolling_update_max_batch_size"))}
+        MinInstancesInService: ${lookup(var.worker_groups[count.index], "asg_rolling_update_min_instances_in_service",lookup(local.workers_group_defaults, "asg_rolling_update_min_instances_in_service"))}
+        SuspendProcesses:
+          - HealthCheck
+          - ReplaceUnhealthy
+          - AZRebalance
+          - AlarmNotification
+          - ScheduledActions
+Outputs:
+  AutoScalingGroupName:
+    Description: The name of the Auto-Scaling Group
+    Value: !Ref AutoScalingGroup
+EOF
+}
+
+data "template_file" "workers_names" {
+  count    = "${var.worker_group_count}"
+  template = "${lookup(aws_cloudformation_stack.workers.*.outputs[count.index], "AutoScalingGroupName")}"
 }

 resource "aws_launch_configuration" "workers" {
@@ -136,11 +152,11 @@ resource "aws_iam_role_policy_attachment" "workers_AmazonEC2ContainerRegistryRea
 resource "null_resource" "tags_as_list_of_maps" {
   count = "${length(keys(var.tags))}"

-  triggers = {
-    key                 = "${element(keys(var.tags), count.index)}"
-    value               = "${element(values(var.tags), count.index)}"
-    propagate_at_launch = "true"
-  }
+  triggers = "${map(
+    "Key", "${element(keys(var.tags), count.index)}",
+    "Value", "${element(values(var.tags), count.index)}",
+    "PropagateAtLaunch", "True"
+  )}"
 }

 resource "aws_iam_role_policy_attachment" "workers_autoscaling" {
ManApart commented 5 years ago

`git lg` of changes since we last merged upstream into our fork 8 months ago:

* 07c61b6  (tag: v6.0.1, upstream/master) New release 6.0.1 (#528) 5 hours ago Thierno IB. BARRY
* 39cb93b  Use null as default value for target group ARNs (#524) 29 hours ago Tatu Seppä-Lassila
* 88afa43  fix docs generation for lint (#523) 30 hours ago Thierno IB. BARRY
* f79c790  Gpu workers support (#515) 2 days ago Roman Voitenko
*   0cc058c  Merge pull request #517 from stakater/add-notes 5 days ago Thierno IB. BARRY
|\  
| * 185cd44  [add-notes] update the docs/autoscaling.md document 5 days ago irti
| * 0fd7046  update local.tf by fixing typo 6 days ago Irtiza Ali
| * 0d49f82  [add-notes] update local.tf file 7 days ago irti
* |   f702467  Merge pull request #503 from nauxliu/tflint 6 days ago Thierno IB. BARRY
|\ \  
| * | 347db3e  Improve CI pipeline 7 days ago Xiangxuan Liu
|/ /  
* | b6125b8  output empty string when cluster identity is empty (#516) 7 days ago Thierno IB. BARRY
|/  
* bf8c324  (tag: v6.0.0) New Release v6.0.0 (#514) 7 days ago Max Williams
* 13ee091  add doc for iam permission (#511) 8 days ago Ali Kahoot
* 2b88e50  Add URL for OIDC issuer to allow IAM roles for Pods (#506) 9 days ago russwhelan
* 4f4d9c3  Fix launch template market option expansion (#508) 9 days ago Tomas Ostasevicius
* f88220a  Fix link to docs so it doesn't 404 when published on terraform registry page (#507) 12 days ago Marcello Romani
* 461cf54  Support for Mixed Instances ASG in worker_groups_launch_template variable (#468) 12 days ago Sergiu Plotnicu
* a47f464  Allow to set user defined workers role name (#496) 13 days ago Ivanich
* 4264a5f  use default_iam_role_id interpolation for aws_iam_instance_profile (#489) 2 weeks ago Thierno IB. BARRY
* 2254ab7  Replace travis with GitHub Actions (#497) 2 weeks ago 刘相轩
* 6c3e4ec  Update to EKS 1.14 (#498) 2 weeks ago 刘相轩
* 97df53c  launch configuration doen't have version (#482) 4 weeks ago Thierno IB. BARRY
* d6d0975  add initial lifecycle hooks for autosacling groups (#466) 5 weeks ago Thierno IB. BARRY
* d8ed7d0  add option to recreate ASG when LT or LC changes (#465) 5 weeks ago Thierno IB. BARRY
* 5636447  Wrapping kubelet_extra_args in double quotes (#473) (#474) 5 weeks ago Nick Fisher
* 655a75f  Update cluster_security_group_id doc (#472) 5 weeks ago Daniel Piddock
* b335819  Adding 'capacity-optimized' docs to locals (#469) 5 weeks ago Scott Crooks
* 8580b67  Support map users and roles to multiple groups (#424) 5 weeks ago 刘相轩
* b8b3b58  basic example: correct elb tags (#458) 7 weeks ago Karoline Pauls
* fb71eaf  Allow launch template spot instances without mixed policy (#463) 7 weeks ago Tarek Abdel Sater
* c9986f5  Fix errors from usage of coalesce (#402) (#459) 7 weeks ago Petri Kero
* ebac6c9  Adding tags for Log groups and workers IAM role (#448) 7 weeks ago Lucas Giacomazzi
* ac62edc  Add `required_providers` to set minimum versions (#464) 7 weeks ago Daniel Piddock
* 630a0cc  Fix fmt for v0.12.6 (#460) 7 weeks ago Karoline Pauls
* c5c3d38  Additional tag for autoscaling enabled (#454) 8 weeks ago Alexandr Grab
* 6d0025e  move/merge docs (#453) 8 weeks ago Max Williams
* 6ea3582  (tag: v5.1.0) new release (#452) 8 weeks ago Max Williams
* 52286a0  Support for tagging EBS Volumes created by "workers_launch_template*.tf" (#450) 8 weeks ago Sergiu Plotnicu
* e876ce2  VPC: `enable_dns_hostnames = true` in examples (#446) 8 weeks ago Karoline Pauls
* f755300  chore: fix README.md worker_groups tags syntax (#405) 9 weeks ago James
* a95ad00  added market options to request spot instances without pools (#409) 9 weeks ago Onur Sam
* ec64a74  Update README.md (#436) 2 months ago mandarin801
* 34b7451  Configure 'cpu_credits' for workers (#253) (#426) 3 months ago Alexander Shinkarenko
* 4b1df0c  Update default override instance types to work with Cluster Autoscaler (#425) 3 months ago 刘相轩
* 6590198  Fix suspended_processes to be a type list (#423) 3 months ago Laurent Godet
* 80b0bc7  Remove outdated important issues (#416) 3 months ago 刘相轩
* f1041af  add cloudwatch log group access (#410) 3 months ago gbooth27
* 069c2da  typo (#407) 3 months ago Alex Romanov
* ba33773  Added Option to use KMS key for cloudwatch logs group. (#404) 3 months ago till-krauss
* 8260f10  EKS 1.13 out (#400) 3 months ago Gauthier
* 9c3d222  (tag: v5.0.0) New release: now supporting TF 0.12!! (#399) 3 months ago Max Williams
* da2c78b  Upgrade to terraform 0.12 (#394) 3 months ago 刘相轩
* 3f06015  adds tags to resource aws_iam_role.cluster (#390) 3 months ago Camilo Santana
* bf5dae0  Enable log retention for cloudwatch log groups (#387) 3 months ago Yurii Polishchuk
* 758fdab  Termination Policy Option to worker ASG (#393) 4 months ago Mitch Anderson
* 8c61f58  add additional policies for control plane log setup. (#377) 4 months ago Feifei Jia
* feb8810  Update EBS optimized instances type (#384) 4 months ago Gauthier
* a9753e8  Fix IAM instance profile toggle for mixed launch templates (#381) 4 months ago jnozo
* 763a3d5  Fix typos (#379) 5 months ago Thuan Duong
* d6fa9f4  (tag: v4.0.2) Better examples, PR template changes, general tidy up (#375) 5 months ago Max Williams
* f083816  (tag: v4.0.1) Fix annoying typo: worker_group_xx vs worker_groups_xx (#374) 5 months ago Max Williams
* f155e40  (tag: v4.0.0) New release v4.0.0 (#373) 5 months ago Max Williams
* ae2f8e5  Adding new mixed type of worker group with instance overrides and mixed instances policy (#371) 5 months ago Max Williams
* 2439c25  Cleaning up and deduplicating launch template related code (#370) 5 months ago Max Williams
* 959e533  Support custom IAM roles for cluster and workers (#338) 5 months ago Touch Ungboriboonpisal
* 613fb1c  adding cluster arn to outputs (#372) 5 months ago Alex Snast
* 60dfeca  Adding 2 new outputs: AMI ID and work user-data (#364) 5 months ago Max Williams
* 1660105  Disabling ASG process AZRebalance by default (#369) 5 months ago Max Williams
* b27b582  Adding doc about spot instances (#362) 5 months ago Max Williams
* 3ece32f  Fixes for Launch Templates (#361) 5 months ago Max Williams
* 2b633a1  Fix small typo (#367) 5 months ago Ivan Kovnatsky
* 18baeea  Add option to use custom service linked role for Auto Scaling group (#359) 5 months ago Võ Anh Duy
* d4be9f4  Add .prettierignore file (#350) 5 months ago Andrew Roth
* 2c89a8f  Switch to https for the pre-commit repos (#349) 5 months ago Andrew Roth
* b2da12d  Add instructions on how to enable the docker bridge network (#352) 5 months ago Andrew Roth
* 1a26f35  (tag: v3.0.0) Release v3.0.0 (#347) 5 months ago Max Williams
* 47c7e7a  Fix: ENI's prevent SecGrps from being destroyed on tf destroy (#311) 6 months ago Jeffrey Rose
* 18e0086  Adding EKS Control Plane logging options (#340) 6 months ago Scott Crooks
* b81a15a  Add support for placement group in launch template (#332) 6 months ago rverma-nikiai
* fb59e4f  adding IAM instance profiles to outputs, addresses #323 (#329) 6 months ago soapergem
* 46ec636  Update eks module deps (#334) 6 months ago chenrui
* 7a44845  Update to 1.12 (#327) 6 months ago Stijn De Haes
* f1858c8  (tag: v2.3.1) New release: 2.3.1 (#321) 6 months ago Max Williams
* 8b2e1c2  Replacing enable_docker_bridge with a generic option called bootstrap_extra_args (#320) 6 months ago Max Williams
* 806edb6  Add support for eks endpoint_private_access and endpoint_public_access (#314) 6 months ago Stijn De Haes
* 97c7964  Adding minimum communication security group rule for Kubelet (#318) 6 months ago Scott Crooks
* a26a43a  (tag: v2.3.0) Release v.2.3.0 (#309) 6 months ago Max Williams
* fd1f149  Add support for placement groups (#306) 6 months ago Matheus Fernandes
* bef3c36  Allow additional policies to be attached to worker nodes (#308) 6 months ago Nicolas Szalay
* efaa3d8  Add cluster name and ephemeral storage tags for cluster autoscaler (#299) 6 months ago Taylor Barrella
* 80085f5  Add enable_docker_bridge (#302) 7 months ago michaelmccord
*   d02bbc5  Merge pull request #298 from skang0601/fix-cluster-autoscaler-role-for-launch-templates 7 months ago Brandon J. O'Connor
|\  
| *   bb6921e  Merge branch 'master' into fix-cluster-autoscaler-role-for-launch-templates 7 months ago Brandon J. O'Connor
| |\  
| |/  
|/|   
* | 3795811  Adding workers_launch_template ebs encryption (#292) 7 months ago russki
| * 28fd3c5  add ec2:DescribeLaunchTemplateVersions action to worker node iam role 7 months ago Sung Kang
|/  
*   bb9c1b0  Merge pull request #284 from tekn0ir/iam_path 7 months ago Brandon J. O'Connor
|\  
| * 3728299  Add optional iam_path 7 months ago Anders Åslund
* |   9fe2cfd  Merge pull request #296 from max-rocket-internet/ami_filter 7 months ago Brandon J. O'Connor
|\ \  
| * | 7f8ef9a  fix changelog 7 months ago Max Williams
| * | 9598647  Adding optional name filter variable to be able to pin worker AMI to a release 7 months ago Max Williams
|/ /  
* | dcdf413  typo fix (#293) 7 months ago gad0lin
* | d473b71  Add outputs for cluster role ARN and name (#290) 7 months ago Steffen Pingel
|/  
* 87114b0  (tag: v2.2.1) Released 2.2.1 7 months ago Anton Babenko
*   6005cec  Merge pull request #272 from syst0m/master 7 months ago Anton Babenko
|\  
| * 28bf0c1  Added handling for disabled kubeconfig 7 months ago Tomislav Tomašić
| * e8071b3  Added output for generated kubeconfig filename. 8 months ago syst0m
|/  
* ba90fba  (tag: v2.2.0) Release v2.2.0 (#270) 8 months ago Max Williams
* a1a1644  Add optional permissions_boundary (#265) 8 months ago Dylan Hellems
* a23c43c  Updating example IAM docs to include Launch Template actions (#268) 8 months ago skang0601
* d3c1bd6  Added write_aws_auth_config option (#228) 8 months ago yutachaos
* 35747d7  Worker group tags (#252) 8 months ago Stefan Sedich
* eac4164  Adding the g3s.xlarge instance type ebs optimized mapping (#258) 8 months ago Stefan Sedich
* 39f30e9  Add enabled_metrics attributes to autoscaling_group (#256) 8 months ago Stefano Zaninetta
* dfd5a8f  Use launch template defaults for launch template userdata (#255) 8 months ago leonsodhi-lf
* 8473c69  Enable create_before_destroy for ASG and enable force_delete to be configured (#250) 8 months ago Stefan Sedich
ManApart commented 5 years ago

Options:

  1. Rebase off of upstream and keep going with our rolling update route
  2. Do as the upstream maintainers suggest and do the autoscaling changes outside of the module through lifecycle hooks, etc. This option would just change common to use the upstream module and then we'd make several changes in common to do rolling updates ourselves.
  3. Do nothing, continue to fall further behind, and address the issue when security vulnerabilities are found or we need newer features

Considerations:

ManApart commented 5 years ago

Just briefly tried to use the most recent version of the module that supports Terraform 0.11.x. This also looks to require a patch version bump of Terraform, which is non-trivial. It is still unclear whether this particular version even supports rolling updates.