Closed almukh75 closed 2 years ago
Same issue here
I'm trying to migrate from v17 to v18. Here is my v17 config:
node_groups_defaults = {
disk_size = 10
create_launch_template = true
ami_type = "AL2_x86_64"
min_capacity = 0
max_capacity = 10
desired_capacity = 0
capacity_type = "ON_DEMAND"
bootstrap_env = {
USE_MAX_PODS = false
CONTAINER_RUNTIME = "containerd"
}
taints = [
{
key = "dedicated"
value = "true"
effect = "NO_SCHEDULE"
}
]
}
node_groups = {
"default-a-" = {
desired_capacity = 1
instance_types = ["t3a.large"]
subnets = [dependency.vpc.outputs.private_subnets[0]]
kubelet_extra_args = "--max-pods=${run_cmd("/bin/sh", "-c", "../../../../../../../tools/max-pods-calculator.sh --instance-type t3a.large --cni-version 1.9.1 --cni-prefix-delegation-enabled")}"
taints = []
k8s_labels = {
size = "medium"
network = "private"
arch = "arm64"
"topology.ebs.csi.aws.com/zone" = "${include.root.locals.merged.aws_region}a"
}
}
}
Here is my new v18 config:
eks_managed_node_group_defaults = {
disk_size = 10
min_capacity = 0
max_capacity = 10
desired_capacity = 0
capacity_type = "ON_DEMAND"
enable_bootstrap_user_data = true
}
eks_managed_node_groups = {
"default-a" = {
desired_capacity = 1
ami_type = "AL2_x86_64"
instance_types = ["t3a.large"]
subnets = [dependency.vpc.outputs.private_subnets[0]]
enable_bootstrap_user_data = false
k8s_labels = {
network = "private"
"topology.ebs.csi.aws.com/zone" = "${include.root.locals.merged.aws_region}a"
}
}
"default-b" = {
desired_capacity = 1
ami_type = "AL2_x86_64"
instance_types = ["t3a.large"]
subnets = [dependency.vpc.outputs.private_subnets[1]]
enable_bootstrap_user_data = false
bootstrap_extra_args = "--container-runtime containerd --kubelet-extra-args '--max-pods=${run_cmd("/bin/sh", "-c", "../../../../../../../tools/max-pods-calculator.sh --instance-type t3a.large --cni-version 1.10.1 --cni-prefix-delegation-enabled")}'"
pre_bootstrap_user_data = <<-EOT
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
EOT
k8s_labels = {
network = "private"
"topology.ebs.csi.aws.com/zone" = "${include.root.locals.merged.aws_region}b"
}
}
"default-c" = {
desired_capacity = 1
ami_type = "AL2_x86_64"
instance_types = ["t3a.large"]
subnets = [dependency.vpc.outputs.private_subnets[2]]
enable_bootstrap_user_data = true
bootstrap_extra_args = "--container-runtime containerd --kubelet-extra-args '--max-pods=${run_cmd("/bin/sh", "-c", "../../../../../../../tools/max-pods-calculator.sh --instance-type t3a.large --cni-version 1.10.1 --cni-prefix-delegation-enabled")}'"
pre_bootstrap_user_data = <<-EOT
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
EOT
k8s_labels = {
network = "private"
"topology.ebs.csi.aws.com/zone" = "${include.root.locals.merged.aws_region}b"
}
}
"bottlerocket-a" = {
desired_capacity = 1
ami_type = "BOTTLEROCKET_x86_64"
platform = "bottlerocket"
instance_types = ["t3a.large"]
subnets = [dependency.vpc.outputs.private_subnets[0]]
bootstrap_extra_args = <<-EOT
"max-pods" = ${run_cmd("/bin/sh", "-c", "../../../../../../../tools/max-pods-calculator.sh --instance-type t3a.large --cni-version 1.10.1 --cni-prefix-delegation-enabled")}
EOT
k8s_labels = {
network = "private"
"topology.ebs.csi.aws.com/zone" = "${include.root.locals.merged.aws_region}a"
}
}
"bottlerocket-b" = {
desired_capacity = 1
ami_type = "BOTTLEROCKET_x86_64"
platform = "bottlerocket"
instance_types = ["t3a.large"]
subnets = [dependency.vpc.outputs.private_subnets[1]]
bootstrap_extra_args = <<-EOT
"max-pods" = ${run_cmd("/bin/sh", "-c", "../../../../../../../tools/max-pods-calculator.sh --instance-type t3a.large --cni-version 1.10.1 --cni-prefix-delegation-enabled")}
EOT
k8s_labels = {
network = "private"
"topology.ebs.csi.aws.com/zone" = "${include.root.locals.merged.aws_region}b"
}
}
"bottlerocket-c" = {
desired_capacity = 1
ami_type = "BOTTLEROCKET_x86_64"
platform = "bottlerocket"
instance_types = ["t3a.large"]
subnets = [dependency.vpc.outputs.private_subnets[2]]
bootstrap_extra_args = <<-EOT
"max-pods" = ${run_cmd("/bin/sh", "-c", "../../../../../../../tools/max-pods-calculator.sh --instance-type t3a.large --cni-version 1.10.1 --cni-prefix-delegation-enabled")}
EOT
k8s_labels = {
network = "private"
"topology.ebs.csi.aws.com/zone" = "${include.root.locals.merged.aws_region}c"
}
}
}
The bottlerocket pools are working fine, I have issue with the Amazon Linux Pool, where I cannot get Containerd and bootstrap to work as per https://github.com/terraform-aws-modules/terraform-aws-eks/blob/master/examples/eks_managed_node_group/main.tf#L166
Let's go pool by pool
pre-merged:
No CUSTOM USER DATA
post-merged:
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="//"
--//
Content-Type: text/x-shellscript; charset="us-ascii"
#!/bin/bash
set -ex
B64_CLUSTER_CA=REDACTED
API_SERVER_URL=https://732CC77068B4A2F46DC40497FD380C96.sk1.eu-west-1.eks.amazonaws.com
K8S_CLUSTER_DNS_IP=172.20.0.10
/etc/eks/bootstrap.sh pio-teks-demo-demo --kubelet-extra-args '--node-labels=eks.amazonaws.com/sourceLaunchTemplateVersion=5,eks.amazonaws.com/nodegroup-image=ami-06efe6b9d402e8674,eks.amazonaws.com/capacityType=ON_DEMAND,eks.amazonaws.com/nodegroup=default-a-20220112161956265600000004,eks.amazonaws.com/sourceLaunchTemplateId=lt-08c0df7227547bf5b --max-pods=110' --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL --dns-cluster-ip $K8S_CLUSTER_DNS_IP --use-max-pods false
--//--
This seems to be the standard user data, although I'm not sure how use-max-pods false
and max-pods
ended up here by default, but they might be relics of something I tried before (or is EKS able to calculate this on its own now?)
Node in the end is running Docker and not containerd.
pre-merged:
#!/bin/bash
set -e
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
B64_CLUSTER_CA=REDACTED
API_SERVER_URL=https://732CC77068B4A2F46DC40497FD380C96.sk1.eu-west-1.eks.amazonaws.com
/etc/eks/bootstrap.sh pio-teks-demo-demo --container-runtime containerd --kubelet-extra-args '--max-pods=110' --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL
post-merged:
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="//"
--//
Content-Transfer-Encoding: 7bit
Content-Type: text/x-shellscript
Mime-Version: 1.0
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
--//
Content-Type: text/x-shellscript; charset="us-ascii"
#!/bin/bash
set -ex
B64_CLUSTER_CA=REDACTED
API_SERVER_URL=https://732CC77068B4A2F46DC40497FD380C96.sk1.eu-west-1.eks.amazonaws.com
K8S_CLUSTER_DNS_IP=172.20.0.10
/etc/eks/bootstrap.sh pio-teks-demo-demo --kubelet-extra-args '--node-labels=eks.amazonaws.com/sourceLaunchTemplateVersion=3,eks.amazonaws.com/nodegroup-image=ami-06efe6b9d402e8674,eks.amazonaws.com/capacityType=ON_DEMAND,eks.amazonaws.com/nodegroup=default-b-20220112162947978700000003,eks.amazonaws.com/sourceLaunchTemplateId=lt-04b8a55955eb3eaf6 --max-pods=110' --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL --dns-cluster-ip $K8S_CLUSTER_DNS_IP --use-max-pods false
--//--
Here my previous custom args are missing from the final LT config, probably because of the missing enable_bootstrap_user_data. The node in the end is running Docker and not containerd.
Just fail to create with:
╷
│ Error: error waiting for EKS Node Group (pio-teks-demo-demo:default-c-20220112162947978700000003) version update (fcf678f4-bfbb-3f56-bbf5-f20dce780e0b): unexpected state 'Failed', wanted target 'Successful'. last error: 1 error occurred:
│ * : Unknown: User data was not in the MIME multipart format.
│
│
│
│ with module.eks_managed_node_group["default-c"].aws_eks_node_group.this[0],
│ on modules/eks-managed-node-group/main.tf line 260, in resource "aws_eks_node_group" "this":
│ 260: resource "aws_eks_node_group" "this" {
│
╵
Here is the pre merged userdata:
#!/bin/bash
set -e
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
B64_CLUSTER_CA=REDACTED
API_SERVER_URL=https://732CC77068B4A2F46DC40497FD380C96.sk1.eu-west-1.eks.amazonaws.com
/etc/eks/bootstrap.sh pio-teks-demo-demo --container-runtime containerd --kubelet-extra-args '--max-pods=110' --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL
Not sure what to make of it as it is the same as default-b but without the enable_bootstrap_user_data
which seems to break it.
Terraform v1.1.3
on linux_amd64
+ provider registry.terraform.io/hashicorp/aws v3.71.0
+ provider registry.terraform.io/hashicorp/cloudinit v2.2.0
+ provider registry.terraform.io/hashicorp/github v4.19.1
+ provider registry.terraform.io/hashicorp/kubernetes v2.7.1
+ provider registry.terraform.io/hashicorp/tls v3.1.0
Node to be launched with containerd as runtime and the max-pods specified by the ENI max pods calculator
Launching nodes fails.
╷
│ Error: error waiting for EKS Node Group (pio-teks-demo-demo:default-c-20220112162947978700000003) version update (fcf678f4-bfbb-3f56-bbf5-f20dce780e0b): unexpected state 'Failed', wanted target 'Successful'. last error: 1 error occurred:
│ * : Unknown: User data was not in the MIME multipart format.
│
│
│
│ with module.eks_managed_node_group["default-c"].aws_eks_node_group.this[0],
│ on modules/eks-managed-node-group/main.tf line 260, in resource "aws_eks_node_group" "this":
│ 260: resource "aws_eks_node_group" "this" {
│
╵
ERRO[0285] 1 error occurred:
* exit status 1
apologies for any confusion - I opened a PR to fix the documentation:
pre_bootstrap_user_data
ami_id
- this is the only route for EKS managed node groups where you can take control of the user data - see https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-user-data. Note: it doesn't need to be a "custom" AMI - just use a data source like below. This will tell EKS managed node groups to not provide any user data and instead you can provide the full user data you desire via the module
data "aws_ami" "eks_default" {
filter {
name = "name"
values = ["amazon-eks-node-${var.cluster_version}-v*"]
}
most_recent = true
owners = ["amazon"]
}
module "eks" {
...
custom_ami = data.aws_ami.eks_default.image_id
...
}
Also, do checkout https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/user_data - you can run this locally (its all local stdout stuff, no AWS API calls so no creds needed) and better understand how the user data is being rendered for different scenarios
I’m a little confused, what would be the simplest way to run default EKS AMI and to just switch containerd on and set max pods when using prefix delegation.
Turning containerid on should be simple https://github.com/terraform-aws-modules/terraform-aws-eks/blob/78555e1a1383b1dba7a68899666463326d456ea1/examples/user_data/main.tf#L22 based on https://github.com/awslabs/amazon-eks-ami/blob/2d3e6c1ba6c1afcc4dec22f8b71216b6dc2aa86c/files/bootstrap.sh#L130
I don't think you can set max pods with the default AMI on EKS managed node groups, you'd have to use a custom AMI to set bootstrap args and kubelet flags https://github.com/awslabs/amazon-eks-ami/blob/2d3e6c1ba6c1afcc4dec22f8b71216b6dc2aa86c/files/bootstrap.sh#L19 && https://github.com/awslabs/amazon-eks-ami/blob/2d3e6c1ba6c1afcc4dec22f8b71216b6dc2aa86c/files/bootstrap.sh#L428
When I tested this, EKS was always taking control over max pods set
Managed node groups calculates and applies a single value for the maximum number of pods that can run on each node of your node group, based on instance type. If you create a node group with different instance types, the smallest value calculated across all instance types is applied as the maximum number of pods that can run on every instance type in the node group. Managed node groups calculates the value using the script referenced in Amazon EKS recommended maximum Pods for each Amazon EC2 instance type.
This issue has been resolved in version 18.1.0 :tada:
@antonbabenko don't see anything related to this issue in the changelog of 18.1.0
regarding the max-pods, the latest version of aws-cni leverage this limit. The question is: do we still need to change the max-pods argument?
@andronux It was closed in #1773. The fix was a chore-type of change (title of pull request indicates the type), so it didn't end up in changelog.
This works to enable containerd and max pods, but this switches from using the "EKS optimized AMI" to a custom AMI; in v17 there was a way to get containerd and max pods to work with the EKS optimized AMI set on the node group and not on the launch template
eks_managed_node_groups = {
"default-a" = {
desired_capacity = 1
ami_type = "AL2_x86_64"
ami_id = "ami-020452378df41ab4b"
instance_types = ["t3a.medium"]
enable_bootstrap_user_data = true
subnets = [dependency.vpc.outputs.private_subnets[0]]
pre_bootstrap_user_data = <<-EOT
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
export KUBELET_EXTRA_ARGS="--max-pods=${run_cmd("/bin/sh", "-c", "../../../../../../../tools/max-pods-calculator.sh --instance-type t3a.large --cni-version 1.10.1 --cni-prefix-delegation-enabled")}"
EOT
k8s_labels = {
network = "private"
"topology.ebs.csi.aws.com/zone" = "${include.root.locals.merged.aws_region}a"
}
}
with this userdata:
#!/bin/bash
set -e
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
export KUBELET_EXTRA_ARGS="--max-pods=110"
B64_CLUSTER_CA=REDACTED
API_SERVER_URL=https://732CC77068B4A2F46DC40497FD380C96.sk1.eu-west-1.eks.amazonaws.com
/etc/eks/bootstrap.sh pio-teks-demo-demo --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL
This does not :
"default-b" = {
desired_capacity = 1
ami_type = "AL2_x86_64"
instance_types = ["t3a.medium"]
subnets = [dependency.vpc.outputs.private_subnets[0]]
pre_bootstrap_user_data = <<-EOT
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
export KUBELET_EXTRA_ARGS="--max-pods=${run_cmd("/bin/sh", "-c", "../../../../../../../tools/max-pods-calculator.sh --instance-type t3a.large --cni-version 1.10.1 --cni-prefix-delegation-enabled")}"
EOT
k8s_labels = {
network = "private"
"topology.ebs.csi.aws.com/zone" = "${include.root.locals.merged.aws_region}a"
}
}
with this userdata:
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="//"
--//
Content-Transfer-Encoding: 7bit
Content-Type: text/x-shellscript
Mime-Version: 1.0
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
export KUBELET_EXTRA_ARGS="--max-pods=110"
--//
Content-Type: text/x-shellscript; charset="us-ascii"
#!/bin/bash
set -ex
B64_CLUSTER_CA=REDACTED
API_SERVER_URL=https://732CC77068B4A2F46DC40497FD380C96.sk1.eu-west-1.eks.amazonaws.com
K8S_CLUSTER_DNS_IP=172.20.0.10
/etc/eks/bootstrap.sh pio-teks-demo-demo --kubelet-extra-args '--node-labels=eks.amazonaws.com/sourceLaunchTemplateVersion=1,eks.amazonaws.com/nodegroup-image=ami-020452378df41ab4b,eks.amazonaws.com/capacityType=ON_DEMAND,eks.amazonaws.com/nodegroup=default-b-20220117091627900400000008,eks.amazonaws.com/sourceLaunchTemplateId=lt-01b0dc2b5e873543d --max-pods=110' --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL --dns-cluster-ip $K8S_CLUSTER_DNS_IP --use-max-pods false
--//--
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
ip-10-0-34-213.eu-west-1.compute.internal Ready <none> 22h v1.21.6 10.0.34.213 <none> Bottlerocket OS 1.5.2 (aws-k8s-1.21) 5.10.75 containerd://1.5.8+bottlerocket
ip-10-0-40-248.eu-west-1.compute.internal Ready <none> 8m44s v1.21.5-eks-bc4871b 10.0.40.248 <none> Amazon Linux 2 5.4.162-86.275.amzn2.x86_64 docker://20.10.7
ip-10-0-55-205.eu-west-1.compute.internal Ready <none> 19m v1.21.5-eks-bc4871b 10.0.55.205 <none> Amazon Linux 2 5.4.162-86.275.amzn2.x86_64 containerd://1.4.6
here is the user data generated in v17 with bootstrap args implemented by @stevehipwell :
--//
Content-Transfer-Encoding: 7bit
Content-Type: text/x-shellscript
Mime-Version: 1.0
#!/bin/bash -e
# Define extra environment variables for bootstrap
printf '#!/bin/bash
export CONTAINER_RUNTIME="%s"
export USE_MAX_PODS="%s"
export KUBELET_EXTRA_ARGS="%s"
' "containerd" "false" "--max-pods=110" > /etc/profile.d/bootstrap.sh
# Source extra environment variables in bootstrap script
sed -i '/^set -o errexit/a\\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh
# Allow user supplied pre userdata code
I'll check if I can successfully do this with the EKS optimized AMI without specifying a custom AMI and update the docs, wdyt @bryantbiggs ?
I managed to get it to work with EKS default AMI with the following:
"default-c" = {
desired_capacity = 1
ami_type = "AL2_x86_64"
instance_types = ["t3a.medium"]
subnets = [dependency.vpc.outputs.private_subnets[2]]
pre_bootstrap_user_data = <<-EOT
#!/bin/bash
set -ex
cat <<-EOF > /etc/profile.d/bootstrap.sh
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
export KUBELET_EXTRA_ARGS="--max-pods=${run_cmd("/bin/sh", "-c", "../../../../../../../tools/max-pods-calculator.sh --instance-type t3a.large --cni-version 1.10.1 --cni-prefix-delegation-enabled")}"
EOF
# Source extra environment variables in bootstrap script
sed -i '/^set -o errexit/a\\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh
EOT
k8s_labels = {
network = "private"
"topology.ebs.csi.aws.com/zone" = "${include.root.locals.merged.aws_region}b"
}
}
I guess because multi-part cloud-init parts are run independently, setting the bootstrap env here does not do anything, compared to setting a custom AMI where the user data is only one part.
https://github.com/terraform-aws-modules/terraform-aws-eks/issues/1770#issuecomment-1011373264
If you want more/full control over the user data on AWS EKS managed node groups, then provide an AMI via ami_id - this is the only route for EKS managed node groups where you can take control of the user data - see docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-user-data
Note: it doesn't need to be a "custom" AMI - just use a data source like below. This will tell EKS managed node groups to not provide any user data and instead you can provide the full user data you desire via the module
And https://github.com/terraform-aws-modules/terraform-aws-eks/issues/1771#issuecomment-1013549557
@bryantbiggs Yes it works with ami_id
, I'm just saying this is "basic" configuration that should not require using a custom AMI; even for migration purposes it is not possible to change from setting the AMI in the node group to setting the AMI in the launch template, as the AWS API gives an error.
I'm just trying to replicate the old behavior in v17 which is:
The following example just does not work as is, and Docker is used as the runtime.
because of the multi part cloud init, the first script which set the export CONTAINER_RUNTIME
is run on its own and does nothing; that's why the modification of bootstrap.sh was done before.
I am open to be corrected, but the only way you can achieve what you posted is by setting an ami_id
(and again, this can be just the default AMI that EKS managed node groups would pull - it just signals to the service that you are going off script and that you will be providing the user data).
This rendered user data you posted above is NOT possible without setting ami_id
per the AWS docs
#!/bin/bash
set -e export CONTAINER_RUNTIME="containerd" export USE_MAX_PODS=false export KUBELET_EXTRA_ARGS="--max-pods=110" B64_CLUSTER_CA=REDACTED API_SERVER_URL=https://732CC77068B4A2F46DC40497FD380C96.sk1.eu-west-1.eks.amazonaws.com /etc/eks/bootstrap.sh pio-teks-demo-demo --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL
Unless users specify an ami_id
, the user data will be pre-pended to what the EKS managed node group service provides. You can tell when this is the case due to the MIME multipart formatting like you posted; the last block here being what the service provides (and what I think you are ultimately trying to influence):
MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="//"
--// Content-Transfer-Encoding: 7bit Content-Type: text/x-shellscript Mime-Version: 1.0
export CONTAINER_RUNTIME="containerd" export USE_MAX_PODS=false export KUBELET_EXTRA_ARGS="--max-pods=110"
--// Content-Type: text/x-shellscript; charset="us-ascii"
#!/bin/bash
set -ex B64_CLUSTER_CA=REDACTED API_SERVER_URL=https://732CC77068B4A2F46DC40497FD380C96.sk1.eu-west-1.eks.amazonaws.com K8S_CLUSTER_DNS_IP=172.20.0.10 /etc/eks/bootstrap.sh pio-teks-demo-demo --kubelet-extra-args '--node-labels=eks.amazonaws.com/sourceLaunchTemplateVersion=1,eks.amazonaws.com/nodegroup-image=ami-020452378df41ab4b,eks.amazonaws.com/capacityType=ON_DEMAND,eks.amazonaws.com/nodegroup=default-b-20220117091627900400000008,eks.amazonaws.com/sourceLaunchTemplateId=lt-01b0dc2b5e873543d --max-pods=110' --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL --dns-cluster-ip $K8S_CLUSTER_DNS_IP --use-max-pods false
--//--
So, if the question is one of "How do I get the user data to look like this" I think you understand that now. If the question is "How do I use containerd WITHOUT specifying an AMI ID - I think that is best asked to the EKS managed node group team. If you find a way to do this without specifying an AMI ID, I think that would be useful to add somewhere in the docs here for other users
I suspect the confusion also might lie in the "magic" that v17.x provided. IIRC, there was logic that would automatically provide the ami_id
which would provide the desired output you are looking for, but from a users perspective - the user did not have to directly set an ami_id
. This is sort of the things we want to be more transparent on in the modules so that the design intention by the API and AWS matches the module "API" here - it won't be 1:1 because we do aim to provide a good "out of the box" experience and sane, common default settings - but it should be more transparent and clear why something behaves a certain way and that way should align with the way it works in AWS (irrespective of the module)
@bryantbiggs I can confirm you that this works without specifying ami_id
:
"default-c" = {
desired_capacity = 1
ami_type = "AL2_x86_64"
instance_types = ["t3a.medium"]
subnets = [dependency.vpc.outputs.private_subnets[2]]
pre_bootstrap_user_data = <<-EOT
#!/bin/bash
set -ex
cat <<-EOF > /etc/profile.d/bootstrap.sh
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
export KUBELET_EXTRA_ARGS="--max-pods=${run_cmd("/bin/sh", "-c", "../../../../../../../tools/max-pods-calculator.sh --instance-type t3a.large --cni-version 1.10.1 --cni-prefix-delegation-enabled")}"
EOF
# Source extra environment variables in bootstrap script
sed -i '/^set -o errexit/a\\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh
EOT
k8s_labels = {
network = "private"
"topology.ebs.csi.aws.com/zone" = "${include.root.locals.merged.aws_region}b"
}
}
This is based on this work
I guess my question is, should we update the docs or put this somewhere, because this example does not enable containerd
From my interpretation it is just that when not using ami_id
or enable_bootstrap_user_data = true
there is no way for the bootstrap script to access the variable exported during the first multi part cloud-init, so just exporting variables here does nothing
@bryantbiggs I can confirm you that this works without specifying
ami_id
:"default-c" = { desired_capacity = 1 ami_type = "AL2_x86_64" instance_types = ["t3a.medium"] subnets = [dependency.vpc.outputs.private_subnets[2]] pre_bootstrap_user_data = <<-EOT #!/bin/bash set -ex cat <<-EOF > /etc/profile.d/bootstrap.sh export CONTAINER_RUNTIME="containerd" export USE_MAX_PODS=false export KUBELET_EXTRA_ARGS="--max-pods=${run_cmd("/bin/sh", "-c", "../../../../../../../tools/max-pods-calculator.sh --instance-type t3a.large --cni-version 1.10.1 --cni-prefix-delegation-enabled")}" EOF # Source extra environment variables in bootstrap script sed -i '/^set -o errexit/a\\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh EOT k8s_labels = { network = "private" "topology.ebs.csi.aws.com/zone" = "${include.root.locals.merged.aws_region}b" } }
This is based on this work
I guess my question is, should we update the docs or put this somewhere, because this example does not enable containerd
I don't know what you mean by "this works" - the rendered user data is rendered without the MIME multipart or that containerd works when using this?
My bad, I mean that this enable containerd
and max_pods
without using ami_id
the user data is rendered with MIME multipart because of no custom AMI, but because the first part modifies the bootstrap script to take the env vars into account, containerd is correctly enabled
ok got it - I think we're on the same page now.
so if I understand correctly, if users want to use containerd then they need to also specify sed -i '/^set -o errexit/a\\nsource /etc/profile.d/eks-bootstrap-env.sh' /etc/eks/bootstrap.sh
in their pre_bootstrap_user_data
, is that correct?
Yes, this example produces a node with the Docker runtime (just tested).
If a user wants to use containerd without specifying an ami_id, we need to reuse the same logic which was done here:
- export the VARs to a file (with the cat in my example)
- source them in bootstrap.sh (with the sed command in my example)
So this would be the custom user data needed:
#!/bin/bash
set -ex
cat <<-EOF > /etc/profile.d/bootstrap.sh
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
export KUBELET_EXTRA_ARGS="--max-pods=110"
EOF
# Source extra environment variables in bootstrap script
sed -i '/^set -o errexit/a\\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh
So bootstrap.sh script would source the /etc/profile.d/bootstrap.sh
and make the supported configuration
got it - if you want to submit a PR to update the example you linked above and just throw down a note or two that would be much appreciated 🙏🏽
I am going to see if I can get some input from the EKS AMI team on their take - would be nice to just set env vars like
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
export KUBELET_EXTRA_ARGS="--max-pods=110"
At least, thats how it reads when you look at the bootstrap script but I also suppose its down to what and how the EKS managed node group user data is exposing that as well
Yes I think that's what the "magic" done in v17 was here with the bootstrap_env
var
I'm happy to open a PR to reinstate the logic from v17 that makes this work with MNGs
@bryantbiggs it looks like v18 removed the "magic" that allows variables to be set in the pre_bootstrap_user_data
and exported in bootstrap.sh
. I'll open a PR here to add the functionality back in and update the documentation to describe how it needs to be used, this is a non-breaking change. I'll also open a PR in the AMI repo to create and link the env file to bootstrap.sh
but I wouldn't hold our breath for getting it merged.
@ArchiFleKs as an aside I'd strongly recommend against using the AWS max pods calculator and suggest using the official Kubernetes documentation which is much simpler anyway. In a similar vein I'd suggest setting --kube-reserved
manually from the GKE docs (which is where the original EKS max pods logic came from). I'm planning on opening a PR here to automate this as an opt in setting as this has proved to be a better pattern than the EKS default which is incorrect for the old pod limits and even more so for the new ones (https://github.com/awslabs/amazon-eks-ami/issues/782).
@stevehipwell could you elaborate on the max pod calculator, because max pods depends on the ENI available to the instance right ? How do you set it right depending on the CNI used ?
What do you do with kube reserved ?
@ArchiFleKs the K8s recommended max pod density is 110 pods per node, which will only be realised if the node has enough resources so is a safe default across the board with the following caveat. If you're using the AWS VPC CNI in legacy mode some of the smaller node types need a custom limit as they can't support 110 pods, however if you're using prefix mode or IPv6 mode setting the limit to 110 would be correct.
For kube reserved I've automated the calculations used in GKE (plus AKS and the original EKS CPU), the linked issue on the AMI repo has some more details.
@bryantbiggs it looks like v18 removed the "magic" that allows variables to be set in the
pre_bootstrap_user_data
and exported inbootstrap.sh
. I'll open a PR here to add the functionality back in and update the documentation to describe how it needs to be used, this is a non-breaking change. I'll also open a PR in the AMI repo to create and link the env file tobootstrap.sh
but I wouldn't hold our breath for getting it merged.@ArchiFleKs as an aside I'd strongly recommend against using the AWS max pods calculator and suggest using the official Kubernetes documentation which is much simpler anyway. In a similar vein I'd suggest setting
--kube-reserved
manually from the GKE docs (which is where the original EKS max pods logic came from). I'm planning on opening a PR here to automate this as an opt in setting as this has proved to be a better pattern than the EKS default which is incorrect for the old pod limits and even more so for the new ones (awslabs/amazon-eks-ami#782).
In my opinion, my preference would for this to be fixed upstream so that users can simply use the interface as defined by the bootstrap.sh
via environment variables instead of going into specific customizations. and since this is only for one out of several scenarios (EKS managed node group where users do NOT supply an ami_id
, all other scenarios are unaffected and work as intended), my preference would be to document this workaround until a suitable fix is provided upstream
@bryantbiggs the current logic for MNGs where the ami_id
hasn't been provided is broken as the cluster_service_ipv4_cidr
variable is completely un-supported and there is no documentation on how to work around this or setting any other ENV variables. I would say that using the default AMI ID is the default use case for MNGs as if you're going to start providing AMI IDs you might as well self manage and not have the current MNG limitations.
I don't think an AWS provided fix will be quick and you'll still need to do some custom work here to support cluster_service_ipv4_cidr
so I don't see any other option than re-instating the working code from v17. AWS have no control over how cloud-init works so there is always going to be the requirement that any env variables wanted in bootstrap.sh
need to be saved to disk and sourced before it is run.
I plan on making the creation, linking from bootstrap.sh
and adding of the module values a separate step to the user defined pre user data script. Users will then have the ability to append values to a known file to make them available in bootstrap.sh
. If AWS do add a fix for this it will have to follow this pattern, so we can deprecate our file and point users at theirs before removing the custom file creation in a major release; we would still need a userdata script to set module values though.
@bryantbiggs there is also currently a slight inconsistency in the userdata logic. When using pre_bootstrap_user_data
for the MNG merge pattern it should start with the shebang and default set
statement but for other uses it should just be "actions". This isn't documented, nor is the un-availability of post_bootstrap_user_data
, so I'll add the doc updates in my PR.
@bryantbiggs the current logic for MNGs where the ami_id hasn't been provided is broken as the cluster_service_ipv4_cidr variable is completely un-supported and there is no documentation on how to work around this or setting any other ENV variables. I would say that using the default AMI ID is the default use case for MNGs as if you're going to start providing AMI IDs you might as well self manage and not have the current MNG limitations.
I think herein lies the discrepancy. From my viewpoint, the module is a "container" that is left up to users to utilize as they see fit since it can be used in many different ways. We try to match the API and functionality that it interacts with (i.e. - offload bootstrapping to the bootstrap script, but no additional magic for users to chase and find out whos doing what) as plainly as possible, but still provide means for extensibility and customization as they see fit. My perception of your viewpoint (and this is just my opinion), is that you have strong opinions on how folks are using and should be using this module. I think this is what made v17.x untenable - we can't be overly prescriptive. If the bootstrapping process is inadequate/broken we have to ask (IMO):
I don't think an AWS provided fix will be quick and you'll still need to do some custom work here to support cluster_service_ipv4_cidr so I don't see any other option than re-instating the working code from v17. AWS have no control over how cloud-init works so there is always going to be the requirement that any env variables wanted in bootstrap.sh need to be saved to disk and sourced before it is run.
Again, I think we are headed down a "path of least resistance" here, but not quite the right path (IMO). I think this is a perfect example of documenting how to work around a known issue, but we don't want to take on additional overhead due to the lack of response/support by AWS because then we suddenly support this burden. And remember, this is only for ONE scenario - the module supports several other scenarios that all work as users would expect. If we deviate for one configuration scenario, we are now "non-standard" as well and things start to become difficult to maintain.
@bryantbiggs there is also currently a slight inconsistency in the userdata logic. When using
pre_bootstrap_user_data
for the MNG merge pattern it should start with the shebang and default set
statement but for other uses it should just be "actions". This isn't documented, nor is the un-availability of post_bootstrap_user_data
, so I'll add the doc updates in my PR.
we are not providing set
because there were issues in the past as to what the default should be - -e
/ -ex
/ etc. - its left up to users to provide in the pre-bootstrap user data. I also believe that bash scripts still default to working as expected in cloudinit even without these components, but we can add in the shebang - that shouldn't be an issue
@bryantbiggs I see it slightly differently, I'm looking for the simplest "API" to be exposed in a consistent way. I also like to see things just work, so if adding internal code to normalise AWS decisions that fragment the underlying API helps then I'm all for it. I'd also strongly disagree that v17 had any fixed opinions regarding userdata; it had to support old patterns but fundamentally made it easy for users to set bootstrapping ENV variables no matter what actual bootstrap pattern they were using including the MNG cloud-init merge.
The current pattern isn't consistent and is actually broken for MNGs using merge, I'd like to fix this while keeping the complexity as minimal as possible.
You do provide a shebang and set
in some of the default template, I'd argue that they both should always be set in the defaults as a custom template can easily be used and for set
you can just change it inline.
Let me open a PR and we can discuss the actual details there.
I'm looking for the simplest "API" to be exposed in a consistent way.
This is what we have today. If we add this custom logic into one scenario, we no longer have that.
✅ EKS Managed Node Group (Linux): no additional user data
⚠️ EKS Managed Node Group (Linux): with prepended user data (merged with EKS MNG provided user data)
✅ EKS Managed Node Group (Linux/Bottlerocket): user provided user data (via ami_id
value being provided)
✅ EKS Managed Node Group (Linux/Bottlerocket): user provided user data (via ami_id
value being provided) w/ user provided user data template
✅ EKS Managed Node Group (Bottlerocket): no additional user data
✅ EKS Managed Node Group (Bottlerocket): with prepended user data (merged with EKS MNG provided user data)
✅ Self Managed Node Group (Linux EKS Opt.): pre and post bootstrap user data provided by user; module provided bootstrap
✅ Self Managed Node Group (Linux EKS Opt.): user provided user data template
✅ Self Managed Node Group (Bottlerocket): pre and post bootstrap user data provided by user; module provided bootstrap
✅ Self Managed Node Group (Bottlerocket): user provided user data template
✅ Self Managed Node Group (Windows): pre and post bootstrap user data provided by user; module provided bootstrap
✅ Self Managed Node Group (Windows): user provided user data template
Where the ⚠️ is what we are speaking about - that the user expectations of how to modify the bootstrap process is not working as users intend (not due to the fault of the module, but somewhere between the AWS EKS Optimized AMI bootstrap script and the overriding logic that the AWS EKS Managed Node Group service utilizes)
The changes you are referring to, will make this one use case completely different from the rest of the use cases and I would argue thats not good for the module. The change is only good to users who are looking to use EKS managed node groups with the EKS managed node group provided AMI (the service manages the AMI used, not users) and they are looking to modify kubelet settings. Therefore, I still believe that documenting this and letting users opt into this configuration would be my preferred choice
@bryantbiggs I'm going to have to disagree with you again, the current behaviour is inconsistent in a misleading way. I'm actually looking at this from the perspective of someone building a platform and wanting to make use of self-managed node groups, managed node groups with default AMI, and managed node groups with a specified AMI from the same inputs. This worked in v17 after significant community effort; I'm a fan of the general changes in v18 but I don't think that you've got the userdata logic quite right.
The inconsistencies in the "API":
set
tied to enable_bootstrap_user_data
and not standard for the default template
SERVICE_IPV4_CIDR
but none of the other bootstrap arguments
ami_id
is provided
bootstrap_extra_args
& post_bootstrap_user_data
ami_id
set
cluster_service_ipv4_cidr
My proposal is to normalise the behaviour in the following ways without any new variables being added:
set
cloud-init
env variable persistence
cluster_service_ipv4_cidr
Alternatively if you really want to keep this consistent the variables not supported everywhere should be removed from the module as they can always be added back in with a custom template as you're saying should be done with the MNGs without an AMI.
you are aware of the following, yes?
@bryantbiggs I'm familiar with all of the above, I'm also very familiar with the various cases where AWS incorrectly documented functionality as unavailable on MNGs. I'm not sure how the top two relate to the bottom two other than to show that the whole EKS system is inconsistent, which is why this community module can offer so much by polyfilling missing capabilities and standardising the interface.
I've found another issue, if you want to use an AMI for a self-managed node group that doesn't use the bootstrap script you can't use the default template as the enable_bootstrap_user_data
is hardcoded to true
. This would work correctly if the variable was passed through.
I've found another issue, if you want to use an AMI for a self-managed node group that doesn't use the bootstrap script you can't use the default template as the
enable_bootstrap_user_data
is hardcoded to true
. This would work correctly if the variable was passed through.
I've found another issue, if you want to use an AMI for a self-managed node group that doesn't use the bootstrap script you can't use the default template as the enable_bootstrap_user_data is hardcoded to true. This would work correctly if the variable was passed through.
yes you can:
No you can't use the default template to do this, even though it has the correct templating to do so.
You also can't set cluster_service_ipv4_cidr
for self-managed node groups even though it would work correctly if it was hooked up.
I don't follow most of what you are saying. I have clients to get back to but you can see the example directory that I setup in order to test/validate user data https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/user_data
as far as I am aware it works as intended to show all the various ways of rendering user data - the only caveat is the EKS managed node groups with default AMI (it doesn't show the user data that the EKS managed node group service would provide, just the user data that users would provide in cloud-init form that is pre-pended to what the service provides)
@bryantbiggs I'm sorry but the userdata behaviour in v18 regresses core functionality from v17 that had no reason to be removed and was only as complex as required to do the job (this is a complex area). I agree that some of the functionality needed to be pruned and refactored but most of it was needed and was fundamental to usability.
as far as I am aware it works as intended to show all the various ways of rendering user data - the only caveat is the EKS managed node groups with default AMI (it doesn't show the user data that the EKS managed node group service would provide, just the user data that users would provide in cloud-init form that is pre-pended to what the service provides)
This doesn't prove anything as you're just calling the user data module. You're missing var.enable_bootstrap_user_data
& var.cluster_service_ipv4_cidr
from the self managed node group module, which if added and configured to pass through correctly would just make this behaviour work as it already does for MNGs without an AMI ID.
I've opened https://github.com/terraform-aws-modules/terraform-aws-eks/pull/1789 to address the issues discussed here.
I've intentionally not tried to re-instate anything from v17 that wasn't needed to make the documented functionality work, so this PR is purely about consistency and supporting the documented behaviour.
@ArchiFleKs @bryantbiggs I know this issue is already closed but maybe it can still help. Just wanted to use your example but the nodes don't join the cluster at all and MNG creation times out after more than 20 min with: ╷ │ Error: error waiting for EKS Node Group (git-eks-demo-ipv4:git-eks-demo-ipv4-custom-mng-20220303120545395100000004) to create: unexpected state 'CREATE_FAILED', wanted target 'ACTIVE'. last error: 1 error occurred: │ * i-032c8b846d845340c: NodeCreationFailure: Instances failed to join the kubernetes cluster │ │ │ │ with module.eks_managed_node_group[0].aws_eks_node_group.this[0], │ on .terraform/modules/eks_managed_node_group/modules/eks-managed-node-group/main.tf line 269, in resource "aws_eks_node_group" "this": │ 269: resource "aws_eks_node_group" "this" { │ ╵
Here my config. locals { mng_bootstrap_cri_string = var.enable_containerd ? "--container-runtime containerd" : "" mng_bootstrap_ipv6_string = var.enable_ipv6_cluster ? "--ip-family ipv6 --service-ipv6-cidr ${data.local_file.ipv6-svc-cidr.content}" : "" mng_bootstrap_string = format("%s %s",local.mng_bootstrap_cri_string,local.mng_bootstrap_ipv6_string) } module "eks_managed_node_group" { source = "terraform-aws-modules/eks/aws//modules/eks-managed-node-group" version = "~> 18.8.1" count = var.enable_al2_cmng ? 1 : 0 name = "${local.name}-custom-mng" cluster_name = module.eks.cluster_id cluster_version = local.cluster_version desired_size = 1 max_size = 1 min_size = 0 vpc_id = module.vpc.vpc_id subnet_ids = module.vpc.private_subnets instance_types = ["t3.medium"] capacity_type = "ON_DEMAND" ebs_optimized = true enable_monitoring = true block_device_mappings = { xvda = { device_name = "/dev/xvda" ebs = { volume_size = 60 volume_type = "gp3" iops = 3000 encrypted = true kms_key_id = aws_kms_key.ebs.arn delete_on_termination = true } } } create_iam_role = false iam_role_arn = aws_iam_role.eks-worker.arn ami_type = "AL2_x86_64" pre_bootstrap_user_data = <<-EOT set -ex cat <<-EOF > /etc/profile.d/bootstrap.sh export CONTAINER_RUNTIME="containerd" EOF sed -i '/^set -o errexit/a\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh EOT / ami_id = data.aws_ami.al2_ami.id enable_bootstrap_user_data = true bootstrap_extra_args = "${local.mng_bootstrap_string}" / }
What am I doing wrong? I tried various combinations including ami_id etc. none worked. I have to use the following otherwise nodes do not join the cluster:
create_launch_template = false launch_template_name = ""
but bootstraping does not work as well.
@youwalther65 here is how I configure bottle rocket and AL2 : https://github.com/particuleio/teks/blob/699bf468440ff8fae0eadfb61cab1d1508e37f2b/terragrunt/live/production/eu-west-1/clusters/demo/eks/terragrunt.hcl#L138
@bryantbiggs @ArchiFleKs I always used your examples but until now just in (sub)module "eks_managed_node_group" module "eks_managed_node_group" { source = "terraform-aws-modules/eks/aws//modules/eks-managed-node-group" ...
where it does not work.
Now I put it into module eks under eks_managed_node_groups directly and it works! So for me it seems an issue in the module "eks_managed_node_group". I used your example "containerd"
module "eks" {
source = "terraform-aws-modules/eks/aws"
...
eks_managed_node_groups = {
containerd = {
name = "containerd"
min_size = 1
max_size = 3
desired_size = 1
vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets
instance_types = ["t3.large"]
capacity_type = "SPOT"
create_iam_role = false
iam_role_arn = aws_iam_role.eks-worker.arn
# See issue https://github.com/awslabs/amazon-eks-ami/issues/844
pre_bootstrap_user_data = <<-EOT
#!/bin/bash
set -ex
cat <<-EOF > /etc/profile.d/bootstrap.sh
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
export KUBELET_EXTRA_ARGS="--max-pods=110"
EOF
# Source extra environment variables in bootstrap script
sed -i '/^set -o errexit/a\\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh
EOT
}
I thought syntax/behaviour in sub module is the same in 18.x and I can use it similar to section in module eks ?
The code in #1789 should help. I'm currently on annual leave but expect to finish testing it next week.
I thought syntax/behaviour in sub module is the same in 18.x and I can use it similar to section in module eks ?
Yes the functionality is the same because they are referencing the same code, but in the root module we provide a lot of defaults that are passed to the sub-module. You cannot copy the eks_managed_node_groups
block out of the root module and put it into a sub-module definition. You have to ensure that you are passing all of the necessary inputs to the sub-module when using independently; the root module provides a lot of those "sane" defaults where possible
@bryantbiggs Sure, I checked the submodule documentation to my best knowledge but I didn't get it running. Would be nice if some of you can provide a working containerd example for the MNG sub module as well. I use the sub module to allow optional node groups creation using count. In the meantime I will be waiting for the changes to come mentioned by @stevehipwell Thank you for your continuous efforts.
For anyone still struggling to find the right settings to get nodes up with containerd runtime, here're the necessary properties for using both EKS-managed launch template / user_data, and those managed using EKS module.
If you want to use EKS-managed user_data, set the following for your node group (and DON'T set an ami_id
):
module "eks" {
# ...
eks_managed_node_groups = {
group_name = {
create_launch_template = false
launch_template_name = ""
pre_bootstrap_user_data = <<-EOT
#!/bin/bash
set -ex
cat <<-EOF > /etc/profile.d/bootstrap.sh
export CONTAINER_RUNTIME="containerd"
export USE_MAX_PODS=false
EOF
# Source extra environment variables in bootstrap script
sed -i '/^set -o errexit/a\\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh
EOT
}
}
}
If you want to use module-managed user_data, set the following for your node group (ami_id
is mandatory, has to be an EKS-optimized AMI):
module "eks" {
# ...
eks_managed_node_groups = {
group_name = {
ami_id = data.aws_ami.eks_default.image_id # set this to an EKS-optimized AMI from data resources (x86 and arm examples below)
create_launch_template = true
# launch_template_name = "" # optional if you want your own name
enable_bootstrap_user_data = true
bootstrap_extra_args = "--container-runtime containerd --kubelet-extra-args '--max-pods=110'"
}
}
}
data "aws_ami" "eks_default" { most_recent = true owners = ["amazon"] filter { name = "name" values = ["amazon-eks-node-${local.cluster_version}-v*"] } }
data "aws_ami" "eks_default_arm" { most_recent = true owners = ["amazon"] filter { name = "name" values = ["amazon-eks-arm64-node-${local.cluster_version}-v*"] } }
Both of these approaches will result in nodes coming up with containerd runtime.
Description
I can't create eks_managed_node_groups as described in README.md Combination of this parameters
doesn't work with or without below parameters
if I use enable_bootstrap_user_data = true I get error
If enable_bootstrap_user_data = false I have such user data in launch template
Versions
Reproduction
Steps to reproduce the behavior: create such group using terraform-aws-eks v18.0.6
Expected behavior
I need the kubernetes node with CONTAINER-RUNTIME containerd and --kubelet-extra-args '--max-pods=20'