aws-ia / terraform-aws-eks-blueprints

Configure and deploy complete EKS clusters.
https://aws-ia.github.io/terraform-aws-eks-blueprints/
Apache License 2.0
2.72k stars 1.43k forks source link

Mount issues with module aws_efs_csi_driver #1171

Closed frank-bee closed 2 years ago

frank-bee commented 2 years ago

Description

When using the EFS add-on, mounting a volume fails with the following error: Could not start amazon-efs-mount-watchdog, unrecognized init system "aws-efs-csi-dri" b'mount.nfs4: access denied by server while mounting 127.0.0.1:/'

My configuration is almost exactly the same as in the example.

Detailed error message

MountVolume.SetUp failed for volume "pvc-4d2561b8-b803-4722-8dfc-713ccd78e38d" : rpc error: code = Internal desc = Could not mount "fs-00703eb8b7961a9af:/" at "/var/lib/kubelet/pods/b9b49102-f92a-4a3b-813a-ad8b7c4f06af/volumes/kubernetes.io~csi/pvc-4d2561b8-b803-4722-8dfc-713ccd78e38d/mount": mount failed: exit status 32 Mounting command: mount Mounting arguments: -t efs -o accesspoint=fsap-0c180bb904c5be851,tls fs-00703eb8b7961a9af:/ /var/lib/kubelet/pods/b9b49102-f92a-4a3b-813a-ad8b7c4f06af/volumes/kubernetes.io~csi/pvc-4d2561b8-b803-4722-8dfc-713ccd78e38d/mount Output: Could not start amazon-efs-mount-watchdog, unrecognized init system "aws-efs-csi-dri" b'mount.nfs4: access denied by server while mounting 127.0.0.1:/' Warning: config file does not have fips_mode_enabled item in section mount.. You should be able to find a new config file in the same folder as current config file /etc/amazon/efs/efs-utils.conf. Consider update the new config file to latest config file. Use the default value [fips_mode_enabled = False].Warning: config file does not have retry_nfs_mount_command item in section mount.. You should be able to find a new config file in the same folder as current config file /etc/amazon/efs/efs-utils.conf. Consider update the new config file to latest config file. Use the default value [retry_nfs_mount_command = True].

Versions

Reproduction Code

# EKS cluster with a single SPOT managed node group.
module "eks_blueprints" {
  source = "github.com/aws-ia/terraform-aws-eks-blueprints?ref=v4.16.0"

  # Cluster identity
  cluster_version = var.cluster_version
  cluster_name    = local.name

  # Networking, taken from the VPC module outputs
  private_subnet_ids = module.vpc.private_subnets
  vpc_id             = module.vpc.vpc_id

  # Passed straight through from the caller
  map_users = var.map_users

  managed_node_groups = {
    spot_2vcpu_8mem = {
      node_group_name = "mng-spot-2vcpu-8mem"
      capacity_type   = "SPOT"

      # Candidate instance types for the node group
      instance_types = [
        "m5.large",
        "m4.large",
        "m6a.large",
        "m5a.large",
        "m5d.large",
      ]

      # Scaling bounds
      min_size     = 1
      desired_size = 2
      max_size     = 8

      # Restricted to the first two private subnets
      subnet_ids = [
        module.vpc.private_subnets[0],
        module.vpc.private_subnets[1],
      ]
    }
  }
}

# Kubernetes add-ons installed on top of the eks_blueprints cluster.
module "eks_blueprints_kubernetes_addons" {
  source = "github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons?ref=v4.16.0"

  # Cluster wiring, read from the eks_blueprints module outputs
  eks_cluster_id       = module.eks_blueprints.eks_cluster_id
  eks_cluster_version  = module.eks_blueprints.eks_cluster_version
  eks_cluster_endpoint = module.eks_blueprints.eks_cluster_endpoint
  eks_oidc_provider    = module.eks_blueprints.oidc_provider
  eks_cluster_domain   = local.domain

  # EKS Managed Add-ons
  enable_amazon_eks_aws_ebs_csi_driver = true
  enable_amazon_eks_coredns            = true
  enable_amazon_eks_kube_proxy         = true
  enable_amazon_eks_vpc_cni            = true

  # Add-ons that are always enabled
  enable_aws_efs_csi_driver           = true
  enable_aws_load_balancer_controller = true
  enable_cluster_autoscaler           = true
  enable_external_dns                 = true
  enable_external_secrets             = true
  enable_ingress_nginx                = true

  # Add-ons toggled through input variables
  enable_aws_cloudwatch_metrics = var.enable_aws_cloudwatch_metrics
  enable_metrics_server         = var.enable_metrics_server

  # Currently switched off
  #enable_prometheus                   = true

  # ingress-nginx Helm values rendered from the template next to this module
  ingress_nginx_helm_config = {
    values = [
      templatefile("${path.module}/nginx-values.yaml", {
        ssl_cert_arn = aws_acm_certificate.cert.arn
        hostname     = local.domain
      }),
    ]
  }
}

///
// EFS
///

# EFS file system with one mount target per entry in local.azs and a
# security group that admits NFS traffic from the VPC's private subnets.
#
# NOTE(review): no file-system-policy inputs are set here, so the module's
# defaults apply. When debugging "access denied by server" mount errors,
# check what this module version does by default (e.g. its `attach_policy`
# input) — confirm against the module documentation.
module "efs-landing" {
  source  = "terraform-aws-modules/efs/aws"
  version = "~> 1.0"

  creation_token = local.landing_name_full
  name           = local.landing_name_full

  # Mount targets / security group
  # Builds a map keyed by the entries of local.azs; index k pairs
  # local.azs[k] with the private subnet at the same position.
  mount_targets = { for k, v in toset(range(length(local.azs))) :
    element(local.azs, k) => { subnet_id = element(module.vpc.private_subnets, k) }
  }
  security_group_description = "${local.landing_name_full} EFS security group"
  security_group_vpc_id      = module.vpc.vpc_id
  security_group_rules = {
    vpc = {
      # relying on the defaults provided for EFS/NFS (2049/TCP + ingress)
      description = "NFS ingress from VPC private subnets"
      cidr_blocks = module.vpc.private_subnets_cidr_blocks
    }
  }
}

# StorageClass that provisions volumes on the efs-landing file system
# through the efs.csi.aws.com provisioner.
resource "kubernetes_storage_class_v1" "efs-landing" {
  storage_provisioner = "efs.csi.aws.com"

  metadata {
    name = "efs-landing"
  }

  parameters = {
    fileSystemId     = module.efs-landing.id
    provisioningMode = "efs-ap" # Dynamic provisioning
    directoryPerms   = "700"
  }
}

Steps to reproduce the behavior:

Launch the following example pod with an EFS-backed PVC:

---
# Claim against the efs-landing StorageClass, shared read/write by many pods.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  namespace: landing
  name: efs-claim
spec:
  storageClassName: efs-landing
  accessModes: [ReadWriteMany]
  resources:
    requests:
      storage: 5Gi
---
# Test pod: appends a UTC timestamp to /data/out every 5 seconds,
# with /data backed by the efs-claim PVC.
kind: Pod
apiVersion: v1
metadata:
  namespace: landing
  name: efs-app
spec:
  volumes:
    - name: persistent-storage
      persistentVolumeClaim:
        claimName: efs-claim
  containers:
    - name: app
      image: centos
      command:
        - "/bin/sh"
      args:
        - "-c"
        - "while true; do echo $(date -u) >> /data/out; sleep 5; done"
      volumeMounts:
        - mountPath: /data
          name: persistent-storage

Expected behaviour

The pod should launch.

Actual behaviour

The pod does not start; see the error message above.

bla-ckbox commented 2 years ago

I have the same problem.

Versions

The problem does not appear if you create the file system by following this procedure: https://docs.aws.amazon.com/eks/latest/userguide/efs-csi.html#efs-create-filesystem

and then apply the following manifests, which use a manually created PersistentVolume:

---
# StorageClass for the EFS CSI provisioner; no parameters are set.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: efs-sc
provisioner: efs.csi.aws.com
---
# Manually created volume that points the EFS CSI driver at a file system.
kind: PersistentVolume
apiVersion: v1
metadata:
  name: efs-pv
spec:
  storageClassName: efs-sc
  persistentVolumeReclaimPolicy: Retain
  volumeMode: Filesystem
  accessModes: [ReadWriteMany]
  capacity:
    storage: 5Gi
  csi:
    driver: efs.csi.aws.com
    # Placeholder value as given in the report
    volumeHandle: fs-xxxxxxxxxxxxxx
---
# Claim bound explicitly to the efs-pv volume by name.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: efs-claim
spec:
  volumeName: efs-pv
  storageClassName: efs-sc
  accessModes: [ReadWriteMany]
  resources:
    requests:
      storage: 5Gi
---
# First writer: appends a UTC timestamp to /data/out1.txt every 5 seconds.
kind: Pod
apiVersion: v1
metadata:
  name: app1
spec:
  volumes:
    - name: persistent-storage
      persistentVolumeClaim:
        claimName: efs-claim
  containers:
    - name: app1
      image: busybox
      command:
        - "/bin/sh"
      args:
        - "-c"
        - "while true; do echo $(date -u) >> /data/out1.txt; sleep 5; done"
      volumeMounts:
        - mountPath: /data
          name: persistent-storage
---
# Second writer: appends a UTC timestamp to /data/out2.txt every 5 seconds,
# using the same efs-claim PVC as app1.
kind: Pod
apiVersion: v1
metadata:
  name: app2
spec:
  volumes:
    - name: persistent-storage
      persistentVolumeClaim:
        claimName: efs-claim
  containers:
    - name: app2
      image: busybox
      command:
        - "/bin/sh"
      args:
        - "-c"
        - "while true; do echo $(date -u) >> /data/out2.txt; sleep 5; done"
      volumeMounts:
        - mountPath: /data
          name: persistent-storage