hashicorp / terraform-provider-kubernetes

Terraform Kubernetes provider
https://www.terraform.io/docs/providers/kubernetes/
Mozilla Public License 2.0

Unable to create namespace using kubernetes_namespace #2112

Open · sunilnagavelli opened this issue 1 year ago

sunilnagavelli commented 1 year ago

Terraform Version, Provider Version and Kubernetes Version

Terraform version: 1.4.0
Kubernetes provider version: v2.20.0
Kubernetes version: 1.25.6

Affected Resource(s)

Azure Kubernetes Service namespace

Terraform Configuration Files

Azure Kubernetes Service

Provider Config:
provider "azurerm" {
  tenant_id       = var.tenant_id
  client_id       = var.client_id
  client_secret   = var.client_secret
  subscription_id = var.subscription_id
  features {}
}

provider "kubernetes" {
  host                   = azurerm_kubernetes_cluster.aks_cluster.kube_config.0.host
  username               = azurerm_kubernetes_cluster.aks_cluster.kube_config.0.username
  password               = azurerm_kubernetes_cluster.aks_cluster.kube_config.0.password
  client_certificate     = base64decode(azurerm_kubernetes_cluster.aks_cluster.kube_config.0.client_certificate)
  client_key             = base64decode(azurerm_kubernetes_cluster.aks_cluster.kube_config.0.client_key)
  cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks_cluster.kube_config.0.cluster_ca_certificate)
}

provider "helm" {
  kubernetes {
    host                   = azurerm_kubernetes_cluster.aks_cluster.kube_config.0.host
    username               = azurerm_kubernetes_cluster.aks_cluster.kube_config.0.username
    password               = azurerm_kubernetes_cluster.aks_cluster.kube_config.0.password
    client_certificate     = base64decode(azurerm_kubernetes_cluster.aks_cluster.kube_config.0.client_certificate)
    client_key             = base64decode(azurerm_kubernetes_cluster.aks_cluster.kube_config.0.client_key)
    cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks_cluster.kube_config.0.cluster_ca_certificate)
    #config_path            = "~/.kube/config"
  }
}
AKS Cluster Resource Definition:
resource "random_string" "random_str" {
  length  = 8
  special = false
  upper   = false
}

# AKS Cluster
resource "azurerm_kubernetes_cluster" "aks_cluster" {
  name                = "aks-cluster-${local.suffix}"
  location            = var.location
  resource_group_name = var.rg_name
  dns_prefix          = replace("akscluster${local.suffix}", "-", "")

  # Network related settings
  network_profile {
    network_plugin      = var.network_profile.network_plugin
    network_policy      = var.network_profile.network_policy
    network_plugin_mode = var.network_profile.network_plugin_mode
    load_balancer_sku   = var.network_profile.load_balancer_sku
    outbound_type       = var.network_profile.outbound_type
    dns_service_ip      = var.network_profile.dns_service_ip
    docker_bridge_cidr  = var.network_profile.docker_bridge_cidr
    pod_cidr            = var.network_profile.pod_cidr
    pod_cidrs           = var.network_profile.pod_cidrs
    service_cidr        = var.network_profile.service_cidr
    service_cidrs       = var.network_profile.service_cidrs
    ip_versions         = var.network_profile.ip_versions
    network_mode        = var.network_profile.network_mode
    ebpf_data_plane     = var.network_profile.ebpf_data_plane
  }

  local_account_disabled = false

  role_based_access_control_enabled = true

  azure_active_directory_role_based_access_control {
    managed            = true
    azure_rbac_enabled = true
    #admin_group_object_ids = values(var.aks_admin_groups_aad)
  }

  default_node_pool {
    name                        = "default"
    node_count                  = var.default_node_pool.node_count
    vm_size                     = var.default_node_pool.vm_size
    temporary_name_for_rotation = lower("tmp${random_string.random_str.result}")
  }

  identity {
    type = "SystemAssigned"
  }

  tags = var.tags

  #depends_on = [null_resource.wait_for_cluster]
}

# resource "null_resource" "wait_for_cluster" {
#   provisioner "local-exec" {
#     command = "until kubectl get nodes; do sleep 5; done"
#   }
# }

resource "azurerm_role_assignment" "aad_rbac_cluster_admin" {
  scope              = azurerm_kubernetes_cluster.aks_cluster.id
  role_definition_id = data.azurerm_role_definition.aad_rbac_cluster_admin.id
  for_each           = var.aks_admin_groups_aad
  principal_id       = each.value
}

resource "azurerm_role_assignment" "aad_rbac_aks_cluster_admin_role" {
  scope              = azurerm_kubernetes_cluster.aks_cluster.id
  role_definition_id = data.azurerm_role_definition.aad_rbac_aks_cluster_admin_role.id
  for_each           = var.aks_admin_groups_aad
  principal_id       = each.value
}

resource "azurerm_kubernetes_cluster_node_pool" "aks_node_pools" {
  for_each = var.aks_node_pools

  name                  = each.key
  kubernetes_cluster_id = azurerm_kubernetes_cluster.aks_cluster.id
  vm_size               = each.value.vm_size
  node_count            = each.value.node_count
}

# Retrieve the AKS cluster principal ID
data "azurerm_kubernetes_cluster" "aks_cluster_data" {
  name                = azurerm_kubernetes_cluster.aks_cluster.name
  resource_group_name = azurerm_kubernetes_cluster.aks_cluster.resource_group_name

  depends_on = [azurerm_kubernetes_cluster.aks_cluster]
}

# Create namespace
resource "kubernetes_namespace" "app_namespace" {
  metadata {
    name = local.k8s_app_namespace
  }
}

# Create Nginx Ingress controller
resource "helm_release" "nginx_ingress" {
  name       = "nginx-ingress"
  namespace  = kubernetes_namespace.app_namespace.metadata[0].name
  chart      = "stable/nginx-ingress"
  version    = "1.41.3"
  repository = "https://charts.helm.sh/stable"

  set {
    name  = "controller.service.loadBalancerIP"
    value = azurerm_public_ip.nginx_ingress.ip_address
  }

  depends_on = [
    azurerm_role_assignment.aad_rbac_aks_writer_role,
    kubernetes_namespace.app_namespace
  ]
}

# Create public IP for Nginx Ingress controller
resource "azurerm_public_ip" "nginx_ingress" {
  name                = "nginx-ingress-public-ip-${local.suffix}"
  location            = var.location
  resource_group_name = var.rg_name
  allocation_method   = "Static"
}

Debug Output

Panic Output

Error: Unauthorized
│ 
│   with kubernetes_namespace.app_namespace,
│   on azurerm_aks_cluster.tf line 132, in resource "kubernetes_namespace" "app_namespace":
│  132: resource "kubernetes_namespace" "app_namespace" {
│ 
╵
Operation failed: failed running terraform apply (exit 1)

Steps to Reproduce

Expected Behavior

What should have happened?

The AKS cluster together with the Kubernetes namespace should be created.

Actual Behavior

What actually happened?

Namespace creation failed with an Unauthorized error.

Important Factoids

References

Community Note

jrhouston commented 1 year ago

Thanks for opening an issue @sunilnagavelli. It looks like you are trying to create a Kubernetes cluster and then create Kubernetes resources in the same apply operation – this is known to cause strange issues when trying to authenticate with the cluster. Our documentation recommends splitting these so that your cluster creation and Kubernetes resource creation happen in separate apply runs.
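
For illustration, a minimal sketch of that split: the cluster is created in a first configuration and apply, and the namespace lives in a second, separate configuration that reads the cluster's credentials through a data source (the names aks-cluster-example, example-rg and example-namespace are placeholders):

# Second configuration, applied only after the cluster already exists
data "azurerm_kubernetes_cluster" "aks" {
  name                = "aks-cluster-example" # placeholder: name of the existing cluster
  resource_group_name = "example-rg"          # placeholder: its resource group
}

provider "kubernetes" {
  host                   = data.azurerm_kubernetes_cluster.aks.kube_config[0].host
  client_certificate     = base64decode(data.azurerm_kubernetes_cluster.aks.kube_config[0].client_certificate)
  client_key             = base64decode(data.azurerm_kubernetes_cluster.aks.kube_config[0].client_key)
  cluster_ca_certificate = base64decode(data.azurerm_kubernetes_cluster.aks.kube_config[0].cluster_ca_certificate)
}

resource "kubernetes_namespace" "app_namespace" {
  metadata {
    name = "example-namespace" # placeholder
  }
}

This way the provider is configured from a data source whose values are known at plan time, rather than from a resource whose credentials do not exist until the cluster has been created.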

philippbussche commented 7 months ago

I am actually facing a very similar issue. In my setup the cluster has already existed for a while, but authentication for the kubernetes provider still fails.

What is very similar in my setup is that I am also using an Azure Service Principal to deploy the AKS cluster, and based on this I think @sunilnagavelli is using one too:

provider "azurerm" {
  tenant_id       = var.tenant_id
  client_id       = var.client_id
  client_secret   = var.client_secret
  subscription_id = var.subscription_id
  features {}
}

Furthermore, Azure AD RBAC is enabled for the AKS cluster here, which I have as well:

azure_active_directory_role_based_access_control {
  managed            = true
  azure_rbac_enabled = true
  #admin_group_object_ids = values(var.aks_admin_groups_aad)
}

I am not making use of any admin groups but have instead assigned the necessary Azure AD role (RBAC Admin) directly to the service principal, so that it has the permissions to e.g. create the namespace. I can only assume that @sunilnagavelli has added their service principal to the admin group specified above so that it also has the required permission.
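
For reference, a minimal sketch of such a direct assignment (using the built-in "Azure Kubernetes Service RBAC Admin" role; var.deploy_sp_object_id is a placeholder for the service principal's object ID):

# Grant the deploying service principal Azure RBAC rights on the cluster's data plane
resource "azurerm_role_assignment" "sp_aks_rbac_admin" {
  scope                = azurerm_kubernetes_cluster.aks_cluster.id
  role_definition_name = "Azure Kubernetes Service RBAC Admin"
  principal_id         = var.deploy_sp_object_id # placeholder: object ID of the service principal
}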

So the remaining question is: how do I authenticate the kubernetes provider using an Azure AD Service Principal, which typically has client_id and client_secret properties?

philippbussche commented 7 months ago

This might solve the issue; I will try it now as well:

https://github.com/hashicorp/terraform-provider-kubernetes/issues/2072#issuecomment-1508197008

philippbussche commented 7 months ago

Yes, so with a provider initialization like the following I can confirm that it works when Azure AD RBAC is enabled and things are deployed through a Service Principal:

provider "kubernetes" {
  host                   = format("%s://%s:%s", "https", azurerm_kubernetes_cluster.kubernetes_cluster.fqdn, "443")
  cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.kubernetes_cluster.kube_config[0].cluster_ca_certificate)

  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "/usr/local/bin/kubelogin"
    args = [
      "get-token",
      "--login",
      "spn",
      "--environment",
      "AzurePublicCloud",
      "--tenant-id",
      data.azurerm_client_config.current.tenant_id,
      "--server-id",
      data.azuread_service_principal.aks_server_sp.client_id,
      "--client-id",
      data.azurerm_client_config.current.client_id,
      "--client-secret",
      data.azurerm_key_vault_secret.sp_secret.value,
    ]
  }
}

By the way, I have to use a different value for the host name since we are also using the private API server VNet integration feature.
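
Something like this, assuming the private_fqdn attribute that azurerm_kubernetes_cluster exports for clusters with a private API server (the exec block stays exactly as shown above):

provider "kubernetes" {
  # private_fqdn resolves only from inside the VNet / over the private link
  host                   = format("%s://%s:%s", "https", azurerm_kubernetes_cluster.kubernetes_cluster.private_fqdn, "443")
  cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.kubernetes_cluster.kube_config[0].cluster_ca_certificate)

  # exec { ... } with kubelogin as in the block above
}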