[ISSUE] cannot create secret scope: Azure KeyVault is not available #2465

Open MohnJadden opened 1 year ago

MohnJadden commented 1 year ago

Configuration

terraform {
  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = ">=3.5.0"
    }
    databricks = {
      source = "databricks/databricks"
    }
  }
}

data "azurerm_key_vault" "keyvault-dbricktest" {
  name                = "keyvault-dbricktest"
  resource_group_name = "company-ML-AnalyticsTest-RG"
}

data "azurerm_key_vault_secret" "storage-secret" {
  name         = "Scope1"
  key_vault_id = data.azurerm_key_vault.keyvault-dbricktest.id
}

data "azurerm_key_vault_secret" "svcdatabrickstest" {
  name         = "svcdatabrickstest"
  key_vault_id = "/subscriptions/ourSubscriptionGUID/resourceGroups/company-Admin-RG/providers/Microsoft.KeyVault/vaults/company-Engineering-KV"
}

data "azurerm_client_config" "current" {

}
provider "databricks" {

  #host = "https://${azurerm_databricks_workspace.dbricks-workspace[each.key].workspace_url}"
  azure_client_id     = "databricksServicePrincipalGUID"
  azure_tenant_id     = "ourTenantGUID"
  azure_client_secret = data.azurerm_key_vault_secret.svcdatabrickstest.value
}

resource "azurerm_resource_group" "dbricks-rg" {
  for_each = var.useCases
  name     = "${each.value.useCase}-dbricks"
  location = coalesce(each.value.location, each.key)
  tags = {
    owner            = coalesce(each.value.owner, each.key)
    technicalContact = coalesce(each.value.technicalContact, each.key)
    departmentName   = coalesce(each.value.departmentName, each.key)
    teamName         = coalesce(each.value.teamName, each.key)
  }
}
resource "azurerm_storage_account" "dbricks-storage" {
  for_each                          = var.useCases
  name                              = "${each.value.useCase}dbricksa"
  resource_group_name               = azurerm_resource_group.dbricks-rg[each.key].name
  location                          = coalesce(each.value.location, each.key)
  account_tier                      = "Standard"
  account_replication_type          = "ZRS"
  enable_https_traffic_only         = true
  min_tls_version                   = "TLS1_2"
  public_network_access_enabled     = false
  infrastructure_encryption_enabled = true
  is_hns_enabled                    = true
  cross_tenant_replication_enabled  = false
  default_to_oauth_authentication   = true
  allow_nested_items_to_be_public   = false
  tags = {
    owner            = coalesce(each.value.owner, each.key)
    technicalContact = coalesce(each.value.technicalContact, each.key)
    departmentName   = coalesce(each.value.departmentName, each.key)
    teamName         = coalesce(each.value.teamName, each.key)
  }
  network_rules {
    default_action = "Deny"
    virtual_network_subnet_ids = [
      "/subscriptions/ourSubscriptionGUID/resourceGroups/company-EastVNET-RG/providers/Microsoft.Network/virtualNetworks/company-East-VNET/subnets/ML",
      "/subscriptions/ourSubscriptionGUID/resourceGroups/company-EastVNET-RG/providers/Microsoft.Network/virtualNetworks/company-East-VNET/subnets/Admin",
    ]
    bypass = ["Logging", "Metrics", "AzureServices"]
  }

}

resource "azurerm_databricks_workspace" "dbricks-workspace" {
  depends_on = [
    azurerm_storage_account.dbricks-storage
  ]
  for_each                          = var.useCases
  name                              = "${each.value.useCase}-dbricks"
  location                          = coalesce(each.value.location, each.key)
  resource_group_name               = azurerm_resource_group.dbricks-rg[each.key].name
  sku                               = "premium"
  managed_resource_group_name       = "${azurerm_resource_group.dbricks-rg[each.key].name}-managedrg"
  custom_parameters {
    no_public_ip                                          = true
    virtual_network_id                                    = coalesce(each.value.vnetID, each.key)
    public_subnet_name                                    = coalesce(each.value.publicSubnetName, each.key)
    private_subnet_name                                   = coalesce(each.value.privateSubnetName, each.key)
    public_subnet_network_security_group_association_id   = coalesce(each.value.publicSubnetNSG, each.key)
    private_subnet_network_security_group_association_id  = coalesce(each.value.privateSubnetNSG, each.key)
    storage_account_name                                  = azurerm_storage_account.dbricks-storage[each.key].name
  }

}
resource "azurerm_role_assignment" "dbricks-storage-user" {
  for_each             = var.useCases
  role_definition_name = "Storage Blob Data Contributor"
  scope                = "/subscriptions/ourSubscriptionGUID/resourceGroups/${azurerm_resource_group.dbricks-rg[each.key].name}"
  principal_id         = coalesce(each.value.securityGroup, each.key)
}

resource "azurerm_key_vault" "dbricks-kv" {
  for_each                   = var.useCases
  name                       = "${each.value.useCase}-dbricks-kv"
  location                   = coalesce(each.value.location, each.key)
  resource_group_name        = azurerm_resource_group.dbricks-rg[each.key].name
  tenant_id                  = "ourTenantGUID"
  soft_delete_retention_days = 7
  purge_protection_enabled   = false
  sku_name                   = "standard"
  access_policy {
    tenant_id               = "ourTenantGUID"
    object_id               = data.azurerm_client_config.current.object_id
    secret_permissions      = ["Delete", "Get", "List", "Set"]
    certificate_permissions = ["ManageContacts"]
  }
  lifecycle {
    ignore_changes = [access_policy]
  }
}

# Grant KV access to our user account in order to set the secret
# resource "azurerm_key_vault_access_policy" "dbricks-kv-access" {
#     for_each = var.useCases
#       tenant_id          = data.azurerm_client_config.current.tenant_id
#       object_id          = data.azurerm_client_config.current.object_id
#       key_vault_id       = azurerm_key_vault.dbricks-kv[each.key].id
#       secret_permissions = ["Delete", "Get", "List", "Set"]
#       certificate_permissions = ["ManageContacts"]

#     }

resource "databricks_secret_scope" "dbricks-kv-scope" {
  for_each                 = var.useCases
  name                     = "${each.value.useCase}-dbricks-scope"
  initial_manage_principal = "users"
  keyvault_metadata {
    resource_id = azurerm_key_vault.dbricks-kv[each.key].id
    dns_name    = azurerm_key_vault.dbricks-kv[each.key].vault_uri
  }
}
resource "databricks_secret" "storage" {
  for_each     = var.useCases
  key          = "Storage"
  string_value = data.azurerm_key_vault_secret.storage-secret.value
  scope        = databricks_secret_scope.dbricks-kv-scope[each.key].id
}

resource "databricks_cluster_policy" "dbricks-baseline" {
  for_each   = var.useCases
  name       = "${each.value.useCase}-baseline"
  definition = <<EOT
{
  "spark_conf.spark.databricks.cluster.profile": {
    "type": "forbidden",
    "hidden": true
  },
  "spark_conf.spark.driver.extraJavaOptions": {
    "type": "fixed",
    "value": "-Duser.timezone=America/New_York",
    "hidden": true
  },
  "spark_conf.spark.ui.retainedTasks": {
    "type": "fixed",
    "value": "100000",
    "hidden": true
  },
  "spark_conf.spark.ui.retainedStages": {
    "type": "fixed",
    "value": "3000",
    "hidden": true
  },
  "azure_attributes.availability": {
    "type": "fixed",
    "value": "ON_DEMAND_AZURE"
  },
  "autotermination_minutes": {
    "type": "fixed",
    "value": 20
  },
    "spark_conf.fs.azure.account.oauth2.client.id.${azurerm_storage_account.dbricks-storage[each.key].name}.dfs.core.windows.net": {
    "type": "fixed",
    "value": "databricksServicePrincipalGUID",
    "hidden": true
  },
  "spark_conf.spark.ui.retainedJobs": {
    "type": "fixed",
    "value": "1000",
    "hidden": true
  },
  "spark_conf.spark.databricks.delta.preview.enabled": {
    "type": "fixed",
    "value": "true",
    "hidden": true
  },
  "spark_conf.fs.azure.account.oauth2.client.endpoint.${azurerm_storage_account.dbricks-storage[each.key].name}.dfs.core.windows.net": {
    "type": "fixed",
    "value": "https://login.microsoftonline.com/ourTenantGUID/oauth2/token",
    "hidden": true
  },
  "spark_conf.spark.sql.ui.retainedExecutions": {
    "type": "fixed",
    "value": "2000",
    "hidden": true
  },
  "spark_conf.fs.azure.account.oauth2.client.secret.${azurerm_storage_account.dbricks-storage[each.key].name}.dfs.core.windows.net": {
    "type": "fixed",
    "value": "{{secrets/${databricks_secret_scope.dbricks-kv-scope[each.key].name}/Storage}}",
    "hidden": true
  },
  "spark_conf.fs.azure.account.auth.type.${azurerm_storage_account.dbricks-storage[each.key].name}.dfs.core.windows.net": {
    "type": "fixed",
    "value": "OAuth",
    "hidden": true
  },
  "spark_conf.fs.azure.account.oauth.provider.type.${azurerm_storage_account.dbricks-storage[each.key].name}.dfs.core.windows.net": {
    "type": "fixed",
    "value": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "hidden": true
  },
  #  "autoscale.min_workers": {
  #   "type": "fixed",
  #   "value": coalesce(each.value.minWorkers, each.key)
  # },
  # "autoscale.max_workers": {
  #   "type": "fixed",
  #   "value": coalesce(each.value.maxWorkers, each.key)
  # },
  "node_type_id": {
    "type": "fixed",
    "value": coalesce(each.value.workerNodeTypeID, each.key)
  },
  "driver_node_type_id": {
    "type": "fixed",
    "value": coalesce(each.value.driverNodeTypeID, each.key)
  }
EOT
}

resource "databricks_permissions" "can_use_baseline_policy" {
  for_each          = var.useCases
  cluster_policy_id = databricks_cluster_policy.dbricks-baseline[each.key].id
  access_control {
    group_name       = "All Users"
    permission_level = "CAN_USE"
  }
}

Expected Behavior

The Key Vault should be created and allow my user account to create/delete/get/list/set secrets. A secret scope should be created within the Databricks workspace. The secret scope should be able to talk to the Key Vault specified in each key.

Actual Behavior

The Key Vault is created, but we receive an error:

│ Error: cannot create secret scope: Azure KeyVault is not available
│
│   with module.databricks.databricks_secret_scope.dbricks-kv-scope["useCase1"],
│   on modules\databricks\main.tf line 138, in resource "databricks_secret_scope" "dbricks-kv-scope":
│  138: resource "databricks_secret_scope" "dbricks-kv-scope" {

The error recurs whenever we run terraform apply after the initial creation.

Steps to Reproduce

  1. Run terraform apply
  2. Specify yes to apply changes
  3. Error occurs

Terraform and provider versions

Terraform v1.3.3
on windows_amd64
+ provider registry.terraform.io/databricks/databricks v1.20.0
+ provider registry.terraform.io/hashicorp/azurerm v3.63.0

Debug Output

https://gist.github.com/MohnJadden/f945f60bd1b50fcb7bdb18096b18eed7

Important Factoids

The error recurs even if I delete the entire resource group that Terraform creates and re-create it. I noticed in the debug logs an error saying I don't have the ManageContacts certificate permission on the Key Vault, but even after adding that permission to my access policy on the KVs, the error continues.

VirtualEvan commented 1 year ago

I am facing the same issue.

By looking at the source code where the error comes from, it looks like the provider is failing to detect the HTTP client configuration for Azure.

https://github.com/databricks/terraform-provider-databricks/blob/b52eed9de62e43a17895e99406ee3bed699a7e72/secrets/resource_secret_scope.go#L68
https://github.com/databricks/terraform-provider-databricks/blob/b52eed9de62e43a17895e99406ee3bed699a7e72/common/client.go#L148

I couldn't find any relevant information in the debug log so far.

alexott commented 1 year ago

You are missing the host property or azure_workspace_resource_id: https://registry.terraform.io/providers/databricks/databricks/latest/docs#azure_workspace_resource_id
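
For illustration, a minimal sketch of what that could look like once a workspace exists; the resource name "example" and the var.* references below are placeholders, not names from the configuration above:

provider "databricks" {
  # Hypothetical: point the provider at a single, known workspace.
  azure_workspace_resource_id = azurerm_databricks_workspace.example.id
  azure_client_id             = var.client_id     # placeholder
  azure_tenant_id             = var.tenant_id     # placeholder
  azure_client_secret         = var.client_secret # placeholder
}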

MohnJadden commented 1 year ago

@alexott I had that commented out in the original above. The provider block doesn't allow the use of each.key, so it can't read the resource ID of the Databricks workspace we're trying to create.

This presents a bit of an issue. This module is meant to create Azure Databricks workspaces, and the workspace URL is not available until the workspace is created. How are we meant to define a host for the provider when the workspace is a brand-new resource and the provider block won't accept a loop? It would mean creating static main.tf files that define each individual environment, which is not really how we want to proceed.
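
For reference, one workaround that comes up for this chicken-and-egg problem is to declare one provider alias per use case and pin the in-workspace resources to the matching alias. The sketch below hard-codes a hypothetical "useCase1" key and reuses names from the configuration above, so it trades the for_each loop for per-environment repetition:

# Sketch only: "useCase1" stands in for one concrete key of var.useCases.
provider "databricks" {
  alias                       = "usecase1"
  azure_workspace_resource_id = azurerm_databricks_workspace.dbricks-workspace["useCase1"].id
  azure_client_id             = "databricksServicePrincipalGUID"
  azure_tenant_id             = "ourTenantGUID"
  azure_client_secret         = data.azurerm_key_vault_secret.svcdatabrickstest.value
}

resource "databricks_secret_scope" "usecase1" {
  provider = databricks.usecase1
  name     = "useCase1-dbricks-scope"
  keyvault_metadata {
    resource_id = azurerm_key_vault.dbricks-kv["useCase1"].id
    dns_name    = azurerm_key_vault.dbricks-kv["useCase1"].vault_uri
  }
}

The other common layout is to split workspace creation and in-workspace configuration into separate root modules, so the second configuration can take the workspace resource ID as a plain input variable.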