hashicorp / terraform-provider-azurerm

Terraform provider for Azure Resource Manager
https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs
Mozilla Public License 2.0
4.53k stars 4.61k forks source link

azurerm_network_security_group: Plan/Apply for 'dynamic "securtity_rule"' deletes and recreates **all rules with lists** when a rule is deleted #22630

Open kahawai-sre opened 1 year ago

kahawai-sre commented 1 year ago

Is there an existing issue for this?

Community Note

Terraform Version

1.5.3

AzureRM Provider Version

3.63.0

Affected Resource(s)/Data Source(s)

azurerm_network_security_group - inline security_rules

Terraform Configuration Files

terraform {
  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "= 3.63.0"
    }
  }
}

provider "azurerm" {
  features {}
}

variable "nsgs" {
  type = map(object({
    name                = string
    location            = string
    resource_group_name = string
    tags                = optional(map(string))
    security_rules = optional(list(object({
      name                                       = string
      access                                     = string
      direction                                  = string
      priority                                   = string
      protocol                                   = string
      description                                = optional(string)
      source_address_prefix                      = optional(string)
      source_address_prefixes                    = optional(list(string))
      source_application_security_group_ids      = optional(list(string))
      source_port_range                          = optional(string)
      source_port_ranges                         = optional(list(string))
      destination_address_prefix                 = optional(string)
      destination_address_prefixes               = optional(list(string))
      destination_application_security_group_ids = optional(list(string))
      destination_port_range                     = optional(string)
      destination_port_ranges                    = optional(list(string))
    })))
  }))
  default = {
    "nsg-app1" = {
      "location"            = "australiaeast"
      "name"                = "nsg-app1"
      "resource_group_name" = "nsgtest"
      "security_rules" = [
        {
          "access"                     = "Allow"
          "description"                = "rule1"
          "destination_address_prefix" = "*"
          "destination_port_range"     = "80"
          "direction"                  = "Inbound"
          "name"                       = "rule1"
          "priority"                   = 130
          "protocol"                   = "*"
          "source_address_prefix"      = "*"
          "source_port_range"          = "*"
        },
        {
          "access"                     = "Allow"
          "description"                = "rule2"
          "destination_address_prefix" = "*"
          "destination_port_range"     = "443"
          "direction"                  = "Inbound"
          "name"                       = "rule2"
          "priority"                   = 131
          "protocol"                   = "Tcp"
          "source_address_prefix"      = "*"
          "source_port_range"          = "*"
        },
        {
          "access"                     = "Allow"
          "description"                = "rule3"
          "destination_address_prefix" = "*"
          "destination_port_range"     = "3389"
          "direction"                  = "Inbound"
          "name"                       = "rule3"
          "priority"                   = 132
          "protocol"                   = "Tcp"
          "source_address_prefix"      = "*"
          "source_port_range"          = "*"
        },
        {
          "access"      = "Allow"
          "description" = "rule4"
          "destination_address_prefixes" = [
            "10.0.1.0/24",
          ]
          "destination_port_range" = "3388"
          "direction"              = "Inbound"
          "name"                   = "rule4"
          "priority"               = 133
          "protocol"               = "Tcp"
          "source_address_prefixes" = [
            "10.1.0.0/24",
            "192.168.10.0/24",
          ]
          "source_port_range" = "*"
        },
        {
          "access"                     = "Allow"
          "description"                = "rule5"
          "destination_address_prefix" = "*"
          "destination_port_range"     = "3390"
          "direction"                  = "Inbound"
          "name"                       = "rule5"
          "priority"                   = 134
          "protocol"                   = "Tcp"
          "source_address_prefix"      = "*"
          "source_port_range"          = "*"
        },
        {
          "access"      = "Allow"
          "description" = "rule6"
          "destination_address_prefixes" = [
            "10.0.1.0/24",
          ]
          "destination_port_range" = "1433"
          "direction"              = "Inbound"
          "name"                   = "rule6"
          "priority"               = 135
          "protocol"               = "Tcp"
          "source_address_prefixes" = [
            "10.2.0.0/24",
            "192.168.10.0/24",
          ]
          "source_port_range" = "*"
        },
      ]
      "tags" = null
    }
  }
}

resource "azurerm_resource_group" "this" {
  name     = "nsgtest"
  location = "australiaeast"
}

resource "azurerm_network_security_group" "mynsg" {
  depends_on          = [azurerm_resource_group.this]
  for_each            = var.nsgs
  name                = each.value.name
  location            = each.value.location
  resource_group_name = each.value.resource_group_name
  tags                = each.value.tags

  dynamic "security_rule" {
    for_each = each.value.security_rules
    content {
      name                                       = security_rule.value.name
      access                                     = security_rule.value.access
      direction                                  = security_rule.value.direction
      priority                                   = security_rule.value.priority
      protocol                                   = security_rule.value.protocol
      description                                = try(security_rule.value.description, null)
      source_address_prefix                      = try(security_rule.value.source_address_prefix, null)
      source_address_prefixes                    = try(security_rule.value.source_address_prefixes, null)
      source_port_range                          = try(security_rule.value.source_port_range, null)
      source_port_ranges                         = try(security_rule.value.source_port_ranges, null)
      destination_address_prefix                 = try(security_rule.value.destination_address_prefix, null)
      destination_address_prefixes               = try(security_rule.value.destination_address_prefixes, null)
      destination_port_range                     = try(security_rule.value.destination_port_range, null)
      destination_port_ranges                    = try(security_rule.value.destination_port_ranges, null)
    }
  }
}

Debug Output/Panic Output

https://gist.github.com/kahawai-sre/4d807bb37957dce9ae66730b0dfe884f#file-gistfile1-txt

Expected Behaviour

Initial apply works correctly - the NSG and inline rules are created as per var.nsgs.

If a rule is deleted from var.nsgs security_rules, for example "rule3", terraform plan/apply should show the only change being the deletion of rule3.

This behavior should be consistent irrespective of whether any of the rules contain properties with lists (e.g. "source_address_prefixes"), dynamic lookups, or basic strings.

Actual Behaviour

After the initial apply/deployment succeeds, If "rule3" is removed from the security_rules section of var.nsgs, and plan or apply run subsequently, plan shows rules other than "rule3" being deleted and re-created. Specifically, in the example provided, deleting rule3 from security_rules after initial apply, and doing a second plan/apply, shows "rule4" and "rule6" being deleted and re-created, but not rule5.

From my testing this only happens when the list of security_rules contains rules with fields of type list(string) e.g. "source_address_prefixes" and "destination_address_prefixes" in the examples above.

If the rules in var.nsgs contain only simple string-based properties (no properties with data type = list(string), any rule in the list can be removed, and plan or apply correctly shows just that rule being removed as would be expected.

Steps to Reproduce

  1. Remove "rule3" from the security_rules section of variable "nsgs"
  2. Re-run terraform plan or apply
  3. Observe the plan output showing "rule4" and "rule6" being deleted and recreated (not expected) alongside the deletion of rule3 (expected) This makes reviewing the plan problematic especially in scenarios where they may be a large number of rules, and does not inspire confidence because valid rules appear to be deleted.
  4. Re-test by doing a TF destroy, changing the list of security_rules to remove any list-based fields, apply, remove a rule in the middle of the list again, plan/apply and observe the plan output show just the rule being deleted.

Have I produced this issue through coding, or is it a known issue or bug?

Important Factoids

In this instance azurerm_network_security_rule is not a viable alternative because is does not track drift in rules created on an NSG outside of the terraform deployment, and that breaks compliance.

References

No response

kahawai-sre commented 1 year ago

@magodo or @tombuildsstuff any chance you can provide some brief review and feedback on this issue?

magodo commented 1 year ago

This duplicates to https://github.com/hashicorp/terraform/issues/32991, which shall be fixed once we migrate to the new plugin framework.

kahawai-sre commented 1 year ago

Thanks for taking the time to comment @magodo I see that particular issue can be worked around by downgrading the terraform binary to < v1.4, but that does not seem to help in this case. For example I see the same issue with e.g. v1.3.9. Questions:

magodo commented 1 year ago

@kahawai-sre I just found an old issue that asking the same thing: https://github.com/hashicorp/terraform-provider-azurerm/issues/5819. From my previous comment: https://github.com/hashicorp/terraform-provider-azurerm/issues/5819#issuecomment-589553259, the issue should still resides in the legacy SDK, and you can mitigate it by:

set the Default value for each optional string property inside security_rule schema

I've verified that works with TF v1.5.0 and the latest provider locally.

For the new plugin framework integration, is there a timeline for that?

Unfortunately, no.

kahawai-sre commented 1 year ago

Thanks @magodo - that works for me 👍

For reference I updated the defaults for each property to either "" (string) or [] (list(string)), and removed the try() handlers and this resolved the issue. Thanks again!

variable "nsgs" {
  type = map(object({
    name                = string
    location            = string
    resource_group_name = string
    tags                = optional(map(string))
    security_rules = optional(list(object({
      name                    = string
      access                  = string
      direction               = string
      priority                = number
      protocol                = string
      description             = optional(string, "")
      source_address_prefix   = optional(string, "")
      source_address_prefixes = optional(list(string), [])
      source_application_security_group_ids = optional(list(string), [])
      source_port_range                     = optional(string, "")
      source_port_ranges                    = optional(list(string), [])
      destination_address_prefix            = optional(string, "")
      destination_address_prefixes          = optional(list(string), [])
      destination_application_security_group_ids = optional(list(string), [])
      destination_port_range                     = optional(string, "")
      destination_port_ranges                    = optional(list(string), [])
    })))
  })) .... }
kahawai-sre commented 1 year ago

@tombuildsstuff and @metacpp please note that the same issue exists with "route" under _azurerm_routetable (inline routes). Specifically _next_hop_in_ipaddress is set by the provider/API to "" after apply for any routes that do not specify this. So if we delete an inline route, any routes that have not specified a value for next_hop_in_ip_address are deleted and recreated. In this instance we cannot implement the fix described for inline NSG rules above because - for some reason - a validation has been added to the provider preventing passing an empty string "" as the value for this property (refer https://github.com/hashicorp/terraform-provider-azurerm/issues/2736 for example). So, we do not specify, or pass null, but subsequently next_hop_in_ip_address is set to "" in the state file, and - as above - rule changes compare null with "" and then delete and recreate all routes where that optional property not used.

Given users are relying on inline NSG rules and routes - partly because it is documented as a viable method - and partly - at least for NSG rules - because the distinct _azurerm_network_securityrule option does not enforce drift detection for rules that might be made out of band, and in certain environments this breaks compliance controls. It would be great if this can be kept in mind for any future changes to either make the capability more robust (if needed) or at least not break current state described in the issue / fix above. i.e.: