hashicorp/terraform-provider-google

Terraform Provider for Google Cloud Platform
https://registry.terraform.io/providers/hashicorp/google/latest/docs
Mozilla Public License 2.0

5.X Performance Issues for resource_access_context_manager_service_perimeter #19474

Open tlines2016 opened 1 month ago

tlines2016 commented 1 month ago

Terraform Version & Provider Version(s)

Terraform v1.2.2 & v1.8.3 (both versions yield the same result)

Affected Resource(s)

google_access_context_manager_service_perimeter

Terraform Configuration

Config Before the Change

terraform {
  required_version = "1.2.2"

  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "~> 4.28"
    }
    google-beta = {
      source  = "hashicorp/google-beta"
      version = "~> 4.28"
    }
  }
}

Config After the Change

terraform {
  required_version = "1.2.2" #Attempted "1.8.3" with the same results

  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "~> 5.44"
    }
    google-beta = {
      source  = "hashicorp/google-beta"
      version = "~> 5.44"
    }
  }
}

Terraform Resource Config

locals {
  access_policy_id = var.create_access_context_manager_access_policy ? "accessPolicies/${google_access_context_manager_access_policy.access_policy[0].name}" : "accessPolicies/${var.access_policy_id}"
}

resource "google_access_context_manager_access_policy" "access_policy" {
  count  = var.create_access_context_manager_access_policy ? 1 : 0
  parent = "organizations/${var.org_id}"
  title  = "default policy"
}

resource "google_access_context_manager_access_level" "access_level" {
  for_each    = var.vpcsc_access_levels
  parent      = local.access_policy_id
  name        = "${local.access_policy_id}/accessLevels/${each.key}"
  title       = each.key
  description = each.value.description

  basic {
    combining_function = each.value.combining_function
    dynamic "conditions" {
      for_each = each.value.conditions
      iterator = access_level_condition
      content {
        ip_subnetworks         = lookup(access_level_condition.value, "ip_subnetworks", null)
        required_access_levels = try(formatlist("${local.access_policy_id}/accessLevels/%s", lookup(access_level_condition.value, "required_access_levels", null)), null)
        members                = lookup(access_level_condition.value, "members", null)
        negate                 = lookup(access_level_condition.value, "negate", false)
        regions                = lookup(access_level_condition.value, "regions", null)

        dynamic "vpc_network_sources" {
          for_each = lookup(access_level_condition.value, "vpc_network_sources", null) != null ? lookup(access_level_condition.value, "vpc_network_sources", null) : {}
          content {
            vpc_subnetwork {
              network            = "//compute.googleapis.com/projects/${vpc_network_sources.value.network_project_id}/global/networks/${vpc_network_sources.value.vpc_network_name}"
              vpc_ip_subnetworks = lookup(vpc_network_sources.value, "ip_address_ranges", null)
            }
          }
        }
      }
    }
  }
}

resource "google_access_context_manager_service_perimeter" "service_perimeter" {
  for_each       = var.vpcsc_perimeters
  parent         = local.access_policy_id
  name           = "${local.access_policy_id}/servicePerimeters/${each.key}"
  description    = lookup(each.value, "description", "")
  perimeter_type = "PERIMETER_TYPE_REGULAR"
  title          = each.key

  lifecycle {
    ignore_changes = [ // Projects added to Perimeter handled separately
      status[0].resources,
      spec[0].resources
    ]
  }

  ####### [Status (Enforced Perimeter) - Start] #######
  status {
    restricted_services = length(var.restricted_services) > 0 ? var.restricted_services : local.all_vpcsc_services
    access_levels = formatlist("${local.access_policy_id}/accessLevels/%s", each.value.access_levels)

    dynamic "vpc_accessible_services" {
      for_each = contains(var.vpc_accessible_services, "*") ? [] : [var.vpc_accessible_services]
      content {
        enable_restriction = true
        allowed_services   = vpc_accessible_services.value
      }
    } ####################### [Enforced VPC Accessible Services - End] ###############################

    ####################### [Enforced Ingress Policies - Start] ############################
    dynamic "ingress_policies" {
      for_each = each.value.ingress_policies
      iterator = ingress_policies
      content {
        ### [Enforced Ingress From Block] ###
        ingress_from {
          identity_type = lookup(ingress_policies.value.from, "identity_type", null)
          identities    = lookup(ingress_policies.value.from, "identities", null)

          dynamic "sources" {
            for_each = contains(keys(ingress_policies.value.from), "sources") ? merge(
              contains(keys(ingress_policies.value.from["sources"]), "access_levels")
              ? { for v in ingress_policies.value.from.sources.access_levels : v => "access_level" } : {},
              contains(keys(ingress_policies.value.from["sources"]), "resources")
              ? { for v in ingress_policies.value.from.sources.resources : v => "resource" } : {}
            ) : {}
            content {
              access_level = sources.value == "access_level" ? sources.key != "*" ? "${local.access_policy_id}/accessLevels/${sources.key}" : "*" : null
              resource     = sources.value == "resource" ? sources.key : null
            }
          }
        }
        ### [Enforced Ingress To Block] ###
        ingress_to {
          resources = lookup(ingress_policies.value.to, "resources", null)
          dynamic "operations" {
            for_each = ingress_policies.value.to.operations
            content {
              service_name = operations.value.service_name

              dynamic "method_selectors" {
                for_each = operations.value.method_selectors
                content {
                  method     = lookup(method_selectors.value, "method", null)
                  permission = lookup(method_selectors.value, "permission", null)
                }
              }
            }
          }
        } 
      }
    }
    ####################### [Enforced Ingress Policies - End] ############################

    ####################### [Enforced Egress Policies - Start] ############################
    dynamic "egress_policies" {
      for_each = each.value.egress_policies
      iterator = egress_policies
      content {
        egress_from {
          identity_type = lookup(egress_policies.value.from, "identity_type", null)
          identities    = lookup(egress_policies.value.from, "identities", null)

          dynamic "sources" {
            for_each = (
              contains(keys(egress_policies.value.from), "sources") ? (
                contains(keys(egress_policies.value.from.sources), "access_levels")
                ? { for v in egress_policies.value.from.sources.access_levels : v => "access_level" }
                : {}
              )
              : {}
            )
            content {
              access_level = sources.value == "access_level" ? sources.key != "*" ? "${local.access_policy_id}/accessLevels/${sources.key}" : "*" : null
            }
          }
          source_restriction = lookup(egress_policies.value.from, "sources", null) != null ? "SOURCE_RESTRICTION_ENABLED" : null
        }
        egress_to {
          resources          = lookup(egress_policies.value.to, "resources", null)
          external_resources = lookup(egress_policies.value.to, "external_resources", [])

          dynamic "operations" {
            for_each = egress_policies.value.to.operations
            content {
              service_name = operations.value.service_name

              dynamic "method_selectors" {
                for_each = operations.value.method_selectors
                content {
                  method     = lookup(method_selectors.value, "method", null)
                  permission = lookup(method_selectors.value, "permission", null)
                }
              }
            }
          }
        }

      }
    }
    ####################### [Enforced Egress Policies - End] ############################

  }
  ####### [Status (Enforced Perimeter) - End] #######
  ####### [Spec (Dry Run Perimeter) - Start] #######
  spec {
    restricted_services = length(var.restricted_services_dry_run) > 0 ? var.restricted_services_dry_run : local.all_vpcsc_services_dryrun
    access_levels = formatlist("${local.access_policy_id}/accessLevels/%s", each.value.access_levels_dry_run)

    dynamic "vpc_accessible_services" {
      for_each = contains(var.vpc_accessible_services_dry_run, "*") ? [] : [var.vpc_accessible_services_dry_run]
      content {
        enable_restriction = true
        allowed_services   = vpc_accessible_services.value
      }
    } ####################### [Dry Run VPC Accessible Services - End] ###############################

    ####################### [Dry Run Ingress Policies - Start] ############################
    dynamic "ingress_policies" {
      for_each = each.value.ingress_policies_dry_run
      iterator = ingress_policies_dry_run
      content {
        ingress_from {
          identity_type = lookup(ingress_policies_dry_run.value.from, "identity_type", null)
          identities    = lookup(ingress_policies_dry_run.value.from, "identities", null)

          dynamic "sources" {
            for_each = contains(keys(ingress_policies_dry_run.value.from), "sources") ? merge(
              contains(keys(ingress_policies_dry_run.value.from["sources"]), "access_levels")
              ? { for v in ingress_policies_dry_run.value.from.sources.access_levels : v => "access_level" } : {},
              contains(keys(ingress_policies_dry_run.value.from["sources"]), "resources")
              ? { for v in ingress_policies_dry_run.value.from.sources.resources : v => "resource" } : {}
            ) : {}
            content {
              access_level = sources.value == "access_level" ? sources.key != "*" ? "${local.access_policy_id}/accessLevels/${sources.key}" : "*" : null
              resource     = sources.value == "resource" ? sources.key : null
            }
          }
        }
        ### [Dryrun Ingress To Block] ###
        ingress_to {
          resources = lookup(ingress_policies_dry_run.value.to, "resources", null)

          dynamic "operations" {
            for_each = ingress_policies_dry_run.value.to.operations
            content {
              service_name = operations.value.service_name

              dynamic "method_selectors" {
                for_each = operations.value.method_selectors
                content {
                  method     = lookup(method_selectors.value, "method", null)
                  permission = lookup(method_selectors.value, "permission", null)
                }
              }
            }
          }
        } 
      }
    }
    ####################### [Dry Run Ingress Policies - End] ############################

    ####################### [Dry Run Egress Policies - Start] ############################
    dynamic "egress_policies" {
      for_each = each.value.egress_policies_dry_run
      iterator = egress_policies_dry_run
      content {
        egress_from {
          identity_type = lookup(egress_policies_dry_run.value.from, "identity_type", null)
          identities    = lookup(egress_policies_dry_run.value.from, "identities", null)
          dynamic "sources" {
            for_each = (
              contains(keys(egress_policies_dry_run.value.from), "sources") ? (
                contains(keys(egress_policies_dry_run.value.from.sources), "access_levels")
                ? { for v in egress_policies_dry_run.value.from.sources.access_levels : v => "access_level" }
                : {}
              )
              : {}
            )
            content {
              access_level = sources.value == "access_level" ? sources.key != "*" ? "${local.access_policy_id}/accessLevels/${sources.key}" : "*" : null
            }
          }
          source_restriction = lookup(egress_policies_dry_run.value.from, "sources", null) != null ? "SOURCE_RESTRICTION_ENABLED" : null
        }

        egress_to {
          resources          = lookup(egress_policies_dry_run.value.to, "resources", null)
          external_resources = lookup(egress_policies_dry_run.value.to, "external_resources", [])

          dynamic "operations" {
            for_each = egress_policies_dry_run.value.to.operations
            content {
              service_name = operations.value.service_name

              dynamic "method_selectors" {
                for_each = operations.value.method_selectors
                content {
                  method     = lookup(method_selectors.value, "method", null)
                  permission = lookup(method_selectors.value, "permission", null)
                }
              }
            }
          }
        }

      }
    }
    ####################### [Dry Run Egress Policies - End] ############################
  }
  use_explicit_dry_run_spec = (length(var.restricted_services_dry_run) > 0 || length(each.value.access_levels_dry_run) > 0 || !contains(var.vpc_accessible_services_dry_run, "*"))

  depends_on = [
    google_access_context_manager_access_level.access_level
  ]

}

Terraform Module

To give a sense of scale: the existing VPC-SC Service Perimeter managed by the above resource has both the Enforced Perimeter and the Dry Run Perimeter created. The Perimeter is rather large, with 15,000+ projects within Enforced, and the same 15,000+ projects within Dry Run. The management of projects within the Perimeter is handled by a separate API, not by the above resource.

We also have 40+ Ingress Policies and 40+ Egress Policies within both enforced and dry run. Each Ingress/Egress Policy contains a varying number of identities and resources, totaling 2,500+ ingress/egress attributes.

With the scale in mind, below is just a small portion of the Terraform Module.

### main.tf file
module "vpcsc" {
  source = "xxx"
  org_id                                      = var.org_id
  access_policy_id                            = var.access_policy_id
  vpcsc_perimeters                            = var.vpcsc_perimeters
  vpcsc_access_levels                         = var.vpcsc_access_levels
  create_access_context_manager_access_policy = var.create_access_context_manager_access_policy
}
### terraform.tfvars file
org_id = "11223344"
vpcsc_access_levels = {}
vpcsc_perimeters = {
  "prod_vpcsc_perimeter" = {
    description    = "Org Service Perimeter"
    access_levels  = []
    ingress_policies = [
      {
        from = {
          identities = [
            "serviceAccount:xxx@xxx.iam.gserviceaccount.com",
          ]
          sources = {
            access_levels = ["*"]
          }
        }
        to = {
          resources = [
            "projects/xxx",
          ]
          operations = [{
            service_name = "storage.googleapis.com"
            method_selectors = [{
              method = "*"
            }]
          }]
        }
      },
      # List continues with 40+ Ingress Policies defined with varying numbers of identities and resources
    ]
    egress_policies = [
      {
        from = {
          identity_type = "ANY_IDENTITY"
        }
        to = {
          resources = [
            "projects/xxxx",
            "projects/yyyy",
          ]
          operations = [
            {
              service_name     = "artifactregistry.googleapis.com"
              method_selectors = [{ method = "*" }]
            },
            {
              service_name     = "storage.googleapis.com"
              method_selectors = [{ method = "*" }]
            }
          ]
        }
      },
      # List continues with 40+ Egress Policies defined with varying numbers of identities and resources
    ]
    ingress_policies_dry_run = [
      {
        from = {
          identities = [
            "serviceAccount:xxx@xxx.iam.gserviceaccount.com",
          ]
          sources = {
            access_levels = ["*"]
          }
        }
        to = {
          resources = [
            "projects/xxx",
          ]
          operations = [{
            service_name = "storage.googleapis.com"
            method_selectors = [{
              method = "*"
            }]
          }]
        }
      },
      # List continues with 40+ Ingress Policies defined with varying numbers of identities and resources
    ]
    egress_policies_dry_run = [
      {
        from = {
          identity_type = "ANY_IDENTITY"
        }
        to = {
          resources = [
            "projects/xxxx",
            "projects/yyyy",
          ]
          operations = [
            {
              service_name     = "artifactregistry.googleapis.com"
              method_selectors = [{ method = "*" }]
            },
            {
              service_name     = "storage.googleapis.com"
              method_selectors = [{ method = "*" }]
            }
          ]
        }
      },
      # List continues with 40+ Egress Policies defined with varying numbers of identities and resources
    ]
  }
}

Debug Output

Below is a trace from a test with provider v5.40.0; the same results occur with v5.44.0 as well. The dag/walk step repeats continuously for 20+ minutes until the refresh finally completes.

[TRACE] vertex "module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter": entering dynamic subgraph
[TRACE] vertex "module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter[\"prod_vpcsc_perimeter\"]": starting visit (*terraform.NodePlannableResourceInstance)
[TRACE] readResourceInstanceState: reading state for module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter["prod_vpcsc_perimeter"]
[TRACE] upgradeResourceState: schema version of module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter["prod_vpcsc_perimeter"] is still 0; calling provider "google" for any other minor fixups
[TRACE] GRPCProvider: UpgradeResourceState
[TRACE] provider.terraform-provider-google_v5.40.0_x5: Received request: tf_provider_addr=registry.terraform.io/hashicorp/google tf_req_id=12a22123-a11b-3344-c555-defr12ab345 tf_rpc=UpgradeResourceState @caller=github.com/hashicorp/terraform-plugin-go@v0.23.0/tfprotov5/tf5server/server.go:743 @module=sdk.proto tf_proto_version=5.6 tf_resource_type=google_access_context_manager_service_perimeter timestamp=
[TRACE] provider.terraform-provider-google_v5.40.0_x5: Sending request downstream: tf_rpc=UpgradeResourceState @module=sdk.proto tf_provider_addr=registry.terraform.io/hashicorp/google tf_req_id=12a22123-a11b-3344-c555-defr12ab345 tf_resource_type=google_access_context_manager_service_perimeter @caller=github.com/hashicorp/terraform-plugin-go@v0.23.0/tfprotov5/internal/tf5serverlogging/downstream_request.go:22 tf_proto_version=5.6 timestamp=
[TRACE] provider.terraform-provider-google_v5.40.0_x5: calling downstream server: @module=sdk.mux tf_mux_provider=*schema.GRPCProviderServer tf_rpc=UpgradeResourceState @caller=github.com/hashicorp/terraform-plugin-mux@v0.15.0/internal/logging/mux.go:19 timestamp=
[TRACE] provider.terraform-provider-google_v5.40.0_x5: Upgrading JSON state: tf_mux_provider=*schema.GRPCProviderServer tf_req_id=12a22123-a11b-3344-c555-defr12ab345 @module=sdk.helper_schema tf_provider_addr=registry.terraform.io/hashicorp/google tf_resource_type=google_access_context_manager_service_perimeter tf_rpc=UpgradeResourceState @caller=github.com/hashicorp/terraform-plugin-sdk/v2@v2.33.0/helper/schema/grpc_provider.go:365 timestamp=
[TRACE] provider.terraform-provider-google_v5.40.0_x5: Received downstream response: @module=sdk.proto tf_proto_version=5.6 tf_provider_addr=registry.terraform.io/hashicorp/google tf_req_id=12a22123-a11b-3344-c555-defr12ab345 tf_rpc=UpgradeResourceState diagnostic_error_count=0 diagnostic_warning_count=0 tf_req_duration_ms=402 tf_resource_type=google_access_context_manager_service_perimeter @caller=github.com/hashicorp/terraform-plugin-go@v0.23.0/tfprotov5/internal/tf5serverlogging/downstream_request.go:42 timestamp=
[TRACE] provider.terraform-provider-google_v5.40.0_x5: Served request: tf_req_id=12a22123-a11b-3344-c555-defr12ab345 tf_rpc=UpgradeResourceState @module=sdk.proto tf_proto_version=5.6 tf_provider_addr=registry.terraform.io/hashicorp/google tf_resource_type=google_access_context_manager_service_perimeter @caller=github.com/hashicorp/terraform-plugin-go@v0.23.0/tfprotov5/tf5server/server.go:762 timestamp=
[TRACE] NodeAbstractResouceInstance.writeResourceInstanceState to prevRunState for module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter["prod_vpcsc_perimeter"]
[TRACE] NodeAbstractResouceInstance.writeResourceInstanceState: writing state object for module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter["prod_vpcsc_perimeter"]
[TRACE] NodeAbstractResouceInstance.writeResourceInstanceState to refreshState for module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter["prod_vpcsc_perimeter"]
[TRACE] NodeAbstractResouceInstance.writeResourceInstanceState: writing state object for module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter["prod_vpcsc_perimeter"]
module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter["prod_vpcsc_perimeter"]: Refreshing state... [id=accessPolicies/123456543212/servicePerimeters/prod_vpcsc_perimeter]
[TRACE] NodeAbstractResourceInstance.refresh for module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter["prod_vpcsc_perimeter"]
[TRACE] GRPCProvider: ReadResource
[TRACE] provider.terraform-provider-google_v5.40.0_x5: Received request: @caller=github.com/hashicorp/terraform-plugin-go@v0.23.0/tfprotov5/tf5server/server.go:771 @module=sdk.proto tf_proto_version=5.6 tf_provider_addr=registry.terraform.io/hashicorp/google tf_req_id=34b33123-c11f-4345-g656-fgfe23bc456 tf_resource_type=google_access_context_manager_service_perimeter tf_rpc=ReadResource timestamp=
[TRACE] provider.terraform-provider-google_v5.40.0_x5: No announced client capabilities: tf_proto_version=5.6 tf_rpc=ReadResource @module=sdk.proto tf_provider_addr=registry.terraform.io/hashicorp/google tf_req_id=34b33123-c11f-4345-g656-fgfe23bc456 tf_resource_type=google_access_context_manager_service_perimeter @caller=github.com/hashicorp/terraform-plugin-go@v0.23.0/tfprotov5/internal/tf5serverlogging/client_capabilities.go:44 timestamp=
[TRACE] provider.terraform-provider-google_v5.40.0_x5: Sending request downstream: tf_rpc=ReadResource @caller=github.com/hashicorp/terraform-plugin-go@v0.23.0/tfprotov5/internal/tf5serverlogging/downstream_request.go:22 @module=sdk.proto tf_provider_addr=registry.terraform.io/hashicorp/google tf_resource_type=google_access_context_manager_service_perimeter tf_proto_version=5.6 tf_req_id=34b33123-c11f-4345-g656-fgfe23bc456 timestamp=
[TRACE] provider.terraform-provider-google_v5.40.0_x5: calling downstream server: @module=sdk.mux tf_mux_provider=*schema.GRPCProviderServer tf_rpc=ReadResource @caller=github.com/hashicorp/terraform-plugin-mux@v0.15.0/internal/logging/mux.go:19 timestamp=
[TRACE] dag/walk: vertex "provider[\"registry.terraform.io/hashicorp/google\"] (close)" is waiting for "module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter (expand)"
[TRACE] provider.terraform-provider-google_v5.40.0_x5: Calling downstream: tf_rpc=ReadResource @caller=github.com/hashicorp/terraform-plugin-sdk/v2@v2.33.0/helper/schema/resource.go:1088 @module=sdk.helper_schema tf_mux_provider=*schema.GRPCProviderServer tf_req_id=34b33123-c11f-4345-g656-fgfe23bc456 tf_provider_addr=registry.terraform.io/hashicorp/google tf_resource_type=google_access_context_manager_service_perimeter timestamp=
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: 2024/09/05 20:07:09 [DEBUG] Waiting for state to become: [success]
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: 2024/09/05 20:07:09 [DEBUG] Retry Transport: starting RoundTrip retry loop
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: 2024/09/05 20:07:09 [DEBUG] Retry Transport: request attempt 0
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: 2024/09/05 20:07:09 [DEBUG] Google API Request Details:
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: ---[ REQUEST ]---------------------------------------
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: GET /v1/accessPolicies/123456543212/servicePerimeters/prod_vpcsc_perimeter?alt=json HTTP/1.1
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: Host: accesscontextmanager.googleapis.com
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: User-Agent: Terraform/1.2.2 (+https://www.terraform.io) Terraform-Plugin-SDK/2.33.0 terraform-provider-google/5.40.0
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: Content-Type: application/json
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: Accept-Encoding: gzip
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: 
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: 
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: -----------------------------------------------------

[TRACE] dag/walk: vertex "module.vpcsc (close)" is waiting for "module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter (expand)"
[TRACE] dag/walk: vertex "root" is waiting for "module.vpcsc (close)"
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: 2024/09/05 20:07:09 [DEBUG] Google API Response Details:
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: ---[ RESPONSE ]--------------------------------------
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: HTTP/2.0 200 OK
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: Cache-Control: private
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: Content-Type: application/json; charset=UTF-8
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: Date: Thu, 05 Sep 2024 20:07:09 GMT
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: Server: ESF
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: Vary: Origin
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: Vary: X-Origin
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: Vary: Referer
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: X-Content-Type-Options: nosniff
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: X-Frame-Options: SAMEORIGIN
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: X-Xss-Protection: 0
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: 
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: {
[DEBUG] provider.terraform-provider-google_v5.40.0_x5:   "name": "accessPolicies/123456543212/servicePerimeters/prod_vpcsc_perimeter",
[DEBUG] provider.terraform-provider-google_v5.40.0_x5:   "title": "prod_vpcsc_perimeter",
[DEBUG] provider.terraform-provider-google_v5.40.0_x5:   "status": {
### Response continues here and displays the entire perimeter body
[DEBUG] provider.terraform-provider-google_v5.40.0_x5:   "useExplicitDryRunSpec": true
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: }
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: 
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: -----------------------------------------------------
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: 2024/09/05 20:07:09 [DEBUG] Retry Transport: Stopping retries, last request was successful
[DEBUG] provider.terraform-provider-google_v5.40.0_x5: 2024/09/05 20:07:09 [DEBUG] Retry Transport: Returning after 1 attempts
[TRACE] provider.terraform-provider-google_v5.40.0_x5: Called downstream: @caller=github.com/hashicorp/terraform-plugin-sdk/v2@v2.33.0/helper/schema/resource.go:1090 @module=sdk.helper_schema tf_mux_provider=*schema.GRPCProviderServer tf_provider_addr=registry.terraform.io/hashicorp/google tf_resource_type=google_access_context_manager_service_perimeter tf_rpc=ReadResource tf_req_id=34b33123-c11f-4345-g656-fgfe23bc456 timestamp=
[TRACE] provider.terraform-provider-google_v5.40.0_x5: Received downstream response: diagnostic_error_count=0 tf_provider_addr=registry.terraform.io/hashicorp/google @caller=github.com/hashicorp/terraform-plugin-go@v0.23.0/tfprotov5/internal/tf5serverlogging/downstream_request.go:42 diagnostic_warning_count=0 tf_proto_version=5.6 tf_req_duration_ms=3468 tf_req_id=34b33123-c11f-4345-g656-fgfe23bc456 tf_resource_type=google_access_context_manager_service_perimeter tf_rpc=ReadResource @module=sdk.proto timestamp=
[TRACE] provider.terraform-provider-google_v5.40.0_x5: Served request: tf_resource_type=google_access_context_manager_service_perimeter tf_rpc=ReadResource @module=sdk.proto tf_proto_version=5.6 tf_provider_addr=registry.terraform.io/hashicorp/google tf_req_id=34b33123-c11f-4345-g656-fgfe23bc456 @caller=github.com/hashicorp/terraform-plugin-go@v0.23.0/tfprotov5/tf5server/server.go:802 timestamp=
[TRACE] dag/walk: vertex "provider[\"registry.terraform.io/hashicorp/google\"] (close)" is waiting for "module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter (expand)"
[TRACE] dag/walk: vertex "module.vpcsc (close)" is waiting for "module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter (expand)"
[TRACE] dag/walk: vertex "root" is waiting for "module.vpcsc (close)"
[TRACE] dag/walk: vertex "provider[\"registry.terraform.io/hashicorp/google\"] (close)" is waiting for "module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter (expand)"
[TRACE] dag/walk: vertex "module.vpcsc (close)" is waiting for "module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter (expand)"
[TRACE] dag/walk: vertex "root" is waiting for "module.vpcsc (close)"
[TRACE] dag/walk: vertex "provider[\"registry.terraform.io/hashicorp/google\"] (close)" is waiting for "module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter (expand)"
[TRACE] dag/walk: vertex "module.vpcsc (close)" is waiting for "module.vpcsc.google_access_context_manager_service_perimeter.service_perimeter (expand)"
[TRACE] dag/walk: vertex "root" is waiting for "module.vpcsc (close)"

Expected Behavior

With provider v4.X, the runtime of terraform plan and terraform apply is around 10-20 seconds. This includes the "Refreshing state" step for the Service Perimeter resource, which is the step whose runtime has increased substantially. When bumping the provider version up to v5.X, we expected the runtime to remain roughly consistent: in that 10-20 second range, or at least below 1 minute. Instead, refreshing the resource's state has gone from 10-20 seconds all the way up to 10-20 minutes.

Actual Behavior

When using provider v5.X for the Access Context Manager Service Perimeter resource, the runtimes of both terraform plan and terraform apply have skyrocketed, particularly during the "Refreshing state" step for the Service Perimeter resource, which went from 10-20 seconds to 10-20 minutes just to refresh the state.

We investigated whether any quota limits were being hit; that does not appear to be the case, and the GET request to fetch the service perimeter still takes place only once, just as it did with provider v4.X.

Potential Reason for Performance Issues

With the bump to v5.X of terraform-provider-google, the google_access_context_manager_service_perimeter resource switched from using TypeList to using TypeSet for the majority of the lists defined in the resource. Reference: 5.0.0 Upgrade Guide

You can also find the code updates within this commit.
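
For illustration, here is a minimal sketch of what that schema change looks like, written against the terraform-plugin-sdk/v2 helper/schema package. This is not the provider's actual schema code; the function names are hypothetical, and only the TypeList/TypeSet distinction is the point:

package example

import (
    "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
)

// v4.X style: an ordered list of strings; elements are stored as-is
// with no per-element hashing.
func resourcesFieldAsList() *schema.Schema {
    return &schema.Schema{
        Type:     schema.TypeList,
        Optional: true,
        Elem:     &schema.Schema{Type: schema.TypeString},
    }
}

// v5.X style: an unordered set of strings; schema.HashString is run
// against every element whenever the set is built or compared.
func resourcesFieldAsSet() *schema.Schema {
    return &schema.Schema{
        Type:     schema.TypeSet,
        Optional: true,
        Elem:     &schema.Schema{Type: schema.TypeString},
        Set:      schema.HashString,
    }
}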

So for every single one of the lists within our Ingress Policies and Egress Policies, in addition to the resources within the perimeter, the flatten and expand functions now run against a set instead of a plain list.

Provider v4.X Expand and Flatten Functions

// v4.X: both functions are simple pass-throughs; the value stays a
// plain list and no hashing is performed.
func expandAccessContextManagerServicePerimeterSpecResources(v interface{}, d tpgresource.TerraformResourceData, config *transport_tpg.Config) (interface{}, error) {
    return v, nil
}

func flattenAccessContextManagerServicePerimeterSpecResources(v interface{}, d *schema.ResourceData, config *transport_tpg.Config) interface{} {
    return v
}

Provider v5.X Expand and Flatten Functions

// v5.X: expand converts the stored *schema.Set back into a list, and
// flatten builds a new set, running schema.HashString over every element.
func expandAccessContextManagerServicePerimeterSpecResources(v interface{}, d tpgresource.TerraformResourceData, config *transport_tpg.Config) (interface{}, error) {
    v = v.(*schema.Set).List()
    return v, nil
}

func flattenAccessContextManagerServicePerimeterSpecResources(v interface{}, d *schema.ResourceData, config *transport_tpg.Config) interface{} {
    if v == nil {
        return v
    }
    return schema.NewSet(schema.HashString, v.([]interface{}))
}

Now take into account that our Service Perimeter resource has 15,000+ resources within SpecResources, and another 15,000+ within StatusResources. Each time these flatten and expand functions are called, they either convert a list into a Set (hashing every string) or take the Set and return a list, whereas before they always dealt with plain lists and no hashing took place. This change to Sets and hash strings was applied to each of the fields in the resource, and I believe this is what is causing the substantial time increase, an increase that has made provider v5.X unusable for us.
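
As a rough illustration of where that time could go, here is a micro-benchmark sketch that builds a 15,000-element slice shaped like SpecResources and times the v4-style pass-through against the v5-style set construction. This only measures schema.NewSet itself under assumed data; the real overhead inside the SDK (reading, writing, and diffing set-backed state) comes on top of this:

package main

import (
    "fmt"
    "time"

    "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
)

func main() {
    // Roughly the shape of SpecResources: 15,000 project identifiers.
    items := make([]interface{}, 0, 15000)
    for i := 0; i < 15000; i++ {
        items = append(items, fmt.Sprintf("projects/%d", i))
    }

    // v4.X-style flatten: the slice passes through untouched.
    start := time.Now()
    passthrough := items
    _ = passthrough
    fmt.Println("list passthrough: ", time.Since(start))

    // v5.X-style flatten: schema.NewSet hashes every element.
    start = time.Now()
    set := schema.NewSet(schema.HashString, items)
    fmt.Println("set construction: ", time.Since(start), "elements:", set.Len())

    // v5.X-style expand: the set is converted back into a list.
    start = time.Now()
    _ = set.List()
    fmt.Println("set back to list:", time.Since(start))
}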

This prevents us from taking advantage of new feature releases for the VPC-SC service, such as defining Source -> Access Levels within an Egress Policy.

Steps to reproduce

  1. tf init -input=false
  2. tf plan -input=false
  3. tf apply -input=false --auto-approve

The difficult part for others reproducing this is the size of the service perimeter that we're dealing with, such as the 15,000+ projects.

Also, the performance decrease takes place during the "Refreshing state" step for the Service Perimeter resource, so an existing perimeter needs to already be created.

References

b/368651673

ggtisc commented 1 month ago

Not possible to reproduce since it involves thousands of projects

Charlesleonius commented 3 weeks ago

We're looking into this.

Charlesleonius commented 2 weeks ago

I've found a way to make ordering not matter for these fields without making them sets. I will need to do some benchmarking to make sure this actually improves performance, though.
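
For context, one known SDK-side pattern for making element order irrelevant while keeping TypeList (whether this matches the approach being benchmarked here is purely an assumption) is to normalize ordering in the flatten step, so an API response that comes back reordered never produces a diff and no per-element hashing is required. A minimal sketch with a hypothetical helper:

package example

import "sort"

// flattenAsSortedList is a hypothetical helper: the field stays a
// TypeList, but elements are returned in sorted order so ordering
// differences never surface as a diff. Note that the configuration
// side would need the same normalization for this to work end to end.
func flattenAsSortedList(v interface{}) interface{} {
    if v == nil {
        return v
    }
    items := v.([]interface{})
    strs := make([]string, len(items))
    for i, item := range items {
        strs[i] = item.(string)
    }
    sort.Strings(strs)
    out := make([]interface{}, len(strs))
    for i, s := range strs {
        out[i] = s
    }
    return out
}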