splunk-terraform / terraform-provider-signalfx

Terraform SignalFx provider
https://www.terraform.io/docs/providers/signalfx/
Mozilla Public License 2.0
31 stars 71 forks source link

Differences between Resources after terraform apply and Actual resources created in SignalFX dashboard. #242

Closed anujshukla-1987 closed 4 years ago

anujshukla-1987 commented 4 years ago

Greetings,

Terraform Version : 0.13 SignalFX Module Version : 4.26.4

Via Terraform we are creating a Dashboard Group, 22 Dashboards under that dashboard group, and 5 charts inside each Dashboard. When I run terraform apply, it correctly reports 133 resources created: 1 (Dashboard Group) + 22 (Dashboards) + 22*5 (Dashboard Charts), all successfully.

But when I go to the SignalFx Dashboards, I only see 8-15 dashboards under that dashboard group; dashboards and charts are missing at random.

If I create each dashboard separately, it works fine and all dashboards are created correctly under that group. But having them created automatically causes this issue.

Attaching relevant source code for this. Please note it is only part of working code.

Terraform Config `locals { services = toset([ "account" , "basket" , "c2sap-datahub" , "checkout" , "hybris" , "identity" , "landing-page-backend" , "layout" , "maakk-brico" , "maakk-frontend" , "maakk-praxis" , "merge-service" , "oaarsi" , "pim-adapter" , "product-backend" , "products" , "search-frontend" , "search-service" , "simsadapter" , "stores" , "workshop-brico" , "workshop-praxis" ]) }

data "aws_secretsmanager_secret_version" "signalfx_token" { secret_id = "signalfx_token" }

provider "signalfx" { auth_token = data.aws_secretsmanager_secret_version.signalfx_token.secret_string api_url = "https://app.eu0.signalfx.com" }

resource "signalfx_dashboard_group" "dashboard_group_services" { name = "Service SLI Dashboard Group" description = "This Dashboard is for service SLI dashboards" }

module "signalfx-service-dashboard" { source = "git::git@bitbucket.org:maxedadiy/signalfx-service-dashboard-terraform.git?ref=SYST-2441"

for_each = local.services serviceName = each.key dashboard_group = signalfx_dashboard_group.dashboard_group_services.id }`

signalfx-service-dashboard-terraform module config

`resource "signalfx_time_chart" "CPU_Utilization" { name = "CPU Utilization %"

program_text = <<-EOF CPUUtilization = data('CPUUtilization', filter=filter('namespace', 'AWS/ECS') and filter('stat', 'mean') and filter('ClusterName', 'app-prd01') and filter('ServiceName', '${var.serviceName}-prd01')).publish(label='CPUUtilization') EOF

time_range = 60

plot_type = "LineChart" show_data_markers = true

viz_options { label = "CPUUtilization" axis = "left" color = "green" } }

resource "signalfx_time_chart" "Memory_Utilization" { name = "Memory Utilization %"

program_text = <<-EOF MemoryUtilization = data('MemoryUtilization', filter=filter('ClusterName', 'app-prd01') and filter('ServiceName', '${var.serviceName}-prd01') and filter('namespace', 'AWS/ECS') and filter('stat', 'mean')).publish(label='MemoryUtilization') EOF

time_range = 60

plot_type = "LineChart" show_data_markers = true

viz_options { label = "MemoryUtilization" axis = "left" color = "green" } }

resource "signalfx_time_chart" "Error_Rate" { name = "Error Rate %"

program_text = <<-EOF A = data('spans.count', filter=filter('kind', 'SERVER') and filter('cluster', 'prd') and filter('service', '${var.serviceName}') and filter('error', 'false')).sum(by=['service']).publish(label='A', enable=False) B = data('spans.count', filter=filter('kind', 'SERVER') and filter('cluster', 'prd') and filter('service', '${var.serviceName}')).sum(by=['service']).publish(label='B', enable=False) ErrorRate = (100*(B-A)/B).publish(label='ErrorRate') EOF

time_range = 60

plot_type = "LineChart" show_data_markers = true

viz_options { label = "ErrorRate" axis = "left" color = "green" } }

resource "signalfx_time_chart" "Request_Rate" { name = "Request Rate"

program_text = <<-EOF RequestsPersec = data('spans.count', filter=filter('cluster', 'prd') and filter('service', '${var.serviceName}') and filter('kind', 'SERVER'), rollup='rate').sum(by=['service']).publish(label='RequestsPersec') EOF

time_range = 60

plot_type = "LineChart" show_data_markers = true

viz_options { label = "RequestsPersec" axis = "left" color = "green" } }

resource "signalfx_time_chart" "P99_Latency" { name = "P99 Latency"

program_text = <<-EOF Latency = data('spans.duration.ns.p99', filter=filter('cluster', 'prd') and filter('service', '${var.serviceName}') and filter('kind', 'SERVER')).mean(by=['service']).publish(label='Latency') EOF

time_range = 60

plot_type = "LineChart" show_data_markers = true

viz_options { label = "Latency" axis = "left" color = "green" value_unit = "Nanosecond" } }

resource "signalfx_dashboard" "serviceDashboard" { name = "${var.serviceName} SLI" dashboard_group = var.dashboard_group

time_range = "-30m"

grid { chart_ids = [signalfx_time_chart.CPU_Utilization.id , signalfx_time_chart.Memory_Utilization.id , signalfx_time_chart.Error_Rate.id , signalfx_time_chart.Request_Rate.id , signalfx_time_chart.P99_Latency.id] width = 6 } }`

mgaidia commented 4 years ago

Run terraform with parallelism set to 1 (for example, `terraform apply -parallelism=1`). This workaround solved the same issue for me.

anujshukla-1987 commented 4 years ago

Thanks mgaidia for the workaround .. it works !!