grafana / alloy

OpenTelemetry Collector distribution with programmable pipelines
https://grafana.com/oss/alloy
Apache License 2.0
1.34k stars 187 forks source link

Alloy drop rules not dropping #1843

Open calebAtIspot opened 4 days ago

calebAtIspot commented 4 days ago

What's wrong?

I have several drop rules in discovery.relabel "kubernetes_nodes", but only the first one appears to drop the metric.

Steps to reproduce

  1. Deploy alloy helm chart 0.7.0
  2. send metrics to prometheus with several drop rules Expected result: all metrics specified in drop rules dropped Actual result: only the metric in the first drop rule dropped

System information

Ubuntu 23.10

Software version

1.3.1

Configuration

Values passed to helm chart:

file1:

alloy:
  enabled: false
  alloy:
    clustering:
      enabled: true
    enableReporting: false  
    resources:
      limits:
        cpu: 4
        memory: 30Gi
      requests:
        cpu: 2
        memory: 30Gi
  rbac:
    create: true
  controller:
    type: 'statefulset'
    replicas: 2
    enableStatefulSetAutoDeletePVC: true
    autoscaling:
      enabled: true
      minReplicas: 2
      maxReplicas: 10
      targetMemoryUtilizationPercentage: 60
      scaleUp:
          policies:
            - type: Pods
              value: 2
              periodSeconds: 60

file2:

alloy:
  enabled: true
  alloy:
    configMap:
      create: true
      content: |-

        discovery.kubernetes "kubernetes_nodes" {
          role = "node"
        }

        discovery.kubernetes "kubernetes_nodes_cadvisor" {
          role = "node"
        }

        discovery.kubernetes "kubernetes_service_endpoints" {
          role = "endpoints"
        }

        discovery.kubernetes "kubernetes_service_endpoints_slow" {
          role = "endpoints"
        }

        discovery.kubernetes "kubernetes_services" {
          role = "service"
        }

        discovery.kubernetes "kubernetes_pods" {
          role = "pod"
        }

        discovery.kubernetes "kubernetes_pods_slow" {
          role = "pod"
        }

        discovery.relabel "kubernetes_nodes" {
                targets = discovery.kubernetes.kubernetes_nodes.targets

                rule {
                        source_labels = ["__name__"]
                        regex  = "(kubelet_runtime_duration_seconds_bucket)"
                        action = "drop"
                }

                rule {
                        source_labels = ["__name__"]
                        regex  = "(kubelet_http_requests_duration_seconds_bucket)"
                        action = "drop"
                }

                rule {
                        source_labels = ["__name__"]
                        regex  = "(kubernetes_feature_enabled)"
                        action = "drop"
                }

                rule {
                        regex  = "__meta_kubernetes_node_label_(.+)"
                        action = "labelmap"
                }

                rule {
                        target_label = "__address__"
                        replacement  = "kubernetes.default.svc:443"
                }

                rule {
                        source_labels = ["__meta_kubernetes_node_name"]
                        regex         = "(.+)"
                        target_label  = "__metrics_path__"
                        replacement   = "/api/v1/nodes/$1/proxy/metrics"
                }
        }

        discovery.relabel "kubernetes_nodes_cadvisor" {
                targets = discovery.kubernetes.kubernetes_nodes_cadvisor.targets

                rule {
                        regex  = "__meta_kubernetes_node_label_(.+)"
                        action = "labelmap"
                }

                rule {
                        target_label = "__address__"
                        replacement  = "kubernetes.default.svc:443"
                }

                rule {
                        source_labels = ["__meta_kubernetes_node_name"]
                        regex         = "(.+)"
                        target_label  = "__metrics_path__"
                        replacement   = "/api/v1/nodes/$1/proxy/metrics/cadvisor"
                }
        }

        discovery.relabel "kubernetes_service_endpoints" {
                targets = discovery.kubernetes.kubernetes_service_endpoints.targets

                rule {
                        source_labels = ["__meta_kubernetes_service_annotation_prometheus_io_scrape"]
                        regex         = "true"
                        action        = "keep"
                }

                rule {
                        source_labels = ["__meta_kubernetes_service_annotation_prometheus_io_scrape_slow"]
                        regex         = "true"
                        action        = "drop"
                }

                rule {
                        source_labels = ["__meta_kubernetes_service_annotation_prometheus_io_scheme"]
                        regex         = "(https?)"
                        target_label  = "__scheme__"
                }

                rule {
                        source_labels = ["__meta_kubernetes_service_annotation_prometheus_io_path"]
                        regex         = "(.+)"
                        target_label  = "__metrics_path__"
                }

                rule {
                        source_labels = ["__address__", "__meta_kubernetes_service_annotation_prometheus_io_port"]
                        regex         = "(.+?)(?::\\d+)?;(\\d+)"
                        target_label  = "__address__"
                        replacement   = "$1:$2"
                }

                rule {
                        regex       = "__meta_kubernetes_service_annotation_prometheus_io_param_(.+)"
                        replacement = "__param_$1"
                        action      = "labelmap"
                }

                rule {
                        regex  = "__meta_kubernetes_service_label_(.+)"
                        action = "labelmap"
                }

                rule {
                        source_labels = ["__meta_kubernetes_namespace"]
                        target_label  = "namespace"
                }

                rule {
                        source_labels = ["__meta_kubernetes_service_name"]
                        target_label  = "service"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_node_name"]
                        target_label  = "node"
                }
        }

        discovery.relabel "kubernetes_service_endpoints_slow" {
                targets = discovery.kubernetes.kubernetes_service_endpoints_slow.targets

                rule {
                        source_labels = ["__meta_kubernetes_service_annotation_prometheus_io_scrape_slow"]
                        regex         = "true"
                        action        = "keep"
                }

                rule {
                        source_labels = ["__meta_kubernetes_service_annotation_prometheus_io_scheme"]
                        regex         = "(https?)"
                        target_label  = "__scheme__"
                }

                rule {
                        source_labels = ["__meta_kubernetes_service_annotation_prometheus_io_path"]
                        regex         = "(.+)"
                        target_label  = "__metrics_path__"
                }

                rule {
                        source_labels = ["__address__", "__meta_kubernetes_service_annotation_prometheus_io_port"]
                        regex         = "(.+?)(?::\\d+)?;(\\d+)"
                        target_label  = "__address__"
                        replacement   = "$1:$2"
                }

                rule {
                        regex       = "__meta_kubernetes_service_annotation_prometheus_io_param_(.+)"
                        replacement = "__param_$1"
                        action      = "labelmap"
                }

                rule {
                        regex  = "__meta_kubernetes_service_label_(.+)"
                        action = "labelmap"
                }

                rule {
                        source_labels = ["__meta_kubernetes_namespace"]
                        target_label  = "namespace"
                }

                rule {
                        source_labels = ["__meta_kubernetes_service_name"]
                        target_label  = "service"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_node_name"]
                        target_label  = "node"
                }
        }

        discovery.relabel "kubernetes_services" {
                targets = discovery.kubernetes.kubernetes_services.targets

                rule {
                        source_labels = ["__meta_kubernetes_service_annotation_prometheus_io_probe"]
                        regex         = "true"
                        action        = "keep"
                }

                rule {
                        source_labels = ["__address__"]
                        target_label  = "__param_target"
                }

                rule {
                        target_label = "__address__"
                        replacement  = "blackbox"
                }

                rule {
                        source_labels = ["__param_target"]
                        target_label  = "instance"
                }

                rule {
                        regex  = "__meta_kubernetes_service_label_(.+)"
                        action = "labelmap"
                }

                rule {
                        source_labels = ["__meta_kubernetes_namespace"]
                        target_label  = "namespace"
                }

                rule {
                        source_labels = ["__meta_kubernetes_service_name"]
                        target_label  = "service"
                }
        }

        discovery.relabel "kubernetes_pods" {
                targets = discovery.kubernetes.kubernetes_pods.targets

                rule {
                        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scrape"]
                        regex         = "true"
                        action        = "keep"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow"]
                        regex         = "true"
                        action        = "drop"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scheme"]
                        regex         = "(https?)"
                        target_label  = "__scheme__"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_path"]
                        regex         = "(.+)"
                        target_label  = "__metrics_path__"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"]
                        regex         = "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})"
                        target_label  = "__address__"
                        replacement   = "[$2]:$1"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"]
                        regex         = "(\\d+);((([0-9]+?)(\\.|$)){4})"
                        target_label  = "__address__"
                        replacement   = "$2:$1"
                }

                rule {
                        regex       = "__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)"
                        replacement = "__param_$1"
                        action      = "labelmap"
                }

                rule {
                        regex  = "__meta_kubernetes_pod_label_(.+)"
                        action = "labelmap"
                }

                rule {
                        source_labels = ["__meta_kubernetes_namespace"]
                        target_label  = "namespace"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_name"]
                        target_label  = "pod"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_phase"]
                        regex         = "Pending|Succeeded|Failed|Completed"
                        action        = "drop"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_node_name"]
                        target_label  = "node"
                }
        }

        discovery.relabel "kubernetes_pods_slow" {
                targets = discovery.kubernetes.kubernetes_pods_slow.targets

                rule {
                        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow"]
                        regex         = "true"
                        action        = "keep"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scheme"]
                        regex         = "(https?)"
                        target_label  = "__scheme__"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_path"]
                        regex         = "(.+)"
                        target_label  = "__metrics_path__"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"]
                        regex         = "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})"
                        target_label  = "__address__"
                        replacement   = "[$2]:$1"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"]
                        regex         = "(\\d+);((([0-9]+?)(\\.|$)){4})"
                        target_label  = "__address__"
                        replacement   = "$2:$1"
                }

                rule {
                        regex       = "__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)"
                        replacement = "__param_$1"
                        action      = "labelmap"
                }

                rule {
                        regex  = "__meta_kubernetes_pod_label_(.+)"
                        action = "labelmap"
                }

                rule {
                        source_labels = ["__meta_kubernetes_namespace"]
                        target_label  = "namespace"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_name"]
                        target_label  = "pod"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_phase"]
                        regex         = "Pending|Succeeded|Failed|Completed"
                        action        = "drop"
                }

                rule {
                        source_labels = ["__meta_kubernetes_pod_node_name"]
                        target_label  = "node"
                }
        }

        prometheus.scrape "prometheus" {
                targets = [{
                        __address__ = "localhost:9090",
                }]
                forward_to = [prometheus.remote_write.default.receiver]
                job_name   = "prometheus"
                clustering {
                        enabled = true
                } 
        }

        prometheus.scrape "kubernetes_nodes" {
                targets    = discovery.relabel.kubernetes_nodes.output
                forward_to = [prometheus.remote_write.default.receiver]
                job_name   = "kubernetes-nodes"
                scheme     = "https"

                authorization {
                        type             = "Bearer"
                        credentials_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"
                }

                tls_config {
                        ca_file              = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
                        insecure_skip_verify = true
                }
                clustering {
                        enabled = true
                }
        }

        prometheus.scrape "kubernetes_nodes_cadvisor" {
                targets    = discovery.relabel.kubernetes_nodes_cadvisor.output
                forward_to = [prometheus.remote_write.default.receiver]
                job_name   = "kubernetes-nodes-cadvisor"
                scheme     = "https"

                authorization {
                        type             = "Bearer"
                        credentials_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"
                }

                tls_config {
                        ca_file              = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
                        insecure_skip_verify = true
                }
                clustering {
                        enabled = true
                }
        }

        prometheus.scrape "kubernetes_service_endpoints" {
                targets      = discovery.relabel.kubernetes_service_endpoints.output
                forward_to   = [prometheus.remote_write.default.receiver]
                job_name     = "kubernetes-service-endpoints"
                honor_labels = true
                clustering {
                        enabled = true
                }
        }

        prometheus.scrape "kubernetes_service_endpoints_slow" {
                targets         = discovery.relabel.kubernetes_service_endpoints_slow.output
                forward_to      = [prometheus.remote_write.default.receiver]
                job_name        = "kubernetes-service-endpoints-slow"
                honor_labels    = true
                scrape_interval = "5m0s"
                scrape_timeout  = "30s"
                clustering {
                        enabled = true
                }
        }

        prometheus.scrape "kubernetes_services" {
                targets      = discovery.relabel.kubernetes_services.output
                forward_to   = [prometheus.remote_write.default.receiver]
                job_name     = "kubernetes-services"
                honor_labels = true
                params       = {
                        module = ["http_2xx"],
                }
                metrics_path = "/probe"
                clustering {
                        enabled = true
                }
        }

        prometheus.scrape "kubernetes_pods" {
                targets      = discovery.relabel.kubernetes_pods.output
                forward_to   = [prometheus.remote_write.default.receiver]
                job_name     = "kubernetes-pods"
                honor_labels = true
                clustering {
                        enabled = true
                }
        }

        prometheus.scrape "kubernetes_pods_slow" {
                targets         = discovery.relabel.kubernetes_pods_slow.output
                forward_to      = [prometheus.remote_write.default.receiver]
                job_name        = "kubernetes-pods-slow"
                honor_labels    = true
                scrape_interval = "5m0s"
                scrape_timeout  = "30s"
                clustering {
                        enabled = true
                }
        }

        prometheus.remote_write "default" {
                external_labels = {
                        cluster = "dev-audience-1",
                }

                endpoint {
                        url = "https://aps-workspaces.us-east-1.amazonaws.com/workspaces/censored/api/v1/remote_write"

                        queue_config {
                                capacity             = 5000
                                max_shards           = 300
                                max_samples_per_send = 2000
                        }

                        metadata_config { }

                        sigv4 {
                                region = "us-east-1"
                        }
                        send_exemplars = false
                        send_native_histograms = false
                }
        }
  serviceAccount:
    create: true 
    name: "amp-ingest-service-account"
    annotations: 
      eks.amazonaws.com/role-arn: "censored"

Logs

The logs in the last 30 minutes are just "rejoining peers" messages

calebAtIspot commented 4 days ago

... Turns out I typo'd "kubelet_runtime_duration_seconds_bucket", it should actually be kubelet_runtime_operations_duration_seconds_bucket 🤦 . A metric for kubelet_runtime_duration_seconds_bucket never existed in the first place.

In that case we still have a problem because the other metrics are not being dropped. Maybe it should be regex = "metric_name" instead of regex = "(metric_name)"?