grafana / alloy

OpenTelemetry Collector distribution with programmable pipelines
https://grafana.com/oss/alloy
Apache License 2.0
982 stars 103 forks source link

Empty graph view with pyroscope components #1142

Open keyolk opened 5 days ago

keyolk commented 5 days ago

What's wrong?

With the pyroscope configuration, Alloy never works and I can see only an empty graph view in its UI.

image

Steps to reproduce

Just use the configuration below in an EKS environment. I deployed it using the official Helm chart as a DaemonSet. The behavior is somewhat random: sometimes I can see the normal graph view. It works fine if I remove the pyroscope parts.

System information

Linux o11y-alloy-daemonset-hwkhs 6.1.90 #1 SMP Sat Jun 8 02:13:40 UTC 2024 aarch64 aarch64 aarch64 GNU/Linux

Software version

alloy, version v1.1.0 (branch: HEAD, revision: cf46a1491) build user: root@buildkitsandbox build date: 2024-05-14T21:08:34Z go version: go1.22.3 platform: linux/arm64 tags: netgo,builtinassets,promtail_journal_enabled

Configuration

// Global logging settings: "info" level, "logfmt" output format.
logging {
      level  = "info"
      format = "logfmt"
    }

    // Discover pods, restricted by a field selector to those whose
    // spec.nodeName equals the value of the HOSTNAME environment variable.
    // NOTE(review): this only yields node-local pods if HOSTNAME holds the
    // node name rather than the pod name — confirm how the deployment sets it.
    discovery.kubernetes "local_pods" {
      role = "pod"
      selectors {
        field = "spec.nodeName=" + env("HOSTNAME")
        role = "pod"
      }
    }

    // Turn the discovered pod targets into log-file targets for the Loki
    // pipeline. Rules are applied in the order written.
    discovery.relabel "loki" {
      targets = discovery.kubernetes.local_pods.targets

      // Keep only pods whose grafana.com scrape_loki annotation label is "true".
      rule {
        action        = "keep"
        regex         = "true"
        source_labels = ["__meta_kubernetes_pod_annotation_grafana_com_scrape_loki"]
      }

      // Set "instance" from namespace, pod name and container name.
      // The regex expects the three values joined by ";"
      // (presumably the default separator — no separator is set here).
      rule {
        action        = "replace"
        regex         = "(.+);(.+);(.+)"
        replacement   = "${1}/${2}:${3}"
        target_label  = "instance"
        source_labels = [
          "__meta_kubernetes_namespace",
          "__meta_kubernetes_pod_name",
          "__meta_kubernetes_pod_container_name",
        ]
      }

      // Set "__path__" to a glob under /var/log/pods built from the pod UID
      // and container name joined with "/".
      rule {
        separator     = "/"
        replacement   = "/var/log/pods/*$1/*.log"
        target_label  = "__path__"
        source_labels = [
          "__meta_kubernetes_pod_uid",
          "__meta_kubernetes_pod_container_name",
        ]
      }
    }

    // Takes the relabeled targets (which carry the "__path__" glob set by
    // discovery.relabel.loki) as path targets; the resulting targets are
    // consumed by loki.source.file.loki.
    local.file_match "loki" {
      path_targets = discovery.relabel.loki.output
    }

    // Reads the targets produced by local.file_match.loki and forwards the
    // entries to the loki.process.cri receiver.
    loki.source.file "loki" {
      targets    = local.file_match.loki.targets
      forward_to = [loki.process.cri.receiver]
    }

    // Processing pipeline between the file source and the Loki writer:
    // a CRI stage followed by a drop stage.
    loki.process "cri" {
      forward_to = [loki.write.local.receiver]
      stage.cri {}
      // Drop entries older than 10 minutes, recorded under the reason "too old".
      stage.drop {
        older_than = "10m"
        drop_counter_reason = "too old"
      }
    }

    // Loki basic-auth username, read from a file; its content is referenced
    // by loki.write.local.
    local.file "loki_user" {
      filename  = "/etc/secrets/loki_user"
    }

    // Loki basic-auth password, read from a file and flagged as a secret;
    // its content is referenced by loki.write.local.
    local.file "loki_secret" {
      filename  = "/etc/secrets/loki_password"
      is_secret = true
    }

    // Pushes the processed log entries to the remote Loki endpoint.
    loki.write "local" {
      endpoint {
        url        = "https://loki.sbtools.io/loki/api/v1/push"
        batch_size = "10MiB"

        // Credentials come from the files read by the local.file components.
        basic_auth {
          username = local.file.loki_user.content
          password = local.file.loki_secret.content
        }
      }

      // Static labels configured for this writer.
      external_labels = {
        cluster_id    = "mesg-no1-dev-cluster-ypwf",
        env           = "dev",
        job           = "loki.source.file",
        log_forwarder = "alloy",
        product       = "mesg",
        sbregion      = "no1",
      }
    }

    // pyroscope related

    // Discover processes and join them with the pod targets, so that process
    // labels and Kubernetes pod labels end up on the same target.
    discovery.process "all" {
      join = discovery.kubernetes.local_pods.targets
    }

    // Select the Java processes to profile and derive their labels.
    // Rules are applied in the order written.
    discovery.relabel "pyroscope_java" {
      // Read the process targets, not the raw pod targets. The first rule
      // below filters on "__meta_process_exe", a process label that the plain
      // discovery.kubernetes targets do not carry; fed with pod targets, that
      // "keep" rule drops every target and discovery.process.all is never used.
      targets = discovery.process.all.targets

      // Keep only processes whose executable path ends in "/java".
      rule {
        source_labels = ["__meta_process_exe"]
        action = "keep"
        regex = ".*/java$"
      }

      // Drop targets whose pod phase is Succeeded, Failed or Completed.
      rule {
        action = "drop"
        regex = "Succeeded|Failed|Completed"
        source_labels = ["__meta_kubernetes_pod_phase"]
      }

      // Keep only pods whose grafana.com scrape_pyroscope annotation label is "true".
      rule {
        source_labels = ["__meta_kubernetes_pod_annotation_grafana_com_scrape_pyroscope"]
        regex = "true"
        action = "keep"
      }

      // Keep only pods whose grafana.com profiler annotation label is "java".
      rule {
        source_labels = ["__meta_kubernetes_pod_annotation_grafana_com_profiler"]
        regex = "java"
        action = "keep"
      }

      // service_name = "java/<namespace>/<container>"; the two source values
      // are joined with "@" so the regex can split them again.
      rule {
        action = "replace"
        regex = "(.*)@(.*)"
        replacement = "java/${1}/${2}"
        separator = "@"
        source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"]
        target_label = "service_name"
      }

      // Copy the node name into the "node" label.
      rule {
        action = "replace"
        source_labels = ["__meta_kubernetes_node_name"]
        target_label = "node"
      }

      // Set "instance" from namespace, pod name and container name.
      // The regex expects the three values joined by ";"
      // (presumably the default separator — no separator is set here).
      rule {
        source_labels = [
          "__meta_kubernetes_namespace",
          "__meta_kubernetes_pod_name",
          "__meta_kubernetes_pod_container_name",
        ]
        action = "replace"
        regex = "(.+);(.+);(.+)"
        target_label = "instance"
        replacement = "${1}/${2}:${3}"
      }
    }

    // Java profiler: takes the targets produced by
    // discovery.relabel.pyroscope_java and forwards profiles to the
    // pyroscope.write.central receiver.
    pyroscope.java "local" {
      targets    = discovery.relabel.pyroscope_java.output
      forward_to = [pyroscope.write.central.receiver]
    }

    // Select the pods to profile with eBPF and derive their labels.
    // Rules are applied in the order written.
    discovery.relabel "pyroscope_ebpf" {
      targets = discovery.kubernetes.local_pods.targets

      // Keep only pods whose grafana.com scrape_pyroscope annotation label is "true".
      rule {
        action        = "keep"
        regex         = "true"
        source_labels = ["__meta_kubernetes_pod_annotation_grafana_com_scrape_pyroscope"]
      }

      // Keep only pods whose grafana.com profiler annotation label is "ebpf".
      rule {
        action        = "keep"
        regex         = "ebpf"
        source_labels = ["__meta_kubernetes_pod_annotation_grafana_com_profiler"]
      }

      // Drop targets whose pod phase is Succeeded or Failed.
      rule {
        source_labels = ["__meta_kubernetes_pod_phase"]
        regex         = "Succeeded|Failed"
        action        = "drop"
      }

      // service_name = "ebpf/<namespace>/<container>"; the two source values
      // are joined with "@" so the regex can split them again.
      rule {
        source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"]
        separator     = "@"
        regex         = "(.*)@(.*)"
        action        = "replace"
        target_label  = "service_name"
        replacement   = "ebpf/${1}/${2}"
      }

      // Copy the node name into the "node" label.
      rule {
        source_labels = ["__meta_kubernetes_node_name"]
        action        = "replace"
        target_label  = "node"
      }

      // Set "instance" from namespace, pod name and container name.
      // The regex expects the three values joined by ";"
      // (presumably the default separator — no separator is set here).
      rule {
        action        = "replace"
        regex         = "(.+);(.+);(.+)"
        replacement   = "${1}/${2}:${3}"
        target_label  = "instance"
        source_labels = [
          "__meta_kubernetes_namespace",
          "__meta_kubernetes_pod_name",
          "__meta_kubernetes_pod_container_name",
        ]
      }
    }

    // eBPF profiler: takes the targets produced by
    // discovery.relabel.pyroscope_ebpf and forwards profiles to the
    // pyroscope.write.central receiver.
    pyroscope.ebpf "local" {
      targets    = discovery.relabel.pyroscope_ebpf.output
      forward_to = [pyroscope.write.central.receiver]
    }

    // Pyroscope basic-auth username, read from a file; its content is
    // referenced by pyroscope.write.central.
    local.file "pyroscope_user" {
      filename  = "/etc/secrets/pyroscope_user"
    }

    // Pyroscope basic-auth password, read from a file and flagged as a
    // secret; its content is referenced by pyroscope.write.central.
    local.file "pyroscope_secret" {
      filename  = "/etc/secrets/pyroscope_password"
      is_secret = true
    }

    // Sends profiles received from the pyroscope.java and pyroscope.ebpf
    // components to the central Pyroscope endpoint.
    pyroscope.write "central" {
      // Static labels configured for this writer.
      external_labels = {
        detail   = "",
        env      = "dev",
        product  = "mesg",
        sbregion = "no1",
      }

      endpoint {
        url = "https://pyroscope.sbtools.io/"

        // Credentials come from the files read by the local.file components.
        basic_auth {
          password = local.file.pyroscope_secret.content
          username = local.file.pyroscope_user.content
        }
      }
    }

Logs

ts=2024-06-26T07:00:21.625242627Z level=info "boringcrypto enabled"=false
ts=2024-06-26T07:00:21.620741161Z level=info source=/go/pkg/mod/github.com/!kim!machine!gun/automemlimit@v0.6.0/memlimit/memlimit.go:176 msg="GOMEMLIMIT is updated" package=github.com/KimMachineGun/automemlimit/memlimit GOMEMLIMIT=188743680
ts=2024-06-26T07:00:21.625495708Z level=info msg="starting complete graph evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7
ts=2024-06-26T07:00:21.625555409Z level=info msg="Using pod service account via in-cluster config" component_path=/ component_id=discovery.kubernetes.local_pods
ts=2024-06-26T07:00:21.62559925Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=discovery.kubernetes.local_pods duration=387.769µs
ts=2024-06-26T07:00:21.625662799Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=discovery.relabel.pyroscope_java duration=262.509µs
ts=2024-06-26T07:00:21.625707493Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=discovery.relabel.loki duration=81.412µs
ts=2024-06-26T07:00:21.625775555Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=local.file_match.loki duration=24.058µs
ts=2024-06-26T07:00:21.625834837Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=discovery.relabel.pyroscope_ebpf duration=159.444µs
ts=2024-06-26T07:00:21.625887153Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=discovery.process.all duration=18.298µs
ts=2024-06-26T07:00:21.625928065Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=tracing duration=17.148µs
ts=2024-06-26T07:00:21.625997497Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=otel duration=6.786µs
ts=2024-06-26T07:00:21.626037678Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=local.file.loki_secret duration=154.175µs
ts=2024-06-26T07:00:21.626107266Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=local.file.loki_user duration=58.134µs
ts=2024-06-26T07:00:21.626148087Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=loki.write.local duration=695.48µs
ts=2024-06-26T07:00:21.626217273Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=loki.process.cri duration=321.865µs
ts=2024-06-26T07:00:21.626257692Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=loki.source.file.loki duration=207.6µs
ts=2024-06-26T07:00:21.626326123Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=labelstore duration=15.229µs
ts=2024-06-26T07:00:21.626386234Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=local.file.pyroscope_user duration=86.918µs
ts=2024-06-26T07:00:21.626441102Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=logging duration=1.226504ms
ts=2024-06-26T07:00:21.626538613Z level=info msg="applying non-TLS config to HTTP server" service=http
ts=2024-06-26T07:00:21.626596008Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=http duration=93.276µs
ts=2024-06-26T07:00:21.626676247Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=cluster duration=18.536µs
ts=2024-06-26T07:00:21.626752112Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=ui duration=18.257µs
ts=2024-06-26T07:00:21.626939732Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=local.file.pyroscope_secret duration=120.912µs
ts=2024-06-26T07:00:21.627298127Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=pyroscope.write.central duration=278.484µs
ts=2024-06-26T07:00:21.627738516Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=pyroscope.ebpf.local duration=353.881µs
ts=2024-06-26T07:00:21.727177568Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=pyroscope.java.local duration=99.358706ms
ts=2024-06-26T07:00:21.727459318Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 node_id=remotecfg duration=94.261µs
ts=2024-06-26T07:00:21.727555925Z level=info msg="finished complete graph evaluation" controller_path=/ controller_id="" trace_id=e26357ff7b41bd8047520416ff3410d7 duration=105.118202ms
ts=2024-06-26T07:00:21.728016204Z level=info msg="scheduling loaded components and services"
ts=2024-06-26T07:00:21.733794793Z level=info msg="now listening for http traffic" service=http addr=0.0.0.0:12345
ts=2024-06-26T07:00:21.734526991Z level=info msg="starting cluster node" peers="" advertise_addr=127.0.0.1:12345
ts=2024-06-26T07:00:21.73500802Z level=info msg="peers changed" new_peers=o11y-alloy-daemonset-hwkhs