open-telemetry / opentelemetry-collector-contrib


[Processor/k8sattributes] Pod/Node attributes are not attached to metrics generated by the Kubeletstats receiver #34075

Open lazyboson opened 1 month ago

lazyboson commented 1 month ago

Component(s)

processor/k8sattributes

What happened?

Description

I am running the OTel Collector on a Kubernetes cluster as a DaemonSet and generating node and pod metrics with the kubeletstats receiver, but I can't see any labels attached to the metrics in Grafana.

Steps to Reproduce

Expected Result

Actual Result

Collector version

0.104.0

Environment information

Environment

OS: Ubuntu
Compiler (if manually compiled): NA

OpenTelemetry Collector configuration

config:
  receivers:
    otlp:
      protocols:
        http:
    loki:
      protocols:
        http:
      use_incoming_timestamp: true
    kubeletstats:
      collection_interval: 20s
      auth_type: "serviceAccount"
      endpoint: "${env:K8S_NODE_IP}:10250"
      insecure_skip_verify: true
      extra_metadata_labels:
        - container.id
      metrics:
        k8s.node.cpu.usage:
          enabled: true
        k8s.node.memory.available:
          enabled: true
        k8s.node.memory.usage:
          enabled: true
        k8s.pod.cpu.usage:
          enabled: true
        k8s.pod.memory.usage:
          enabled: true
  exporters:
    otlphttp:
      endpoint: https://API_KEY@API_SECRET_KEY@qryn.gigapipe.io
      timeout: 30s
      compression: none
      encoding: proto
    loki:
      endpoint: https://API_KEY@API_SECRET_KEY@qryn.gigapipe.io/loki/api/v1/push
      timeout: 30s
    prometheusremotewrite:
      endpoint: https://API_KEY@API_SECRET_KEY@qryn.gigapipe.io/prom/remote/write
      timeout: 30s
  processors:
    k8sattributes:
      auth_type: "serviceAccount"
      passthrough: false
      filter:
        node_from_env_var: KUBE_NODE_NAME
      extract:
        metadata:
          - k8s.pod.name
          - k8s.pod.uid
          - k8s.deployment.name
          - k8s.namespace.name
          - k8s.node.name
        labels:
          - tag_name: app.label.component
            key: app.kubernetes.io/component
            from: pod
      pod_association:
        - sources:
          - from: resource_attribute
            name: k8s.pod.ip
        - sources:
          - from: resource_attribute
            name: k8s.pod.uid
        - sources:
          - from: connection
    attributes:
      actions:
        - action: insert
          key: loki.attribute.labels
          value: sender
    memory_limiter:
      check_interval: 1s
      limit_mib: 4000
      spike_limit_mib: 800
    batch:
      send_batch_max_size: 10000
      timeout: 20s
  connectors:
    servicegraph:
      latency_histogram_buckets: [ 100us, 1ms, 2ms, 6ms, 10ms, 100ms, 250ms ]
      dimensions: [ cluster, namespace ]
      store:
        ttl: 2s
        max_items: 1000
      cache_loop: 2m
      store_expiration_loop: 2s
      virtual_node_peer_attributes:
        - db.name
        - rpc.service
    spanmetrics:
      namespace: traces.spanmetrics
      exemplars:
        enabled: false
      dimensions_cache_size: 1000
      aggregation_temporality: 'AGGREGATION_TEMPORALITY_CUMULATIVE'
      metrics_flush_interval: 30s
      events:
        enabled: false
  service:
    pipelines:
      traces:
        receivers: [otlp]
        processors: [memory_limiter, batch]
        exporters: [otlphttp, spanmetrics, servicegraph]
      logs:
        receivers: [loki]
        exporters: [loki]
      metrics:
        receivers: [kubeletstats, spanmetrics, servicegraph]
        processors: [k8sattributes]
        exporters: [prometheusremotewrite]

Log output

[root@ip-20-1-66-54 opentelemetry]# kubectl logs otelcollector-opentelemetry-collector-agent-2rzkx -n otel
2024-07-15T11:30:55.202Z        info    service@v0.104.0/service.go:115 Setting up own telemetry...
2024-07-15T11:30:55.202Z        info    service@v0.104.0/telemetry.go:96        Serving metrics {"address": "20.1.172.48:8888", "level": "Normal"}
2024-07-15T11:30:55.203Z        info    spanmetricsconnector@v0.104.0/connector.go:113  Building spanmetrics connector  {"kind": "connector", "name": "spanmetrics", "exporter_in_pipeline": "traces", "receiver_in_pipeline": "metrics"}
2024-07-15T11:30:55.203Z        info    lokiexporter@v0.104.0/exporter.go:43    using the new Loki exporter     {"kind": "exporter", "data_type": "logs", "name": "loki"}
2024-07-15T11:30:55.204Z        info    memorylimiter/memorylimiter.go:77       Memory limiter configured       {"kind": "processor", "name": "memory_limiter", "pipeline": "logs", "limit_mib": 4000, "spike_limit_mib": 800, "check_interval": 1}
2024-07-15T11:30:55.211Z        warn    metadata/generated_metrics.go:2997      [WARNING] `container.cpu.utilization` should not be enabled: WARNING: This metric will be disabled in a future release. Use metric container.cpu.usage instead.  {"kind": "receiver", "name": "kubeletstats", "data_type": "metrics"}
2024-07-15T11:30:55.211Z        warn    metadata/generated_metrics.go:3000      [WARNING] `k8s.node.cpu.utilization` should not be enabled: WARNING: This metric will be disabled in a future release. Use metric k8s.node.cpu.usage instead.    {"kind": "receiver", "name": "kubeletstats", "data_type": "metrics"}
2024-07-15T11:30:55.211Z        warn    metadata/generated_metrics.go:3003      [WARNING] `k8s.pod.cpu.utilization` should not be enabled: This metric will be disabled in a future release. Use metric k8s.pod.cpu.usage instead.       {"kind": "receiver", "name": "kubeletstats", "data_type": "metrics"}
2024-07-15T11:30:55.211Z        warn    metadata/generated_metrics.go:2997      [WARNING] `container.cpu.utilization` should not be enabled: WARNING: This metric will be disabled in a future release. Use metric container.cpu.usage instead.  {"kind": "receiver", "name": "kubeletstats", "data_type": "metrics"}
2024-07-15T11:30:55.211Z        warn    metadata/generated_metrics.go:3000      [WARNING] `k8s.node.cpu.utilization` should not be enabled: WARNING: This metric will be disabled in a future release. Use metric k8s.node.cpu.usage instead.    {"kind": "receiver", "name": "kubeletstats", "data_type": "metrics"}
2024-07-15T11:30:55.211Z        warn    metadata/generated_metrics.go:3003      [WARNING] `k8s.pod.cpu.utilization` should not be enabled: This metric will be disabled in a future release. Use metric k8s.pod.cpu.usage instead.       {"kind": "receiver", "name": "kubeletstats", "data_type": "metrics"}
2024-07-15T11:30:55.211Z        warn    metadata/generated_metrics.go:2997      [WARNING] `container.cpu.utilization` should not be enabled: WARNING: This metric will be disabled in a future release. Use metric container.cpu.usage instead.  {"kind": "receiver", "name": "kubeletstats", "data_type": "metrics"}
2024-07-15T11:30:55.211Z        warn    metadata/generated_metrics.go:3000      [WARNING] `k8s.node.cpu.utilization` should not be enabled: WARNING: This metric will be disabled in a future release. Use metric k8s.node.cpu.usage instead.    {"kind": "receiver", "name": "kubeletstats", "data_type": "metrics"}
2024-07-15T11:30:55.211Z        warn    metadata/generated_metrics.go:3003      [WARNING] `k8s.pod.cpu.utilization` should not be enabled: This metric will be disabled in a future release. Use metric k8s.pod.cpu.usage instead.       {"kind": "receiver", "name": "kubeletstats", "data_type": "metrics"}
2024-07-15T11:30:55.211Z        warn    metadata/generated_metrics.go:2997      [WARNING] `container.cpu.utilization` should not be enabled: WARNING: This metric will be disabled in a future release. Use metric container.cpu.usage instead.  {"kind": "receiver", "name": "kubeletstats", "data_type": "metrics"}
2024-07-15T11:30:55.211Z        warn    metadata/generated_metrics.go:3000      [WARNING] `k8s.node.cpu.utilization` should not be enabled: WARNING: This metric will be disabled in a future release. Use metric k8s.node.cpu.usage instead.    {"kind": "receiver", "name": "kubeletstats", "data_type": "metrics"}
2024-07-15T11:30:55.211Z        warn    metadata/generated_metrics.go:3003      [WARNING] `k8s.pod.cpu.utilization` should not be enabled: This metric will be disabled in a future release. Use metric k8s.pod.cpu.usage instead.       {"kind": "receiver", "name": "kubeletstats", "data_type": "metrics"}
2024-07-15T11:30:55.211Z        info    extension@v0.104.0/extension.go:172     Deprecated component. Will be removed in future releases.       {"kind": "extension", "name": "memory_ballast"}
2024-07-15T11:30:55.213Z        info    service@v0.104.0/service.go:193 Starting otelcol-contrib...     {"Version": "0.104.0", "NumCPU": 2}
2024-07-15T11:30:55.213Z        info    extensions/extensions.go:34     Starting extensions...
2024-07-15T11:30:55.213Z        info    extensions/extensions.go:37     Extension is starting...        {"kind": "extension", "name": "memory_ballast"}
2024-07-15T11:30:55.215Z        info    ballastextension@v0.104.0/memory_ballast.go:41  Setting memory ballast  {"kind": "extension", "name": "memory_ballast", "MiBs": 204}
2024-07-15T11:30:55.215Z        info    extensions/extensions.go:52     Extension started.      {"kind": "extension", "name": "memory_ballast"}
2024-07-15T11:30:55.215Z        info    extensions/extensions.go:37     Extension is starting...        {"kind": "extension", "name": "health_check"}
2024-07-15T11:30:55.319Z        info    healthcheckextension@v0.104.0/healthcheckextension.go:32        Starting health_check extension {"kind": "extension", "name": "health_check", "config": {"Endpoint":"20.1.172.48:13133","TLSSetting":null,"CORS":null,"Auth":null,"MaxRequestBodySize":0,"IncludeMetadata":false,"ResponseHeaders":null,"CompressionAlgorithms":null,"Path":"/","ResponseBody":null,"CheckCollectorPipeline":{"Enabled":false,"Interval":"5m","ExporterFailureThreshold":5}}}
2024-07-15T11:30:55.321Z        info    extensions/extensions.go:52     Extension started.      {"kind": "extension", "name": "health_check"}
2024-07-15T11:30:55.392Z        info    kube/client.go:122      k8s filtering   {"kind": "processor", "name": "k8sattributes", "pipeline": "metrics", "labelSelector": "", "fieldSelector": "spec.nodeName=ip-20-1-65-114.ec2.internal"}
2024-07-15T11:30:55.392Z        info    spanmetricsconnector@v0.104.0/connector.go:206  Starting spanmetrics connector  {"kind": "connector", "name": "spanmetrics", "exporter_in_pipeline": "traces", "receiver_in_pipeline": "metrics"}
2024-07-15T11:30:55.392Z        info    servicegraphconnector@v0.104.0/connector.go:151 Started servicegraphconnector   {"kind": "connector", "name": "servicegraph", "exporter_in_pipeline": "traces", "receiver_in_pipeline": "metrics"}
2024-07-15T11:30:55.394Z        info    otlpreceiver@v0.104.0/otlp.go:102       Starting GRPC server    {"kind": "receiver", "name": "otlp", "data_type": "traces", "endpoint": "20.1.172.48:4317"}
2024-07-15T11:30:55.394Z        info    lokireceiver@v0.104.0/loki.go:122       Starting HTTP server    {"kind": "receiver", "name": "loki", "data_type": "logs", "endpoint": "localhost:3500"}
2024-07-15T11:30:55.394Z        info    healthcheck/handler.go:132      Health Check state change       {"kind": "extension", "name": "health_check", "status": "ready"}
2024-07-15T11:30:55.395Z        info    service@v0.104.0/service.go:219 Everything is ready. Begin running and processing data.

Additional context

I have also tried this with both k8sattributes: and a second named instance k8sattributes/2:.

All RBAC permissions are granted, and I can't see any permission issues in the collector logs.
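
For reference, a minimal RBAC sketch (names are illustrative) of what the k8sattributes processor needs to read from the API server; the replicasets rule is only required so that k8s.deployment.name can be resolved:

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: otel-collector-k8sattributes   # illustrative name
rules:
  - apiGroups: [""]
    resources: ["pods", "namespaces", "nodes"]
    verbs: ["get", "watch", "list"]
  - apiGroups: ["apps"]
    resources: ["replicasets"]          # needed to resolve k8s.deployment.name
    verbs: ["get", "watch", "list"]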

github-actions[bot] commented 1 month ago

Pinging code owners:

marcoboi commented 3 weeks ago

I'm having the same issue; pasting a minimal configuration here.

apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: "otel-agent"
spec:
  mode: daemonset
  tolerations:
    - operator: Exists
  replicas: 1
  resources:
    limits:
      memory: 200Mi
  env:
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName

  config:

    receivers:
      kubeletstats:
        collection_interval: 10s
        auth_type: serviceAccount
        endpoint: "https://${env:K8S_NODE_NAME}:10250"

    processors:

      k8sattributes:
        filter:
          node_from_env_var: KUBE_NODE_NAME
        auth_type: serviceAccount
        passthrough: false

    exporters:
      otlphttp/metrics:
        endpoint: http://{{ .Release.Name }}-prometheus-server:9090/api/v1/otlp
        tls:
          insecure: true
      debug/metrics: { }
    service:
      pipelines:
        metrics:
          receivers:
            - kubeletstats
          processors:
            - k8sattributes
          exporters:
            - otlphttp/metrics
            - debug/metrics

TylerHelmuth commented 3 weeks ago

The most common reason this happens is that the Pod IP the k8sattributes processor gets from the Kubernetes API doesn't match the IP of the incoming request. If these two values don't match, the IP cannot be used as the association source.
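
One thing worth double-checking (a sketch, not a verified fix for this setup) is that the pod_association list also falls back to an attribute kubeletstats already sets on the resource, such as k8s.pod.uid, rather than relying only on the IP:

processors:
  k8sattributes:
    auth_type: serviceAccount
    passthrough: false
    pod_association:
      - sources:
          - from: resource_attribute
            name: k8s.pod.uid   # already present on kubeletstats pod metrics
      - sources:
          - from: connection    # fallback: IP of the incoming connection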

marcoboi commented 3 weeks ago

Thank you for chiming in @TylerHelmuth. Could you point me to any resources that would help me debug this? How can I print the IP obtained from the API and the one associated with the incoming metrics?

ChrsMark commented 3 weeks ago

Hey @marcoboi, you can increase the verbosity of the debug exporter and temporarily disable the k8sattributes processor to check what data is collected by the kubeletstats receiver in the first place:

debug:
  verbosity: detailed
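
For example, the metrics pipeline for this check could temporarily look like the sketch below (assuming the debug exporter is named debug as above):

service:
  pipelines:
    metrics:
      receivers: [kubeletstats]
      processors: []        # k8sattributes removed temporarily
      exporters: [debug]    # detailed verbosity, inspection only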

In general I would suggest consulting what the official Helm chart defines for this processor: https://github.com/open-telemetry/opentelemetry-helm-charts/blob/main/charts/opentelemetry-collector/templates/_config.tpl#L195

Since this Helm chart is widely used, I assume its config would cover your case as well.

marcoboi commented 3 weeks ago

Thank you @ChrsMark for the guidance. I did as you recommended and could verify that some attributes are indeed already present in the metrics sent by kubeletstats. The attributes seem to be spread here and there across the metric logs, so I'm not sure whether that's an issue.

As for the configuration, I took a look and my configuration seems reasonable.

An excerpt of the logs down below.

Resource SchemaURL: 
Resource attributes:
     -> k8s.node.name: Str(ip-10-0-12-189.eu-west-1.compute.internal)
ScopeMetrics #0
ScopeMetrics SchemaURL: 
InstrumentationScope otelcol/kubeletstatsreceiver 0.102.1
Metric #0
Descriptor:
     -> Name: k8s.node.cpu.time
     -> Description: Total cumulative CPU time (sum of all cores) spent by the container/pod/node since its creation
     -> Unit: s
     -> DataType: Sum
     -> IsMonotonic: true
     -> AggregationTemporality: Cumulative
NumberDataPoints #0
StartTimestamp: 2024-07-29 06:38:16 +0000 UTC
Timestamp: 2024-07-31 10:43:46.280612734 +0000 UTC
Value: 5693.698020
Metric #1
Descriptor:
     -> Name: k8s.node.cpu.utilization
     -> Description: Node CPU utilization
     -> Unit: 1
     -> DataType: Gauge
NumberDataPoints #0
StartTimestamp: 2024-07-29 06:38:16 +0000 UTC
Timestamp: 2024-07-31 10:43:46.280612734 +0000 UTC
Value: 0.024769
Metric #2
...

Resource attributes:
     -> k8s.pod.uid: Str(d71fae48-7100-4cb9-aa29-698b18d668ad)
     -> k8s.pod.name: Str(kube-proxy-rg27q)
     -> k8s.namespace.name: Str(kube-system)
ScopeMetrics #0
ScopeMetrics SchemaURL: 
InstrumentationScope otelcol/kubeletstatsreceiver 0.102.1
Metric #0
Descriptor:
     -> Name: k8s.pod.cpu.time
     -> Description: Total cumulative CPU time (sum of all cores) spent by the container/pod/node since its creation
     -> Unit: s
     -> DataType: Sum
     -> IsMonotonic: true
     -> AggregationTemporality: Cumulative
NumberDataPoints #0
StartTimestamp: 2024-07-29 06:38:37 +0000 UTC
Timestamp: 2024-07-31 10:43:46.280612734 +0000 UTC
Value: 66.195558
Metric #1
Descriptor:
     -> Name: k8s.pod.cpu.utilization
     -> Description: Pod CPU utilization
     -> Unit: 1
     -> DataType: Gauge
...

ChrsMark commented 3 weeks ago

@marcoboi could you try defining more pod_association rules?

The configuration you have provided is the following:

      k8sattributes:
        filter:
          node_from_env_var: KUBE_NODE_NAME
        auth_type: serviceAccount
        passthrough: false

While the one I suggested from the Helm Chart is:

k8sattributes:
    filter:
      node_from_env_var: K8S_NODE_NAME
    passthrough: false
    pod_association:
    - sources:
      - from: resource_attribute
        name: k8s.pod.ip
    - sources:
      - from: resource_attribute
        name: k8s.pod.uid
    - sources:
      - from: connection
    extract:
      metadata:
        - "k8s.namespace.name"
        - "k8s.deployment.name"
        - "k8s.statefulset.name"
        - "k8s.daemonset.name"
        - "k8s.cronjob.name"
        - "k8s.job.name"
        - "k8s.node.name"
        - "k8s.pod.name"
        - "k8s.pod.uid"
        - "k8s.pod.start_time"
      labels:
        - tag_name: $$1
          key_regex: (.*)
          from: pod
      annotations:
        - tag_name: $$1
          key_regex: (.*)
          from: pod

Also, could you define what you expect to see here and what you actually see? The issue's description explicitly says that metrics should have labels attached to them. (BTW, I think the title is misleading here: if we only care about labels, it should mention labels explicitly and not attributes.) In particular, note that based on the docs, only labels that you have defined will be added:

This config represents a list of annotations/labels that are extracted from pods/namespaces/nodes and added to spans, metrics and logs.
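
For instance (a sketch; the app.kubernetes.io/name pod label and the app.label.name attribute name are only illustrative), an explicit rule that turns a single pod label into a resource attribute looks like:

k8sattributes:
  extract:
    labels:
      - tag_name: app.label.name       # resource attribute to add
        key: app.kubernetes.io/name    # pod label to read
        from: pod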

marcoboi commented 3 weeks ago

Hi @ChrsMark,

you're right, let me clearly state the problem.

Setup
The setup consists of the following components:

Expectation
I'm expecting the metrics available in Grafana (and in the Prometheus UI) to contain attributes like namespace, pod, and container name.

Current status
The (kubelet) metrics I get in Grafana typically do not have any attributes attached to them, for example:

k8s_node_cpu_utilization_ratio{}
k8s_node_filesystem_available_bytes{}
k8s_pod_cpu_utilization_ratio{}

Thank you again for your help and please let me know if I can provide any further details.

marcoboi commented 3 weeks ago

Following up on my last message: after adding the k8sattributes processor back into the pipeline, I can see some more attributes in the logs (see below). The problem is that the metrics in Grafana still do not carry those attributes.

...
ResourceMetrics #1
Resource SchemaURL: 
Resource attributes:
     -> k8s.pod.uid: Str(21536284-5079-4426-a606-8c0c11632c66)
     -> k8s.pod.name: Str(loki-write-1)
     -> k8s.namespace.name: Str(telemetry)
     -> k8s.node.name: Str(ip-10-0-14-120.eu-west-1.compute.internal)
     -> app.kubernetes.io/name: Str(loki)
     -> app.kubernetes.io/part-of: Str(memberlist)
     -> statefulset.kubernetes.io/pod-name: Str(loki-write-1)
     -> k8s.statefulset.name: Str(loki-write)
     -> checksum/config: Str(e512a6e3ac9f805e2c125b82a4c8efd08812a1b10e796e2d73510e7f31fbcdc0)
     -> app.kubernetes.io/component: Str(write)
     -> app.kubernetes.io/instance: Str(telemetry)
     -> controller-revision-hash: Str(loki-write-647b69497f)
     -> k8s.pod.start_time: Str(2024-07-31T12:38:00Z)
     -> apps.kubernetes.io/pod-index: Str(1)
...

So it seems (correct me if I'm wrong there) that the attributes are sourced but not attached to the metrics.

marcoboi commented 3 weeks ago

It seems that not even the attributes already provided by kubeletstats are attached to the metrics. I'm wondering if the issue here is the otlphttp exporter rather than the k8sattributes processor.

It seems there's an open issue attended by @TylerHelmuth that points in the same direction.

ChrsMark commented 3 weeks ago

Thanks @marcoboi. I'm not 100% sure how you could verify whether that's an issue with the exporter or whether something else happens at ingest time on the back-end side.

What I would try here is sending the data to another Collector and exporting it to the console with the debug exporter, i.e. otlp exporter (collector 1) -> otlp receiver (collector 2) -> debug exporter (collector 2). This would show whether the otlp exporter is the problem, in which case we would be talking about a different issue; otherwise it would be a back-end issue.

marcoboi commented 3 weeks ago

Thanks @ChrsMark, I'll try exporting the data and capturing it again as you suggest.

marcoboi commented 3 weeks ago

As suggested by @ChrsMark, I've tried chaining two collectors (a source and a target collector) to:

  1. receive metrics from kubeletstats in the source collector
  2. attach k8s attributes to the data in the source collector
  3. print the data using the debug exporter
  4. export the data from the source collector using the otlphttp exporter
  5. receive the data in the target collector using the otlp receiver over the HTTP endpoint
  6. print the data using the debug exporter
  7. push the data to Prometheus

The attributes in the source collector are passed on to the target collector:

source collector

Resource SchemaURL: 
Resource attributes:
     -> k8s.pod.uid: Str(2fb086b0-09af-4406-8616-67489d7860da)
     -> k8s.pod.name: Str(aws-node-65qpg)
     -> k8s.namespace.name: Str(kube-system)
     -> k8s.container.name: Str(aws-node)
     -> k8s.node.name: Str(ip-10-0-12-189.eu-west-1.compute.internal)
     -> app.kubernetes.io/name: Str(aws-node)
     -> controller-revision-hash: Str(5c94b6c887)
     -> k8s-app: Str(aws-node)
     -> k8s.pod.start_time: Str(2024-07-29T06:39:38Z)
     -> k8s.daemonset.name: Str(aws-node)
     -> pod-template-generation: Str(2)
     -> app.kubernetes.io/instance: Str(aws-vpc-cni)

target collector

Resource SchemaURL: 
Resource attributes:
     -> k8s.pod.uid: Str(8b5c7836-9689-4695-9000-a30a95280c50)
     -> k8s.pod.name: Str(ebs-csi-node-jgfpc)
     -> k8s.namespace.name: Str(kube-system)
     -> app: Str(ebs-csi-node)
     -> k8s.pod.start_time: Str(2024-07-29T06:39:21Z)
     -> k8s.daemonset.name: Str(ebs-csi-node)
     -> app.kubernetes.io/component: Str(csi-driver)
     -> app.kubernetes.io/managed-by: Str(EKS)
     -> app.kubernetes.io/name: Str(aws-ebs-csi-driver)
     -> app.kubernetes.io/version: Str(1.32.0)
     -> k8s.node.name: Str(ip-10-0-12-189.eu-west-1.compute.internal)
     -> controller-revision-hash: Str(7fc8644f48)
     -> pod-template-generation: Str(1)

Considerations
So this rules out an issue at the level of the otlphttp exporter. Still, the issue at the Prometheus level remains: I'm not able to see the attributes and filter metrics on them.

I'm attaching the configurations used for this experiment.

Source collector

apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: "{{ .Release.Name }}-{{ .Values.otelAgent }}"
spec:
  mode: daemonset
  tolerations:
    - operator: Exists
  replicas: 1
  resources:
    limits:
      memory: 200Mi

  env:
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName

  config:

    receivers:
      kubeletstats:
        collection_interval: 10s
        auth_type: serviceAccount
        endpoint: "https://${env:K8S_NODE_NAME}:10250"

    processors:

      k8sattributes:
        filter:
          node_from_env_var: K8S_NODE_NAME
        passthrough: false
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: connection
        extract:
          metadata:
            - "k8s.namespace.name"
            - "k8s.deployment.name"
            - "k8s.statefulset.name"
            - "k8s.daemonset.name"
            - "k8s.cronjob.name"
            - "k8s.job.name"
            - "k8s.node.name"
            - "k8s.pod.name"
            - "k8s.pod.uid"
            - "k8s.pod.start_time"
          labels:
            - tag_name: $$1
              key_regex: (.*)
              from: pod
          annotations:
            - tag_name: $$1
              key_regex: (.*)
              from: pod

    exporters:
      otlphttp/metrics:
        endpoint: http://telemetry-otel-agent-target-collector.{{ .Release.Namespace }}.svc.cluster.local:4318
        tls:
          insecure: true
      debug/metrics:
        verbosity: detailed
    service:
      pipelines:
        metrics:
          receivers:
            - kubeletstats
          processors:
            - k8sattributes
          exporters:
            - otlphttp/metrics
            - debug/metrics

Target collector

apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: "{{ .Release.Name }}-otel-agent-target"
spec:
  mode: deployment
  tolerations:
    - operator: Exists
  replicas: 1
  resources:
    limits:
      memory: 200Mi

  config:

    receivers:

      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318

    exporters:
      otlphttp/metrics:
        endpoint: http://{{ .Release.Name }}-prometheus-server:9090/api/v1/otlp
        tls:
          insecure: true
      debug/metrics:
        verbosity: detailed

    service:
      pipelines:
        metrics:
          receivers:
            - otlp
          processors: [ ]
          exporters:
            - otlphttp/metrics
            - debug/metrics

marcoboi commented 3 weeks ago

Am I maybe missing something about the relationship between attributes and labels? It seems to me that an attribute attached to a metric should translate into a label in Prometheus.

ChrsMark commented 3 weeks ago

I think that's a question for the Prometheus project, to figure out what the proposed way is to ship data from a Collector to Prometheus and what is supported there. You can create a separate issue if needed to avoid overloading the current one (the issue is unrelated to the k8sattributes processor :)).

marcoboi commented 3 weeks ago

Thanks, I think it's clear we're not dealing with an issue related to k8sattributes, so this issue can be closed. I'll start a conversation there and link it here for traceability. Thanks for your valuable help @ChrsMark.

marcoboi commented 2 weeks ago

For the record, I opened an issue on the Prometheus repo to understand this better: https://github.com/prometheus/prometheus/issues/14580

marcoboi commented 2 weeks ago

I finally managed to get those attributes attached to the metrics and showing up as labels in Prometheus and Grafana. Specifically, adding the following transform processor to the pipeline ensures the basic attributes get through to Prometheus:

      transform:
        metric_statements:
          - context: datapoint
            statements:
              - set(attributes["namespace"], resource.attributes["k8s.namespace.name"])
              - set(attributes["container"], resource.attributes["k8s.container.name"])
              - set(attributes["pod"], resource.attributes["k8s.pod.name"])
              - set(attributes["pod_id"], resource.attributes["k8s.pod.uid"])
              - set(attributes["node"], resource.attributes["k8s.node.name"])
              - set(attributes["app"], resource.attributes["k8s-app"])

From what I understand, while it is true that attributes on OTel metrics will end up as labels in Prometheus, some restrictions exist on what those attribute names can look like. I'm not sure whether it's the nesting or the dot separators that is the crucial factor there.
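
A related option (an assumption about newer Prometheus versions, not verified here): Prometheus can promote selected OTLP resource attributes to per-series labels at ingestion time via otlp.promote_resource_attributes, which would avoid the datapoint-level transform. A sketch:

# prometheus.yml (sketch)
otlp:
  promote_resource_attributes:
    - k8s.namespace.name
    - k8s.pod.name
    - k8s.container.name
    - k8s.node.name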

I'm pasting here the full manifest of a working Collector that:

apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: "otel-agent-collector"
spec:

  mode: daemonset

  replicas: 1
  resources:
    limits:
      memory: 200Mi

  env:
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName

  config:

    receivers:
      kubeletstats:
        collection_interval: 10s
        auth_type: serviceAccount
        endpoint: "https://${env:K8S_NODE_NAME}:10250"

    processors:

      k8sattributes:
        filter:
          node_from_env_var: K8S_NODE_NAME
        passthrough: false
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: connection
        extract:
          metadata:
            - "k8s.namespace.name"
            - "k8s.deployment.name"
            - "k8s.statefulset.name"
            - "k8s.daemonset.name"
            - "k8s.cronjob.name"
            - "k8s.job.name"
            - "k8s.node.name"
            - "k8s.pod.name"
            - "k8s.pod.uid"
            - "k8s.pod.start_time"
          labels:
            - tag_name: $$1
              key_regex: (.*)
              from: pod
          annotations:
            - tag_name: $$1
              key_regex: (.*)
              from: pod

      transform:
        metric_statements:
          - context: datapoint
            statements:
              - set(attributes["namespace"], resource.attributes["k8s.namespace.name"])
              - set(attributes["container"], resource.attributes["k8s.container.name"])
              - set(attributes["pod"], resource.attributes["k8s.pod.name"])
              - set(attributes["pod_id"], resource.attributes["k8s.pod.uid"])
              - set(attributes["node"], resource.attributes["k8s.node.name"])
              - set(attributes["app"], resource.attributes["k8s-app"])

    exporters:
      otlphttp/metrics:
        endpoint: http://{{ .Release.Name }}-prometheus-server:9090/api/v1/otlp
        tls:
          insecure: true
      debug/metrics:
        verbosity: detailed

    service:
      pipelines:
        metrics:
          receivers:
            - kubeletstats
          processors:
            - k8sattributes
            - transform
          exporters:
            - otlphttp/metrics
            - debug/metrics

I'd be happy to contribute to the documentation to clarify this point.