open-telemetry / opentelemetry-collector

OpenTelemetry Collector
https://opentelemetry.io
Apache License 2.0
4.3k stars 1.42k forks source link

memory_limiter does not work when mounting host filesystem with cgroups v1 #6826

Closed povilasv closed 9 months ago

povilasv commented 1 year ago

Describe the bug

The Memory Limiter processor might work incorrectly when using cgroups v1 and mounting the host's /sys filesystem. We need to mount the host filesystem for the hostmetrics receiver to work. Example -> https://github.com/open-telemetry/opentelemetry-helm-charts/blob/main/charts/opentelemetry-collector/templates/_pod.tpl#L109-L113

In this case /proc/self/mountinfo will contain many memory cgroups for different containers running on the host.

This issue was reported in helm charts repository -> https://github.com/open-telemetry/opentelemetry-helm-charts/issues/543

Steps to reproduce

Run the OpenTelemetry Collector on Kubernetes with Docker and make the Pod mount the host filesystem.

What did you expect to see?

The memory limiter reading the correct cgroup file and getting the total memory.

What did you see instead? The memory limiter trying to read an incorrect file.

What version did you use? Version: v0.66.0

What config did you use? Config: (e.g. the yaml config file)

apiVersion: v1
data:
  relay: |
    exporters:
      coralogix:
        application_name: 'default'
        application_name_attributes:
        - k8s.namespace.name
        - service.namespace
        logs:
          endpoint: 'otel-logs.coralogix.com:443'
        metrics:
          endpoint: 'otel-metrics.coralogix.com:443'
        private_key: ${CORALOGIX_PRIVATE_KEY}
        subsystem_name: 'nodes'
        subsystem_name_attributes:
        - k8s.deployment.name
        - k8s.statefulset.name
        - k8s.daemonset.name
        - k8s.cronjob.name
        - k8s.job.name
        - k8s.container.name
        - k8s.node.name
        - service.name
        timeout: 30s
        traces:
          endpoint: 'otel-traces.coralogix.com:443'
      logging: {}
    extensions:
      file_storage:
        directory: /var/lib/otelcol
      health_check: {}
      memory_ballast:
        size_in_percentage: 40
      zpages:
        endpoint: localhost:55679
    processors:
      batch: {}
      k8sattributes:
        extract:
          metadata:
          - k8s.namespace.name
          - k8s.deployment.name
          - k8s.statefulset.name
          - k8s.daemonset.name
          - k8s.cronjob.name
          - k8s.job.name
        passthrough: false
        pod_association:
        - sources:
          - from: resource_attribute
            name: k8s.pod.ip
        - sources:
          - from: resource_attribute
            name: k8s.pod.uid
        - sources:
          - from: connection
      memory_limiter:
        check_interval: 5s
        limit_percentage: 80
        spike_limit_percentage: 25
      resourcedetection/env:
        detectors:
        - system
        - env
        override: false
        timeout: 2s
      spanmetrics:
        dimensions:
        - name: k8s.deployment.name
        - name: k8s.statefulset.name
        - name: k8s.daemonset.name
        - name: k8s.cronjob.name
        - name: k8s.job.name
        - name: k8s.container.name
        - name: k8s.node.name
        - name: k8s.namespace.name
        metrics_exporter: coralogix
    receivers:
      filelog:
        exclude:
        - /var/log/pods/sys-mon_otel-coralogix*_*/opentelemetry-collector/*.log
        include:
        - /var/log/pods/*/*/*.log
        include_file_name: false
        include_file_path: true
        operators:
        - id: get-format
          routes:
          - expr: body matches "^\\{"
            output: parser-docker
          - expr: body matches "^[^ Z]+ "
            output: parser-crio
          - expr: body matches "^[^ Z]+Z"
            output: parser-containerd
          type: router
        - id: parser-crio
          output: extract_metadata_from_filepath
          regex: ^(?P<time>[^ Z]+) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$
          timestamp:
            layout: "2006-01-02T15:04:05.000000000-07:00"
            layout_type: gotime
            parse_from: attributes.time
          type: regex_parser
        - id: parser-containerd
          output: extract_metadata_from_filepath
          regex: ^(?P<time>[^ ^Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$
          timestamp:
            layout: '%Y-%m-%dT%H:%M:%S.%LZ'
            parse_from: attributes.time
          type: regex_parser
        - id: parser-docker
          output: extract_metadata_from_filepath
          timestamp:
            layout: '%Y-%m-%dT%H:%M:%S.%LZ'
            parse_from: attributes.time
          type: json_parser
        - id: extract_metadata_from_filepath
          parse_from: attributes["log.file.path"]
          regex: ^.*\/(?P<namespace>[^_]+)_(?P<pod_name>[^_]+)_(?P<uid>[a-f0-9\-]+)\/(?P<container_name>[^\._]+)\/(?P<restart_count>\d+)\.log$
          type: regex_parser
        - from: attributes.stream
          to: attributes["log.iostream"]
          type: move
        - from: attributes.container_name
          to: resource["k8s.container.name"]
          type: move
        - from: attributes.namespace
          to: resource["k8s.namespace.name"]
          type: move
        - from: attributes.pod_name
          to: resource["k8s.pod.name"]
          type: move
        - from: attributes.restart_count
          to: resource["k8s.container.restart_count"]
          type: move
        - from: attributes.uid
          to: resource["k8s.pod.uid"]
          type: move
        - from: attributes.log
          to: body
          type: move
        start_at: beginning
        storage: file_storage
      hostmetrics:
        collection_interval: 10s
        root_path: /hostfs
        scrapers:
          cpu: null
          disk: null
          filesystem:
            exclude_fs_types:
              fs_types:
              - autofs
              - binfmt_misc
              - bpf
              - cgroup2
              - configfs
              - debugfs
              - devpts
              - devtmpfs
              - fusectl
              - hugetlbfs
              - iso9660
              - mqueue
              - nsfs
              - overlay
              - proc
              - procfs
              - pstore
              - rpc_pipefs
              - securityfs
              - selinuxfs
              - squashfs
              - sysfs
              - tracefs
              match_type: strict
            exclude_mount_points:
              match_type: regexp
              mount_points:
              - /dev/*
              - /proc/*
              - /sys/*
              - /run/k3s/containerd/*
              - /var/lib/docker/*
              - /var/lib/kubelet/*
              - /snap/*
          load: null
          memory: null
          network: null
      jaeger:
        protocols:
          grpc:
            endpoint: ${MY_POD_IP}:14250
          thrift_binary:
            endpoint: ${MY_POD_IP}:6832
          thrift_compact:
            endpoint: ${MY_POD_IP}:6831
          thrift_http:
            endpoint: ${MY_POD_IP}:14268
      kubeletstats:
        auth_type: serviceAccount
        collection_interval: 20s
        endpoint: ${K8S_NODE_NAME}:10250
      otlp:
        protocols:
          grpc:
            endpoint: ${MY_POD_IP}:4317
          http:
            endpoint: ${MY_POD_IP}:4318
      prometheus:
        config:
          scrape_configs:
          - job_name: opentelemetry-collector
            scrape_interval: 30s
            static_configs:
            - targets:
              - ${MY_POD_IP}:8888
      zipkin:
        endpoint: ${MY_POD_IP}:9411
    service:
      extensions:
      - zpages
      - health_check
      - memory_ballast
      - file_storage
      pipelines:
        logs:
          exporters:
          - coralogix
          processors:
          - k8sattributes
          - batch
          receivers:
          - otlp
          - filelog
        metrics:
          exporters:
          - coralogix
          processors:
          - k8sattributes
          - memory_limiter
          - resourcedetection/env
          - batch
          receivers:
          - prometheus
          - otlp
          - hostmetrics
          - kubeletstats
        traces:
          exporters:
          - coralogix
          processors:
          - k8sattributes
          - memory_limiter
          - spanmetrics
          - batch
          receivers:
          - otlp
          - zipkin
          - jaeger
      telemetry:
        metrics:
          address: ${MY_POD_IP}:8888

Environment

Additional context Add any other context about the problem here.

atoulme commented 9 months ago

Is there more work required here?

povilasv commented 9 months ago

Forgot to close the issue; this is done.