aws / aws-for-fluent-bit

The source of the amazon/aws-for-fluent-bit container image
Apache License 2.0
461 stars 134 forks source link

Kubernetes Filter is not working with EKS-Fargate #301

Open vaibhavgupta3007 opened 2 years ago

vaibhavgupta3007 commented 2 years ago
### Describe the question/issue I am trying to setup logging in eks fargate using aws-observability. my logs are pushing to cloudwatch but seems like eks is overriding all my filter parameters. It is not applying kubernetes filter, I am expecting that every log message should have kuberenetes meta data. ### Configuration apiVersion: v1 data: filters.conf: | [FILTER] Name kubernetes Match kube.* Merge_Log On Buffer_Size 0 Kube_Meta_Cache_TTL 300s K8S-Logging.Parser On flb_log_cw: "true" output.conf: | [OUTPUT] Name cloudwatch_logs Match * region us-west-2 log_group_name eks/fluent-bit-cloudwatch log_stream_prefix from-fluent-bit- auto_create_group true log_key log kind: ConfigMap metadata: annotations: kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"v1","data":{"filters.conf":"[FILTER]\n Name kubernetes\n Match kube.*\n Merge_Log On\n Buffer_Size 0\n Kube_Meta_Cache_TTL 300s\n K8S-Logging.Parser On\n","flb_log_cw":"true","output.conf":"[OUTPUT]\n Name cloudwatch_logs\n Match *\n region us-west-2\n log_group_name eks/fluent-bit-cloudwatch\n log_stream_prefix from-fluent-bit-\n auto_create_group true\n log_key log\n"},"kind":"ConfigMap","metadata":{"annotations":{},"name":"aws-logging","namespace":"aws-observability"}} creationTimestamp: "2022-02-15T20:45:14Z" name: aws-logging namespace: aws-observability resourceVersion: "61271873" uid: a024722d-85eb-4fe0-bac4-4a8da0646656 ### Fluent Bit Log Output { "log": "File /var/log/containers/*.log not present, sleeping 1" } -- { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": 
"File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "File /var/log/containers/*.log not present, sleeping 1" } { "log": "\u001b[1mFluent Bit v1.7.9\u001b[0m" } { "log": "* \u001b[1m\u001b[93mCopyright (C) 2019-2021 The Fluent Bit Authors\u001b[0m" } { "log": "* \u001b[1m\u001b[93mCopyright (C) 2015-2018 Treasure Data\u001b[0m" } { "log": "* Fluent 
Bit is a CNCF sub-project under the umbrella of Fluentd" } { "log": "* https://fluentbit.io" } { "log": "[2022/02/15 23:52:53] [ info] [engine] started (pid=574)" } { "log": "[2022/02/15 23:52:53] [ info] [storage] version=1.1.1, initializing..." } { "log": "[2022/02/15 23:52:53] [ info] [storage] in-memory" } { "log": "[2022/02/15 23:52:53] [ info] [storage] normal synchronization mode, checksum disabled, max_chunks_up=128" } { "log": "[2022/02/15 23:52:53] [ info] [filter:kubernetes:kubernetes.0] https=1 host=5A52A847F7CEAF2CB551EF1A1592F658.gr7.us-west-2.eks.amazonaws.com port=443" } { "log": "[2022/02/15 23:52:53] [ warn] [filter:kubernetes:kubernetes.0] cannot open /var/run/secrets/kubernetes.io/serviceaccount/namespace" } { "log": "[2022/02/15 23:52:53] [ info] [filter:kubernetes:kubernetes.0] Fluent Bit not running in a POD" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter log_group_name = 'eks-catalog-svc-fluent-bit-logs'\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter default_log_group_name = 'fluentbit-default'\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter log_stream_prefix = ''\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter log_stream_name = 'from-fluent-bit-service-quoting-apis-7969d86747-r5hcs-dev6'\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter default_log_stream_name = '/fluentbit-default'\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter region = 'us-west-2'\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter log_key = ''\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter role_arn = ''\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter auto_create_group = 
'true'\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter new_log_group_tags = ''\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter log_retention_days = '0'\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter endpoint = ''\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter sts_endpoint = ''\"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter credentials_endpoint = \"" } { "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter log_format = ''\"" } { "log": "[2022/02/15 23:52:53] [ info] [sp] stream processor started" } { "log": "[2022/02/15 23:52:53] [ info] [input:tail:tail.0] inotify_fs_add(): inode=1197979 watch_fd=1 name=/var/log/containers/service-quoting-apis-7969d86747-r5hcs_dev6_nr-busybox-50bd3f1af0a91c8c020aad74436db6a42ff8483b707d0140830b1c115c0a0611.log" } { "log": "[2022/02/15 23:52:53] [ info] [input:tail:tail.1] inotify_fs_add(): inode=1197900 watch_fd=1 name=/var/log/fluent-bit.log" } { "log": "[2022/02/15 23:52:58] [ info] [output:cloudwatch_logs:cloudwatch_logs.0] Creating log group eks/fluent-bit-cloudwatch" } { "log": "[2022/02/15 23:52:58] [ info] [output:cloudwatch_logs:cloudwatch_logs.0] Log Group eks/fluent-bit-cloudwatch already exists" } { "log": "[2022/02/15 23:52:58] [ info] [output:cloudwatch_logs:cloudwatch_logs.0] Creating log stream from-fluent-bit-flblogs.var.log.fluent-bit.log in log group eks/fluent-bit-cloudwatch" } { "log": "[2022/02/15 23:52:58] [ info] [output:cloudwatch_logs:cloudwatch_logs.0] Created log stream from-fluent-bit-flblogs.var.log.fluent-bit.log" } { "log": "time=\"2022-02-15T23:52:58Z\" level=info msg=\"[cloudwatch 0] Created log group eks-catalog-svc-fluent-bit-logs\\n\"" } { "log": "time=\"2022-02-15T23:52:58Z\" level=info msg=\"[cloudwatch 0] Created log stream 
from-fluent-bit-service-quoting-apis-7969d86747-r5hcs-dev6 in group eks-catalog-svc-fluent-bit-logs\"" } { "log": "[2022/02/15 23:53:03] [ info] [input:tail:tail.0] inotify_fs_add(): inode=1198004 watch_fd=2 name=/var/log/containers/service-quoting-apis-7969d86747-r5hcs_dev6_app-build-51acd33df535b6471a132fe1412d05914b382fcb57f78a03a718f25141505a0e.log" } { "log": "[2022/02/15 23:53:13] [ info] [input:tail:tail.0] inotify_fs_add(): inode=1198010 watch_fd=3 name=/var/log/containers/service-quoting-apis-7969d86747-r5hcs_dev6_service-quoting-apis-4ad92cd417d9f123057ecf3d9977c2cf0ba6c5eb7e365eaf29655f622db724bc.log" } { "log": "[2022/02/15 23:53:18] [ info] [output:cloudwatch_logs:cloudwatch_logs.0] Creating log stream from-fluent-bit-kube.var.log.containers.service-quoting-apis-7969d86747-r5hcs_dev6_service-quoting-apis-4ad92cd417d9f123057ecf3d9977c2cf0ba6c5eb7e365eaf29655f622db724bc.log in log group eks/fluent-bit-cloudwatch" } { "log": "[2022/02/15 23:53:18] [ info] [output:cloudwatch_logs:cloudwatch_logs.0] Created log stream from-fluent-bit-kube.var.log.containers.service-quoting-apis-7969d86747-r5hcs_dev6_service-quoting-apis-4ad92cd417d9f123057ecf3d9977c2cf0ba6c5eb7e365eaf29655f622db724bc.log" }
### Fluent Bit Version Info
Please refer to the log output above for the version.

### Cluster Details
EKS Fargate

### Application Details

Steps to reproduce issue

Related Issues

PettitWesley commented 2 years ago
[FILTER]
    Name kubernetes
    Match kube.*
    Merge_Log On
    Buffer_Size 0
    Kube_Meta_Cache_TTL 300s
    K8S-Logging.Parser On
"flb_log_cw":"true",
"output.conf":"
[OUTPUT]
     Name cloudwatch_logs
     Match *
     region us-west-2
     log_group_name eks/fluent-bit-cloudwatch
     log_stream_prefix from-fluent-bit-
     auto_create_group true
     log_key log
PettitWesley commented 2 years ago

Hmm. Actually, @vaibhavgupta3007, the config you show can't be what was actually applied, because of this log output:

{ "log": "time=\"2022-02-15T23:52:53Z\" level=info msg=\"[cloudwatch 0] plugin parameter auto_create_group = 'true'\"" }
{ "log": "[2022/02/15 23:53:18] [ info] [output:cloudwatch_logs:cloudwatch_logs.0] Created log stream from-fluent-bit-kube.var.log.containers.service-quoting-apis-7969d86747-r5hcs_dev6_service-quoting-apis-4ad92cd417d9f123057ecf3d9977c2cf0ba6c5eb7e365eaf29655f622db724bc.log" }

Both cloudwatch and cloudwatch_logs plugins were enabled. So I think this is not actually the config applied to this pod.

PettitWesley commented 2 years ago

Please see: https://github.com/aws/containers-roadmap/issues/1625

vaibhavgupta3007 commented 2 years ago

@PettitWesley: can you please elaborate on what you mean by the config not being applied to the pod? After updating the ConfigMap, I restarted my pod, so it should pick up the latest ConfigMap configuration and use the kubernetes filter.

PettitWesley commented 2 years ago

@vaibhavgupta3007 TBH I'm not sure how this happened but given that I see both cloudwatch plugins enabled and configured... the config map you shared must not be the one that actually ran for this run. I'm not sure I can help more on that, you may need to contact AWS support.

As far as the lack of k8s metadata goes, AFAIK this is not a bug in Fluent Bit but a bug in EKS Fargate that the team is aware of. Please track that container roadmap issue. I apologize that I can't be more helpful on this at this time.

kncesarini commented 2 years ago

Is this still a confirmed bug and if so is there no workaround for it?

I am using the following config, taken basically line-by-line from the docs (https://docs.aws.amazon.com/eks/latest/userguide/fargate-logging.html) and am not seeing any k8s metadata being added.

(additionally the log_retention_days config seems to be ignored, but that's a sidetrack)

kind: ConfigMap
apiVersion: v1
data:
  filters.conf: |
    [FILTER]
        Name parser
        Match *
        Key_name log
        Parser crio
    [FILTER]
        Name kubernetes
        Match kube.*
        Merge_Log On
        Buffer_Size 0
        Kube_Meta_Cache_TTL 300s
  flb_log_cw: "true"
  output.conf: |
    [OUTPUT]
        Name cloudwatch_logs
        Match   *
        region eu-north-1
        log_group_name my-logs
        log_stream_prefix from-fluent-bit-
        log_retention_days 60
        auto_create_group true
        log_key log
  parsers.conf: |
    [PARSER]
        Name crio
        Format Regex
        Regex ^(?<time>[^ ]+) (?<stream>stdout|stderr) (?<logtag>P|F) (?<log>.*)$
        Time_Key    time
        Time_Format %Y-%m-%dT%H:%M:%S.%L%z
rsumukha commented 2 years ago

@kncesarini The bug has been fixed. To enrich logs with k8s metadata, `log_key` must be removed from the output configuration.

PettitWesley commented 2 years ago

@kncesarini log_key explanation is here: https://github.com/aws-samples/amazon-ecs-firelens-examples/tree/mainline/examples/fluent-bit/cloudwatchlogs

kncesarini commented 2 years ago

@rsumukha @PettitWesley Awesome, thanks for the quick support, works great now. I'm sure it would be super useful to others working with this to have a complete example in the docs I linked above. For reference this is my current config now which works great (removing log_key from output, and adding Keep_Log Off to the filter):

apiVersion: v1
data:
  filters.conf: |
    [FILTER]
        Name parser
        Match *
        Key_name log
        Parser crio
    [FILTER]
        Name kubernetes
        Match kube.*
        Merge_Log On
        Keep_Log Off
        Buffer_Size 0
        Kube_Meta_Cache_TTL 300s
  flb_log_cw: 'true'
  output.conf: |
    [OUTPUT]
        Name cloudwatch_logs
        Match   *
        region eu-north-1
        log_group_name my-logs
        log_stream_prefix from-fluent-bit-
        log_retention_days 60
        auto_create_group true
  parsers.conf: |
    [PARSER]
        Name crio
        Format Regex
        Regex ^(?<time>[^ ]+) (?<stream>stdout|stderr) (?<logtag>P|F) (?<log>.*)$
        Time_Key    time
        Time_Format %Y-%m-%dT%H:%M:%S.%L%z
kind: ConfigMap

We have 100% single-line json logs.

bellondr commented 2 years ago

@kncesarini This ConfigMap works for me, but the Kubernetes info is missing. My log looks like this:

{
    "log": "2022-10-31T05:22:51.368004337Z stderr F 2022/10/31 05:22:51 [notice] 1#1: signal 3 (SIGQUIT) received, shutting down"
}

but I need a log like this:

{
    "log": "I1031 07:56:32.872420       1 static_autoscaler.go:502] Scale down status: unneededOnly=false lastScaleUpTime=2022-10-10 17:42:14.50727249 +0000 UTC m=+2.131787240 lastScaleDownDeleteTime=2022-10-10 17:42:14.50727255 +0000 UTC m=+2.131787310 lastScaleDownFailTime=2022-10-10 17:42:14.50727263 +0000 UTC m=+2.131787380 scaleDownForbidden=false isDeleteInProgress=false scaleDownInCooldown=false\n",
    "stream": "stderr",
    "kubernetes": {
        "pod_name": "blueprints-addon-cluster-autoscaler-aws-cluster-autoscaler8t8xn",
        "namespace_name": "kube-system",
        "pod_id": "8a1909ae-9cc5-409e-99b7-9c0995ef3b98",
        "host": "ip-10-2-10-107.us-west-2.compute.internal",
        "container_name": "aws-cluster-autoscaler",
        "docker_id": "a4f0283fa8f7c7ba670cba498c4b5b8cf3a9e497cba6171a111444aa25b08749",
        "container_hash": "k8s.gcr.io/autoscaling/cluster-autoscaler@sha256:9494f34a5dcf7202bc08a33a617062cd29b4b57a6914a89bdde2c6a219b0b942",
        "container_image": "k8s.gcr.io/autoscaling/cluster-autoscaler:v1.21.1"
    }
}

Is there an example of how to get the k8s namespace in the Fluent Bit log? Or does Fluent Bit on EKS Fargate not support this metadata?