The decode_cef processor used at the input of the integration automatically translates the generated fields into ECS, but the content is not validated in the process.

Specifically, we noticed this with the Data Quality Check in Kibana for the `event.outcome` field. The field `cef.extension.eventOutcome` is generated automatically and defaults to "None" if no parameter is present. As a result, values other than `success`, `failure`, and `unknown` are written to `event.outcome`. In our case this was only caught thanks to the Data Quality Check. This is fatal.

Example result for not present and present:
Complaints in the DQA check:
I think the error lies in the decoder that is used by all inputs. See here as an example: https://github.com/elastic/integrations/blob/87e6e91ff250ade3d36636822a2d1329682f7f04/packages/cef/data_stream/log/agent/stream/udp.yml.hbs#L19
I understand that this decoder comes from Filebeat, which performs the following mapping: https://github.com/elastic/beats/blob/f2e2a4b1ddbb2a330280b23505c9551cc0447eba/x-pack/filebeat/processors/decode_cef/keys.ecs.go
https://github.com/elastic/beats/blob/f2e2a4b1ddbb2a330280b23505c9551cc0447eba/x-pack/filebeat/processors/decode_cef/keys.ecs.go#L95
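To illustrate, this is roughly the kind of guard I would expect around the `event.outcome` translation. This is a minimal Go sketch under my assumptions: the function name `normalizeEventOutcome` and the reject-instead-of-copy behavior are hypothetical, not the actual beats code.

```go
package main

import (
	"fmt"
	"strings"
)

// allowedOutcomes holds the only values ECS permits for event.outcome.
var allowedOutcomes = map[string]bool{
	"success": true,
	"failure": true,
	"unknown": true,
}

// normalizeEventOutcome is a hypothetical guard: it returns the value to
// write into event.outcome and whether the field should be set at all.
// Anything outside the allowed set (e.g. the literal "None") is rejected
// instead of being copied into the document verbatim.
func normalizeEventOutcome(raw string) (string, bool) {
	v := strings.ToLower(strings.TrimSpace(raw))
	if allowedOutcomes[v] {
		return v, true
	}
	return "", false
}

func main() {
	for _, raw := range []string{"Success", "None", ""} {
		v, ok := normalizeEventOutcome(raw)
		fmt.Printf("%q -> %q (set=%v)\n", raw, v, ok)
	}
}
```

With a check like this, an absent or defaulted `eventOutcome` would simply leave `event.outcome` unset rather than producing a value the Data Quality Check later flags.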
But that's just my guess.

The expected behavior of the automatic translation would be that the content is also validated when the fields are created. This should be revised urgently, as it undermines reliability. In our case it was a time-consuming process to find out how these incorrectly populated fields came about.
Apart from that, perhaps the entire procedure is no longer the smartest approach. In any case, it would also be desirable to respect the ECS field types and allowed field values.
As I have seen, you have already been involved with a commit in the decoder and also here, which is why I'm pinging you, @efd6. Perhaps this issue would be better placed in the beats repository?