Closed 1337-42 closed 1 year ago
My best guess is that an active Elastic integration is intercepting the event and mutating it beyond recognition.
system.syslog
ingest pipeline? This ingest pipeline will certainly not handle the pasted event, as described below:
grok
processor tries (and fails) to map the contents of message
through any of three patterns, none of which will match; on_failure
will set the error.message
field and cause processing to continue. message
(which would make it difficult to search for). system.syslog.message
to message
, but will not do so because system.syslog.message
on this event is not set. system.syslog.timestamp
, but will fail to do so because that field is not set on this event, causing it to append more to error.message
pipeline => "_none"
.
@yaauie thanks for the helpful response! We ran a quick test, running the pipeline with the suggested pipeline => "_none"
flag and the data was successfully shipped.
To make sure it was not a fluke, we ran the same logstash pipeline without pipeline => "_none"
, which once again failed to ship the data. During this test, the datastream that Logstash was supposed to ship to was created, but after looking in Kibana logs-*
filtering on the added field, we found 0 hits. Looking in dev tools, we can kind of see what is going on:
GET .ds-logs-system.syslog-logstash_test-2023.04.26-000001/_doc/ayujlIcBbdM3iBUU354C
{
"_index": ".ds-logs-system.syslog-logstash_test-2023.04.26-000001",
"_id": "ayujlIcBbdM3iBUU354C",
"_version": 1,
"_seq_no": 251,
"_primary_term": 1,
"found": true,
"_source": {
"identifier": "logstash_test",
"process": {
"name": "kernel"
},
"agent": {
"name": "[...REDACTED...]",
"id": "[...REDACTED...]",
"type": "filebeat",
"ephemeral_id": "[...REDACTED...]",
"version": "8.7.0"
},
"log": {
"file": {
"path": "/var/log/syslog"
},
"offset": 10324
},
"elastic_agent": {
"id": "[...REDACTED...]",
"version": "8.7.0",
"snapshot": false
},
"message": "UTC 2022 (Ubuntu 5.15.0-1018.24-gcp 5.15.53)",
"cloud": {
"availability_zone": "[...REDACTED...]",
"instance": {
"name": "[...REDACTED...]",
"id": "[...REDACTED...]"
},
"provider": "[...REDACTED...]",
"machine": {
"type": "[...REDACTED...]"
},
"service": {
"name": "[...REDACTED...]"
},
"project": {
"id": "[...REDACTED...]"
},
"account": {
"id": "[...REDACTED...]"
}
},
"input": {
"type": "log"
},
"@timestamp": "2023-09-08T07:14:47.000Z",
"system": {
"syslog": {}
},
"ecs": {
"version": "8.0.0"
},
"data_stream": {
"namespace": "logstash_test",
"type": "logs",
"dataset": "system.syslog"
},
"host": {
"hostname": "[...REDACTED...]",
"os": {
"kernel": "5.15.0-1018-gcp",
"codename": "jammy",
"name": "Ubuntu",
"family": "debian",
"type": "linux",
"version": "22.04.1 LTS (Jammy Jellyfish)",
"platform": "ubuntu"
},
"ip": [
"10.12.0.30",
"fe80::4001:aff:fe0c:1e"
],
"containerized": false,
"name": "[...REDACTED...]",
"id": "[...REDACTED...]",
"mac": [
"[...REDACTED...]"
],
"architecture": "x86_64"
},
"@version": "1",
"event": {
"agent_id_status": "auth_metadata_missing",
"ingested": "2023-04-26T07:10:06Z",
"timezone": "+00:00",
"dataset": "system.syslog"
}
}
}
The message field contains only a part of the ingested string "message": "UTC 2022 (Ubuntu 5.15.0-1018.24-gcp 5.15.53)"
, while the full string would look like [ 0.000000] Linux version 5.15.0-1018-gcp (buildd@lcy02-amd64-074) (gcc (Ubuntu 11.2.0-19ubuntu1) 11.2.0, GNU ld (GNU Binutils for Ubuntu) 2.38) #24-Ubuntu SMP Thu Sep 8 07:14:47 UTC 2022 (Ubuntu 5.15.0-1018.24-gcp 5.15.53)
.
A search in logs-*
on message : "UTC 2022 (Ubuntu 5.15.0-1018.24-gcp 5.15.53)"
or error.message: "UTC 2022 (Ubuntu 5.15.0-1018.24-gcp 5.15.53)"
retrieves 0 results.
Somehow it turns these 254 docs into unsearchable docs. Looking at the mappings for this index, it seems like there shouldn't be any index / search issues.
GET /.ds-logs-system.syslog-logstash_test-2023.04.26-000001/_mapping
{
".ds-logs-system.syslog-logstash_test-2023.04.26-000001": {
"mappings": {
"_meta": {
"managed_by": "fleet",
"managed": true,
"package": {
"name": "system"
}
},
"_data_stream_timestamp": {
"enabled": true
},
"dynamic_templates": [
{
"container.labels": {
"path_match": "container.labels.*",
"match_mapping_type": "string",
"mapping": {
"type": "keyword"
}
}
},
{
"strings_as_keyword": {
"match_mapping_type": "string",
"mapping": {
"ignore_above": 1024,
"type": "keyword"
}
}
}
],
"date_detection": false,
"properties": {
"@timestamp": {
"type": "date"
},
"@version": {
"type": "keyword",
"ignore_above": 1024
},
"agent": {
"properties": {
"ephemeral_id": {
"type": "keyword",
"ignore_above": 1024
},
"id": {
"type": "keyword",
"ignore_above": 1024
},
"name": {
"type": "keyword",
"ignore_above": 1024
},
"type": {
"type": "keyword",
"ignore_above": 1024
},
"version": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"cloud": {
"properties": {
"account": {
"properties": {
"id": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"availability_zone": {
"type": "keyword",
"ignore_above": 1024
},
"image": {
"properties": {
"id": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"instance": {
"properties": {
"id": {
"type": "keyword",
"ignore_above": 1024
},
"name": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"machine": {
"properties": {
"type": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"project": {
"properties": {
"id": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"provider": {
"type": "keyword",
"ignore_above": 1024
},
"region": {
"type": "keyword",
"ignore_above": 1024
},
"service": {
"properties": {
"name": {
"type": "keyword",
"ignore_above": 1024
}
}
}
}
},
"container": {
"properties": {
"id": {
"type": "keyword",
"ignore_above": 1024
},
"image": {
"properties": {
"name": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"name": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"data_stream": {
"properties": {
"dataset": {
"type": "constant_keyword",
"value": "system.syslog"
},
"namespace": {
"type": "constant_keyword",
"value": "logstash_test"
},
"type": {
"type": "constant_keyword",
"value": "logs"
}
}
},
"ecs": {
"properties": {
"version": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"elastic_agent": {
"properties": {
"id": {
"type": "keyword",
"ignore_above": 1024
},
"snapshot": {
"type": "boolean"
},
"version": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"event": {
"properties": {
"action": {
"type": "keyword",
"ignore_above": 1024
},
"agent_id_status": {
"type": "keyword",
"ignore_above": 1024
},
"category": {
"type": "keyword",
"ignore_above": 1024
},
"code": {
"type": "keyword",
"ignore_above": 1024
},
"created": {
"type": "date"
},
"dataset": {
"type": "constant_keyword",
"value": "system.syslog"
},
"ingested": {
"type": "date",
"format": "strict_date_time_no_millis||strict_date_optional_time||epoch_millis"
},
"kind": {
"type": "keyword",
"ignore_above": 1024
},
"module": {
"type": "constant_keyword",
"value": "system"
},
"outcome": {
"type": "keyword",
"ignore_above": 1024
},
"provider": {
"type": "keyword",
"ignore_above": 1024
},
"sequence": {
"type": "long"
},
"timezone": {
"type": "keyword",
"ignore_above": 1024
},
"type": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"fingerprint": {
"type": "keyword",
"fields": {
"text": {
"type": "text"
}
}
},
"host": {
"properties": {
"architecture": {
"type": "keyword",
"ignore_above": 1024
},
"containerized": {
"type": "boolean"
},
"domain": {
"type": "keyword",
"ignore_above": 1024
},
"hostname": {
"type": "keyword",
"ignore_above": 1024
},
"id": {
"type": "keyword",
"ignore_above": 1024
},
"ip": {
"type": "ip"
},
"mac": {
"type": "keyword",
"ignore_above": 1024
},
"name": {
"type": "keyword",
"ignore_above": 1024
},
"os": {
"properties": {
"build": {
"type": "keyword",
"ignore_above": 1024
},
"codename": {
"type": "keyword",
"ignore_above": 1024
},
"family": {
"type": "keyword",
"ignore_above": 1024
},
"full": {
"type": "keyword",
"ignore_above": 1024,
"fields": {
"text": {
"type": "match_only_text"
}
}
},
"kernel": {
"type": "keyword",
"ignore_above": 1024
},
"name": {
"type": "keyword",
"ignore_above": 1024,
"fields": {
"text": {
"type": "text"
}
}
},
"platform": {
"type": "keyword",
"ignore_above": 1024
},
"type": {
"type": "keyword",
"ignore_above": 1024
},
"version": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"type": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"identifier": {
"type": "keyword",
"ignore_above": 1024
},
"input": {
"properties": {
"type": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"log": {
"properties": {
"file": {
"properties": {
"path": {
"type": "keyword",
"ignore_above": 1024
}
}
},
"offset": {
"type": "long"
}
}
},
"message": {
"type": "match_only_text"
},
"process": {
"properties": {
"name": {
"type": "keyword",
"ignore_above": 1024,
"fields": {
"text": {
"type": "match_only_text"
}
}
},
"pid": {
"type": "long"
}
}
},
"system": {
"properties": {
"syslog": {
"type": "object"
}
}
}
}
}
}
}
The _settings
also look normal. Any clue how this could be caused?
Other than that, thanks for the solution, as your suggestion did fix the issue.
@yaauie thanks for the support!
Logstash information:
Please include the following information:
Plugins installed: (
bin/logstash-plugin list --verbose
) JVM (e.g.
java -version
): OS version (
uname -a
if on a Unix-like system): Description of the problem including expected versus actual behavior: We are using Logstash for a project to extract data from a cluster and add it to a second cluster. But after a few runs we noticed that in the destination cluster we are missing a few documents. Narrowing it down, we found out that the documents that were missing all contained special characters like
@
and #
in the message field. The other few million documents are indexed as they should. Steps to reproduce:
We use the following pipeline:
All documents transfer just fine except for the ones that look like this:
The Logstash logs don't provide any warnings or errors.
cc: @Aegrah