ait-aecid / logdata-anomaly-miner

This tool parses log data and allows you to define analysis pipelines for anomaly detection. It was designed to run the analysis with limited resources and the lowest possible permissions, making it suitable for use on production servers.
GNU General Public License v3.0
71 stars 23 forks source link

Json parser misses some paths #1233

Closed landauermax closed 1 year ago

landauermax commented 1 year ago

In the sample below, I would expect paths such as /model/system/cpu/nice/norm/pct/val, but they are missing from the paths found by the NewMatchPathDetector. This means that these paths cannot be analyzed with analysis components.

Logs:

{"agent":{"hostname":"intranet-server","name":"intranet-server","id":"d8e6f857-ec88-4cf5-bc5e-7b72a6fd1e33","ephemeral_id":"c6b20ae2-38c0-4ada-8be2-2d607fddf7e9","version":"7.13.2","type":"metricbeat"},"service":{"type":"system"},"event":{"module":"system","dataset":"system.cpu","duration":630326},"@version":"1","metricset":{"period":45000,"name":"cpu"},"host":{"cpu":{"pct":0.0678},"name":"intranet-server"},"ecs":{"version":"1.9.0"},"@timestamp":"2022-01-21T00:00:22.284Z","tags":["beats_input_raw_event"],"system":{"cpu":{"iowait":{"pct":2.0E-4,"norm":{"pct":2.0E-4}},"steal":{"pct":7.0E-4,"norm":{"pct":7.0E-4}},"irq":{"norm":{"pct":0},"pct":0},"softirq":{"pct":2.0E-4,"norm":{"pct":2.0E-4}},"cores":1,"user":{"pct":0.0283,"norm":{"pct":0.0283}},"total":{"norm":{"pct":0.0678},"pct":0.0678},"nice":{"pct":0,"norm":{"pct":0}},"idle":{"pct":0.932,"norm":{"pct":0.932}},"system":{"pct":0.0386,"norm":{"pct":0.0386}}}}}
{"service":{"type":"system"},"agent":{"hostname":"intranet-server","name":"intranet-server","id":"d8e6f857-ec88-4cf5-bc5e-7b72a6fd1e33","ephemeral_id":"c6b20ae2-38c0-4ada-8be2-2d607fddf7e9","version":"7.13.2","type":"metricbeat"},"event":{"module":"system","dataset":"system.cpu","duration":732367},"@version":"1","metricset":{"period":45000,"name":"cpu"},"host":{"cpu":{"pct":0.0439},"name":"intranet-server"},"ecs":{"version":"1.9.0"},"@timestamp":"2022-01-21T00:01:07.284Z","tags":["beats_input_raw_event"],"system":{"cpu":{"iowait":{"norm":{"pct":2.0E-4},"pct":2.0E-4},"steal":{"pct":5.0E-4,"norm":{"pct":5.0E-4}},"irq":{"pct":0,"norm":{"pct":0}},"softirq":{"norm":{"pct":2.0E-4},"pct":2.0E-4},"user":{"pct":0.0211,"norm":{"pct":0.0211}},"nice":{"pct":0,"norm":{"pct":0}},"total":{"pct":0.0439,"norm":{"pct":0.0439}},"cores":1,"idle":{"pct":0.9559,"norm":{"pct":0.9559}},"system":{"pct":0.0221,"norm":{"pct":0.0221}}}}}

Config:

LearnMode: false
# Optional; default: aminer
AminerUser: "aminer"
# Optional; default: aminer
AminerGroup: "aminer"

# Single log source; its parser_id matches the id of the JsonModelElement
# declared under Parser.
LogResourceList:
  - url: "file:///home/ubuntu/replay/russellmitchell/monitoring/logs/logstash/intranet-server/system.cpu.log"
    parser_id: "MonitoringParsingModel"

Parser:
  # Timestamp element used for the "@timestamp" key.
  # Example value: 2022-01-25T06:42:22.284Z
  - id: monitoring_timestamp
    type: DateTimeModelElement
    name: "time"
    date_format: "%Y-%m-%dT%H:%M:%S.%fZ"

  # Integer element; shares the name 'val' with monitoring_val and is not
  # referenced by any key in key_parser_dict of this config.
  - id: monitoring_int
    type: DecimalIntegerValueModelElement
    name: "val"

  # Float element used for the nice/total/system pct values.
  - id: monitoring_val
    type: DecimalFloatValueModelElement
    name: "val"

  # Byte-string element restricted to the alphabet given in args; it is used
  # for several pct values in key_parser_dict although it is named 'version'.
  # The upper-case run originally skipped 'D' ("...ABCEFG..."); it is restored
  # here so that 'D' is accepted like every other letter.
  - id: monitoring_str
    type: VariableByteDataModelElement
    name: 'version'
    args: 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.,;:_-/()[]'

  # JSON parsing model for the metricbeat system.cpu records shown under "Logs".
  # It is referenced by parser_id in LogResourceList and by the args of the
  # 'model' FirstMatchModelElement. key_parser_dict mirrors the nesting of the
  # JSON document; each leaf is either the id of a parser element declared
  # above or the keyword ALLOW_ALL.
  - id: MonitoringParsingModel
    type: JsonModelElement
    name: 'model'
    key_parser_dict:
      # NOTE(review): per the maintainer's reply in this thread, ALLOW_ALL is
      # only intended for dictionaries, not for string leaves such as the ones
      # below; an AnyByteDataModelElement should be used for those instead.
      metricset:
        period: ALLOW_ALL
        name: ALLOW_ALL
      agent:
        id: ALLOW_ALL
        hostname: ALLOW_ALL
        type: ALLOW_ALL
        ephemeral_id: ALLOW_ALL
        version: ALLOW_ALL
        name: ALLOW_ALL
      # "tags" is a JSON array in the logs, hence the one-element list here.
      tags:
        -  ALLOW_ALL
      host:
        cpu:
          pct: ALLOW_ALL
        name: ALLOW_ALL
      "@version": ALLOW_ALL
      system:
        cpu:
          cores: ALLOW_ALL
          # Every metric below has a direct pct and a nested norm.pct.
          # steal, idle, iowait, user, irq and softirq are parsed with
          # monitoring_str; nice, total and system with monitoring_val.
          steal:
            norm:
              pct: monitoring_str
            pct: monitoring_str
          idle:
            norm:
              pct: monitoring_str
            pct: monitoring_str
          nice:
            norm:
              pct: monitoring_val
            pct: monitoring_val
          iowait:
            norm:
              pct: monitoring_str
            pct: monitoring_str
          user:
            norm:
              pct: monitoring_str
            pct: monitoring_str
          total:
            norm:
              pct: monitoring_val
            pct: monitoring_val
          irq:
            norm:
              pct: monitoring_str
            pct: monitoring_str
          system:
            norm:
              pct: monitoring_val
            pct: monitoring_val
          softirq:
            norm:
              pct: monitoring_str
            pct: monitoring_str
      # Parsed with the DateTimeModelElement named 'time' (monitoring_timestamp).
      "@timestamp": monitoring_timestamp
      service:
        type: ALLOW_ALL
      event:
        module: ALLOW_ALL
        duration: ALLOW_ALL
        dataset: ALLOW_ALL
      ecs:
        version: ALLOW_ALL

  # Element flagged with start; a FirstMatchModelElement whose only entry is
  # MonitoringParsingModel. It carries the same name ('model') as that element.
  - id: model
    type: FirstMatchModelElement
    name: "model"
    start: true
    args:
      - MonitoringParsingModel

Input:
  # optional
  multi_source: true
  timestamp_paths:
    - "/model/time"

# No analysis components are configured in this reproduction.
Analysis: null

EventHandlers:
  - id: "stpe"
    type: "StreamPrinterEventHandler"
    # optional; default: false
    json: true

Paths:

["string:/model", "string:/model/agent", "string:/model/agent/0", "string:/model/agent/1", "string:/model/agent/2", "string:/model/agent/3", "string:/model/agent/4", "string:/model/ecs", "string:/model/event", "string:/model/event/0", "string:/model/event/1", "string:/model/event/2", "string:/model/host", "string:/model/host/cpu", "string:/model/metricset", "string:/model/metricset/0", "string:/model/metricset/1", "string:/model/service", "string:/model/tags", "string:/model/time"]
ernstleierzopf commented 1 year ago

The initial issue was that ALLOW_ALL was not used in the intended way: ALLOW_ALL can only be used for dictionaries, not for strings as in this case. Instead, an AnyByteDataModelElement should be used. Because a dictionary was expected, all of the remaining data was removed, and therefore no paths could be found.

However, the issue is still valid, as some paths were missing. This is fixed now.