influxdata / telegraf

Agent for collecting, processing, aggregating, and writing metrics, logs, and other arbitrary data.
https://influxdata.com/telegraf
MIT License
14.6k stars 5.57k forks source link

cisco_telemetry_mdt not processing all grpc metadata/messages #12926

Closed rceara closed 1 year ago

rceara commented 1 year ago

Relevant telegraf.conf

# Global Agent Configuration
[agent]
  hostname = "collector1"
  flush_interval = "15s"
  interval = "15s"
  logfile = "/var/log/telegraf/telegraf.log"

[[inputs.cisco_telemetry_mdt]]
transport = "grpc"
service_address = ":57499"

[[outputs.file]]
  files = ["/etc/telegraf/telegraf-grpc-mtls.log"]
  rotation_interval = "4h"
  rotation_max_archives = 1
  rotation_max_size = "0MB"
  #data_format = "influx"
  data_format = "json"
  json_timestamp_units = "1s"

Logs from Telegraf

I believe this is an issue with the cisco_telemetry_mdt input plugin: it only processes the consumable information (the measurement data) and not all of the messages/metadata that the device sends to the Telegraf collector.

An example of what I see in Telegraf (only 1 message) when sending the data to an outputs.file output. There is no collection start, no collection end, and no other message related to this input data:

$ tail -F /etc/telegraf/telegraf-grpc-mtls.log:

{
  "fields": {
    "antenna_monitor/detection_time": 12,
    "antenna_monitor/enabled": true,
    "ap_keepalive_state": true,
    "ap_lag_enabled": false,
    "ap_location/floor": 0,
    "ap_location/location": "default-location",
    "ap_mode_data/ap_fabric_data/is_fabric_ap": false,
    "ap_mode_data/ap_sub_mode": "ap-sub-mode-none",
    "ap_mode_data/home_ap_enabled": false,
    "ap_mode_data/wtp_mode": "local-mode",
    "ap_security_data/ap_cert_expiry_time": 1774841885,
    "ap_security_data/ap_cert_issuer_cn": "Cisco Manufacturing CA",
    "ap_security_data/ap_cert_policy": "ap-cert-policy-default",
    "ap_security_data/cert_type": "wireless-cert-mic",
    "ap_security_data/fips_enabled": false,
    "ap_security_data/lsc_ap_auth_type": "lsc-ap-auth-capwap-dtls",
    "ap_security_data/wlancc_enabled": false,
    "ap_services/ap_dhcp_server/is_dhcp_server_enabled": false,
    "ap_state/ap_admin_state": "adminstate-enabled",
    "ap_state/ap_operation_state": "registered",
    "ap_time_info/boot_time": 1677655070,
    "ap_time_info/join_time": 1678739616,
    "ap_time_info/join_time_taken": 1084545,
    "ap_vlan/vlan_tag_id": 0,
    "ap_vlan/vlan_tag_state": "vlan-tagging-disabled",
    "country_code": "US ",
    "device_detail/dynamic_info/led_flash_expiry": 0,
    "device_detail/dynamic_info/led_state_enabled": true,
    "device_detail/dynamic_info/reset_button_state": false,
    "device_detail/static_info/ap_models/model": "AIR-AP3802E-A-K9",
    "device_detail/static_info/board_data/ap_sys_info/mem_size": 0,
    "device_detail/static_info/board_data/wtp_enet_mac": "00:04:05:03:e9:f0",
    "device_detail/static_info/board_data/wtp_serial_num": "1140K1000",
    "device_detail/static_info/board_data_opt/join_priority": 1,
    "device_detail/static_info/num_slots": 2,
    "device_detail/static_info/wtp_model_type": 64,
    "device_detail/wtp_version/backup_sw_version/build": 0,
    "device_detail/wtp_version/backup_sw_version/maint": 0,
    "device_detail/wtp_version/backup_sw_version/release": 0,
    "device_detail/wtp_version/backup_sw_version/version": 0,
    "device_detail/wtp_version/boot_ver/build": 111,
    "device_detail/wtp_version/boot_ver/maint": 98,
    "device_detail/wtp_version/boot_ver/release": 5,
    "device_detail/wtp_version/boot_ver/version": 8,
    "device_detail/wtp_version/mini_ios_version/build": 3,
    "device_detail/wtp_version/mini_ios_version/maint": 0,
    "device_detail/wtp_version/mini_ios_version/release": 51,
    "device_detail/wtp_version/mini_ios_version/version": 0,
    "device_detail/wtp_version/sw_ver/build": 111,
    "device_detail/wtp_version/sw_ver/maint": 98,
    "device_detail/wtp_version/sw_ver/release": 5,
    "device_detail/wtp_version/sw_ver/version": 8,
    "disconnect_detail/disconnect_reason": "unkown",
    "external_module_data/usb_override": false,
    "external_module_data/xm_data/enable": false,
    "grpc_enabled": true,
    "hyperlocation_data/hyperlocation_method": "hyperlocation-method-local",
    "is_master": false,
    "local_dhcp": false,
    "mdns_group_id": 0,
    "mdns_group_method": "mdns-grp-ap-name",
    "name": "TEST-AP-1000",
    "num_radio_slots": 2,
    "sliding_window/multi_window_support": true,
    "sliding_window/window_size": 1,
    "stats_monitor/alarm_hold_time": 6,
    "stats_monitor/alarms_enable": false,
    "stats_monitor/cpu_threshold": 0,
    "stats_monitor/enable": false,
    "stats_monitor/mem_threshold": 0,
    "stats_monitor/sample_intvl": 30,
    "stats_monitor/stats_intvl": 300,
    "stats_monitor/trap_retx_time": 0,
    "tag_info/is_ap_misconfigured": false,
    "tag_info/is_dtls_lsc_fbk_ap": false,
    "tag_info/policy_tag_info/policy_tag_name": "default-policy-tag",
    "tag_info/resolved_tag_info/resolved_policy_tag": "default-policy-tag",
    "tag_info/resolved_tag_info/resolved_rf_tag": "default-rf-tag",
    "tag_info/resolved_tag_info/resolved_site_tag": "default-site-tag",
    "tag_info/rf_tag/rf_tag_name": "default-rf-tag",
    "tag_info/site_tag/ap_profile": "default-ap-profile",
    "tag_info/site_tag/flex_profile": "default-flex-profile",
    "tag_info/site_tag/site_tag_name": "default-site-tag",
    "tag_info/tag_source": "tag-source-default",
    "tunnel/preferred_mode": "preferred-mode-ipv4",
    "tunnel/udp_lite": "udplite-checksum-unconfigured",
    "wtp_ip": "192.168.195.234"
  },
  "name": "Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data",
  "tags": {
    "host": "collector1-wlc1.amazonaccountteam.com",
    "path": "Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data",
    "source": "wlc1.9840",
    "subscription": "102",
    "wtp_mac": "00:04:05:03:e9:00"
  },
  "timestamp": 1678745560
}

System info

Telegraf 1.21.4+ds1-0ubuntu2, Ubuntu 22.04

Docker

No response

Steps to reproduce

If you need to test with a collector please go to the following link to grab one Cisco device: https://devnetsandbox.cisco.com/RM/Diagram/Index/f2e2c0ad-844f-4a73-8085-00b5b28347a1?diagramType=Topology

Snippet of the config to be loaded:

config t
!
netconf-yang
!
telemetry ietf subscription 127
encoding encode-kvgpb
filter xpath /native/boot/system
receiver-type protocol
source-address 10.10.10.1 !This is the address of the source IP router/switch/WLC
stream yang-push
update-policy periodic 6000
receiver name rafa-testing4
!
telemetry ietf subscription 128
encoding encode-kvgpb
filter xpath /device-hardware-xe-oper:device-hardware-data/device-hardware
receiver-type protocol
source-address 10.10.10.1 !This is the address of the source IP router/switch/WLC
stream yang-push
update-policy periodic 90000
receiver name rafa-testing4
!
telemetry receiver protocol rafa-testing4
host ip-address <ip-of-the-collector> 57499
protocol grpc-tcp
![CSR1Kv-Sandbox-ondemand](https://user-images.githubusercontent.com/19786387/227018150-a3782a4f-7800-446e-a7ad-3b1785f8eba1.jpg)

Expected behavior

What I see with the other collectors is 2 messages rather than 1, providing more details such as the start and end of the collection. The timestamp is inside the message, but that is different from the start/end of when the data was sent from the device to the collector.

What we are missing in the output message shown in Telegraf: the source of the original message with the port number (`"Source": "10.93.178.70:59841"`), the encoding_path (`Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data`), the collection_start_time (`1678740019758`), and the collection_end_time (`1678740019763`). Please notice that the metadata arrives in 2 different messages, so the 1st message doesn't have the collection_end_time set.

message 1 with all the data of the measurement

------- 2023-03-13 20:45:06.96540091 +0000 UTC -------
Summary: GPB(common) Message [10.93.178.70:59841(wlc2.9840)/Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data msg len: 11363]
{
  "Source": "10.93.178.70:59841",
  "Telemetry": {
    "node_id_str": "wlc2.9840",
    "subscription_id_str": "128",
    "encoding_path": "Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data",
    "collection_id": 12,
    "collection_start_time": 1678740019758,
    "msg_timestamp": 1678740019758,
    "collection_end_time": 0
  },
  "Rows": [
    {
      "Timestamp": 1678740019758,
      "Keys": {
        "wtp-mac": "00:2a:10:5a:c1:c0"
      },
      "Content": {
        "antenna-monitor": {
          "detection-time": 0,
          "enabled": false
        },
        "ap-keepalive-state": true,
        "ap-lag-enabled": false,
        "ap-location": {
          "floor": 0,
          "location": "default location"
        },
        "ap-mode-data": {
          "ap-fabric-data": {
            "is-fabric-ap": true
          },
          "ap-sub-mode": "ap-sub-mode-none",
          "home-ap-enabled": true,
          "wtp-mode": "local-mode"
        },
        "ap-security-data": {
          "ap-cert-expiry-time": 1782550686,
          "ap-cert-issuer-cn": "ACT2 SUDI CA",
          "ap-cert-policy": "ap-cert-policy-default",
          "cert-type": "wireless-cert-mic",
          "fips-enabled": false,
          "lsc-ap-auth-type": "lsc-ap-auth-capwap-dtls",
          "wlancc-enabled": false
        },
        "ap-services": {
          "ap-dhcp-server": {
            "is-dhcp-server-enabled": true
          }
        },
        "ap-state": {
          "ap-admin-state": "adminstate-enabled",
          "ap-operation-state": "registered"
        },
        "ap-time-info": {
          "boot-time": 1677901894,
          "join-time": 1678169036,
          "join-time-taken": 26
        },
        "ap-vlan": {
          "vlan-tag-id": 0,
          "vlan-tag-state": "vlan-tagging-disabled"
        },
        "country-code": "US ",
        "device-detail": {
          "dynamic-info": {
            "led-flash-expiry": 0,
            "led-state-enabled": true,
            "reset-button-state": false
          },
          "static-info": {
            "ap-models": {
              "model": "AIR-AP3802I-B-K9"
            },
            "board-data": {
              "ap-sys-info": {
                "cpu-type": " ARMv7 Processor rev 1 (v7l)",
                "mem-size": 1028096,
                "mem-type": "DDR4"
              },
              "wtp-enet-mac": "00:81:c4:66:fc:c4",
              "wtp-serial-num": "FCW2027NBR7"
            },
            "board-data-opt": {
              "join-priority": 1
            },
            "num-slots": 3,
            "wtp-model-type": 65
          },
          "wtp-version": {
            "backup-sw-version": {
              "build": 46,
              "maint": 3,
              "release": 9,
              "version": 17
            },
            "boot-ver": {
              "build": 4,
              "maint": 2,
              "release": 1,
              "version": 1
            },
            "mini-ios-version": {
              "build": 0,
              "maint": 0,
              "release": 0,
              "version": 0
            },
            "sw-ver": {
              "build": 49,
              "maint": 3,
              "release": 9,
              "version": 17
            },
            "sw-version": "17.9.3.49"
          }
        },
        "disconnect-detail": {
          "disconnect-reason": "wtp-controller-initiated-reason"
        },
        "external-module-data": {
          "usb-override": false,
          "xm-data": {
            "enable": false
          }
        },
        "grpc-enabled": true,
        "hyperlocation-data": {
          "hyperlocation-method": "hyperlocation-method-local"
        },
        "is-master": false,
        "local-dhcp": false,
        "mdns-group-id": 0,
        "mdns-group-method": "mdns-grp-ap-name",
        "mdns-rule-name": "",
        "name": "AIR-AP3802-1",
        "num-radio-slots": 2,
        "reboot-stats": null,
        "sliding-window": {
          "multi-window-support": true,
          "window-size": 1
        },
        "stats-monitor": {
          "alarm-hold-time": 6,
          "alarms-enable": false,
          "cpu-threshold": 0,
          "enable": false,
          "mem-threshold": 0,
          "sample-intvl": 30,
          "stats-intvl": 300,
          "trap-retx-time": 0
        },
        "tag-info": {
          "filter-info": {
            "filter-name": ""
          },
          "is-ap-misconfigured": false,
          "is-dtls-lsc-fbk-ap": false,
          "policy-tag-info": {
            "policy-tag-name": "test_pol_tag"
          },
          "resolved-tag-info": {
            "resolved-policy-tag": "test_pol_tag",
            "resolved-rf-tag": "default-rf-tag",
            "resolved-site-tag": "default-site-tag"
          },
          "rf-tag": {
            "rf-tag-name": "default-rf-tag"
          },
          "site-tag": {
            "ap-profile": "default-ap-profile",
            "flex-profile": "default-flex-profile",
            "site-tag-name": "default-site-tag"
          },
          "tag-source": "tag-source-static"
        },
        "tunnel": {
          "preferred-mode": "preferred-mode-ipv4",
          "udp-lite": "udplite-checksum-unconfigured"
        },
        "wtp-ip": "10.93.178.94"
      }
    },
    {
      "Timestamp": 1678740019758,
      "Keys": {
        "wtp-mac": "74:11:b2:bf:ef:30"
      },
      "Content": {
        "antenna-monitor": {
          "detection-time": 0,
          "enabled": false
        },
        "ap-keepalive-state": true,
        "ap-lag-enabled": false,
        "ap-location": {
          "floor": 0,
          "location": "default location"
        },
        "ap-mode-data": {
          "ap-fabric-data": {
            "is-fabric-ap": true
          },
          "ap-sub-mode": "ap-sub-mode-none",
          "home-ap-enabled": true,
          "wtp-mode": "local-mode"
        },
        "ap-security-data": {
          "ap-cert-expiry-time": 4089992306,
          "ap-cert-issuer-cn": "High Assurance SUDI CA",
          "ap-cert-policy": "ap-cert-policy-default",
          "cert-type": "wireless-cert-mic",
          "fips-enabled": false,
          "lsc-ap-auth-type": "lsc-ap-auth-capwap-dtls",
          "wlancc-enabled": false
        },
        "ap-services": {
          "ap-dhcp-server": {
            "is-dhcp-server-enabled": true
          }
        },
        "ap-state": {
          "ap-admin-state": "adminstate-enabled",
          "ap-operation-state": "registered"
        },
        "ap-time-info": {
          "boot-time": 1677901965,
          "join-time": 1678169045,
          "join-time-taken": 107
        },
        "ap-vlan": {
          "vlan-tag-id": 0,
          "vlan-tag-state": "vlan-tagging-disabled"
        },
        "country-code": "US ",
        "device-detail": {
          "dynamic-info": {
            "led-flash-expiry": 0,
            "led-state-enabled": true,
            "reset-button-state": false
          },
          "static-info": {
            "ap-models": {
              "model": "C9136I-B"
            },
            "board-data": {
              "ap-sys-info": {
                "cpu-type": " ARMv8 Processor rev 4 (v8l)",
                "mem-size": 1752064,
                "mem-type": "DDR4"
              },
              "wtp-enet-mac": "74:11:b2:bc:29:3c",
              "wtp-serial-num": "FJC26351AZL"
            },
            "board-data-opt": {
              "join-priority": 1
            },
            "num-slots": 4,
            "wtp-model-type": 115
          },
          "wtp-version": {
            "backup-sw-version": {
              "build": 46,
              "maint": 3,
              "release": 9,
              "version": 17
            },
            "boot-ver": {
              "build": 4,
              "maint": 2,
              "release": 1,
              "version": 1
            },
            "mini-ios-version": {
              "build": 0,
              "maint": 0,
              "release": 0,
              "version": 0
            },
            "sw-ver": {
              "build": 49,
              "maint": 3,
              "release": 9,
              "version": 17
            },
            "sw-version": "17.9.3.49"
          }
        },
        "disconnect-detail": {
          "disconnect-reason": "wtp-wait-dtls-no-join-response"
        },
        "external-module-data": {
          "usb-override": false,
          "xm-data": {
            "enable": false
          }
        },
        "grpc-enabled": true,
        "hyperlocation-data": {
          "hyperlocation-method": "hyperlocation-method-wsm"
        },
        "is-master": false,
        "local-dhcp": false,
        "mdns-group-id": 0,
        "mdns-group-method": "mdns-grp-ap-name",
        "mdns-rule-name": "",
        "name": "AP7411.B2BC.293C",
        "num-radio-slots": 4,
        "reboot-stats": null,
        "sliding-window": {
          "multi-window-support": true,
          "window-size": 1
        },
        "stats-monitor": {
          "alarm-hold-time": 6,
          "alarms-enable": false,
          "cpu-threshold": 0,
          "enable": false,
          "mem-threshold": 0,
          "sample-intvl": 30,
          "stats-intvl": 300,
          "trap-retx-time": 0
        },
        "tag-info": {
          "filter-info": {
            "filter-name": ""
          },
          "is-ap-misconfigured": false,
          "is-dtls-lsc-fbk-ap": false,
          "policy-tag-info": {
            "policy-tag-name": "default-policy-tag"
          },
          "resolved-tag-info": {
            "resolved-policy-tag": "default-policy-tag",
            "resolved-rf-tag": "default-rf-tag",
            "resolved-site-tag": "default-site-tag"
          },
          "rf-tag": {
            "rf-tag-name": "default-rf-tag"
          },
          "site-tag": {
            "ap-profile": "default-ap-profile",
            "flex-profile": "default-flex-profile",
            "site-tag-name": "default-site-tag"
          },
          "tag-source": "tag-source-default"
        },
        "tunnel": {
          "preferred-mode": "preferred-mode-ipv4",
          "udp-lite": "udplite-checksum-unconfigured"
        },
        "wtp-ip": "10.93.178.98"
      }
    },
    {
      "Timestamp": 1678740019758,
      "Keys": {
        "wtp-mac": "78:72:5d:51:f3:c0"
      },
      "Content": {
        "antenna-monitor": {
          "detection-time": 0,
          "enabled": false
        },
        "ap-keepalive-state": true,
        "ap-lag-enabled": false,
        "ap-location": {
          "floor": 0,
          "location": "default location"
        },
        "ap-mode-data": {
          "ap-fabric-data": {
            "is-fabric-ap": true
          },
          "ap-sub-mode": "ap-sub-mode-none",
          "home-ap-enabled": true,
          "wtp-mode": "local-mode"
        },
        "ap-security-data": {
          "ap-cert-expiry-time": 1873484741,
          "ap-cert-issuer-cn": "ACT2 SUDI CA",
          "ap-cert-policy": "ap-cert-policy-default",
          "cert-type": "wireless-cert-mic",
          "fips-enabled": false,
          "lsc-ap-auth-type": "lsc-ap-auth-capwap-dtls",
          "wlancc-enabled": false
        },
        "ap-services": {
          "ap-dhcp-server": {
            "is-dhcp-server-enabled": true
          }
        },
        "ap-state": {
          "ap-admin-state": "adminstate-disabled",
          "ap-operation-state": "registered"
        },
        "ap-time-info": {
          "boot-time": 1677901896,
          "join-time": 1678169036,
          "join-time-taken": 26
        },
        "ap-vlan": {
          "vlan-tag-id": 0,
          "vlan-tag-state": "vlan-tagging-disabled"
        },
        "country-code": "US ",
        "device-detail": {
          "dynamic-info": {
            "led-flash-expiry": 0,
            "led-state-enabled": true,
            "reset-button-state": false
          },
          "static-info": {
            "ap-models": {
              "model": "AIR-AP3802I-B-K9"
            },
            "board-data": {
              "ap-sys-info": {
                "cpu-type": " ARMv7 Processor rev 1 (v7l)",
                "mem-size": 1028096,
                "mem-type": "DDR4"
              },
              "wtp-enet-mac": "78:72:5d:4f:16:c6",
              "wtp-serial-num": "FCW2229N93M"
            },
            "board-data-opt": {
              "join-priority": 1
            },
            "num-slots": 3,
            "wtp-model-type": 65
          },
          "wtp-version": {
            "backup-sw-version": {
              "build": 46,
              "maint": 3,
              "release": 9,
              "version": 17
            },
            "boot-ver": {
              "build": 4,
              "maint": 2,
              "release": 1,
              "version": 1
            },
            "mini-ios-version": {
              "build": 0,
              "maint": 0,
              "release": 0,
              "version": 0
            },
            "sw-ver": {
              "build": 49,
              "maint": 3,
              "release": 9,
              "version": 17
            },
            "sw-version": "17.9.3.49"
          }
        },
        "disconnect-detail": {
          "disconnect-reason": "wtp-controller-initiated-reason"
        },
        "external-module-data": {
          "usb-override": false,
          "xm-data": {
            "enable": false
          }
        },
        "grpc-enabled": true,
        "hyperlocation-data": {
          "hyperlocation-method": "hyperlocation-method-local"
        },
        "is-master": false,
        "local-dhcp": false,
        "mdns-group-id": 0,
        "mdns-group-method": "mdns-grp-ap-name",
        "mdns-rule-name": "",
        "name": "AIR-AP3802-2",
        "num-radio-slots": 2,
        "reboot-stats": null,
        "sliding-window": {
          "multi-window-support": true,
          "window-size": 1
        },
        "stats-monitor": {
          "alarm-hold-time": 6,
          "alarms-enable": false,
          "cpu-threshold": 0,
          "enable": false,
          "mem-threshold": 0,
          "sample-intvl": 30,
          "stats-intvl": 300,
          "trap-retx-time": 0
        },
        "tag-info": {
          "filter-info": {
            "filter-name": ""
          },
          "is-ap-misconfigured": false,
          "is-dtls-lsc-fbk-ap": false,
          "policy-tag-info": {
            "policy-tag-name": "test_pol_tag"
          },
          "resolved-tag-info": {
            "resolved-policy-tag": "test_pol_tag",
            "resolved-rf-tag": "default-rf-tag",
            "resolved-site-tag": "default-site-tag"
          },
          "rf-tag": {
            "rf-tag-name": "default-rf-tag"
          },
          "site-tag": {
            "ap-profile": "default-ap-profile",
            "flex-profile": "default-flex-profile",
            "site-tag-name": "default-site-tag"
          },
          "tag-source": "tag-source-static"
        },
        "tunnel": {
          "preferred-mode": "preferred-mode-ipv4",
          "udp-lite": "udplite-checksum-unconfigured"
        },
        "wtp-ip": "10.93.178.79"
      }
    },
    {
      "Timestamp": 1678740019758,
      "Keys": {
        "wtp-mac": "ac:7a:56:5b:7e:e0"
      },
      "Content": {
        "antenna-monitor": {
          "detection-time": 0,
          "enabled": false
        },
        "ap-keepalive-state": true,
        "ap-lag-enabled": false,
        "ap-location": {
          "floor": 0,
          "location": "default location"
        },
        "ap-mode-data": {
          "ap-fabric-data": {
            "is-fabric-ap": true
          },
          "ap-sub-mode": "ap-sub-mode-none",
          "home-ap-enabled": true,
          "wtp-mode": "local-mode"
        },
        "ap-security-data": {
          "ap-cert-expiry-time": 2141643617,
          "ap-cert-issuer-cn": "Cisco Manufacturing CA SHA2",
          "ap-cert-policy": "ap-cert-policy-default",
          "cert-type": "wireless-cert-mic",
          "fips-enabled": false,
          "lsc-ap-auth-type": "lsc-ap-auth-capwap-dtls",
          "wlancc-enabled": false
        },
        "ap-services": {
          "ap-dhcp-server": {
            "is-dhcp-server-enabled": true
          }
        },
        "ap-state": {
          "ap-admin-state": "adminstate-enabled",
          "ap-operation-state": "registered"
        },
        "ap-time-info": {
          "boot-time": 1677905164,
          "join-time": 1678169052,
          "join-time-taken": 14
        },
        "ap-vlan": {
          "vlan-tag-id": 0,
          "vlan-tag-state": "vlan-tagging-disabled"
        },
        "country-code": "US ",
        "device-detail": {
          "dynamic-info": {
            "led-flash-expiry": 0,
            "led-state-enabled": true,
            "reset-button-state": false
          },
          "static-info": {
            "ap-models": {
              "model": "IW3702-4E-A-K9"
            },
            "board-data": {
              "ap-sys-info": {
                "cpu-type": "PowerPC CPU at 800Mhz, revision number 0x215",
                "mem-size": 376814,
                "mem-type": "DDR3"
              },
              "wtp-enet-mac": "ac:7a:56:48:b3:68",
              "wtp-serial-num": "FTX2425P00C"
            },
            "board-data-opt": {
              "join-priority": 1
            },
            "num-slots": 2,
            "wtp-model-type": 60
          },
          "wtp-version": {
            "backup-sw-version": {
              "build": 0,
              "maint": 0,
              "release": 0,
              "version": 0
            },
            "boot-ver": {
              "build": 0,
              "maint": 0,
              "release": 3,
              "version": 15
            },
            "mini-ios-version": {
              "build": 0,
              "maint": 0,
              "release": 0,
              "version": 0
            },
            "sw-ver": {
              "build": 49,
              "maint": 3,
              "release": 9,
              "version": 17
            },
            "sw-version": "15.3(3)JPN2$"
          }
        },
        "disconnect-detail": {
          "disconnect-reason": "unkown"
        },
        "external-module-data": {
          "usb-override": false,
          "xm-data": {
            "enable": false
          }
        },
        "grpc-enabled": true,
        "hyperlocation-data": {
          "hyperlocation-method": "hyperlocation-method-local"
        },
        "is-master": false,
        "local-dhcp": false,
        "mdns-group-id": 0,
        "mdns-group-method": "mdns-grp-ap-name",
        "mdns-rule-name": "",
        "name": "IW3702-4E-1",
        "num-radio-slots": 2,
        "reboot-stats": null,
        "sliding-window": {
          "multi-window-support": false,
          "window-size": 1
        },
        "stats-monitor": {
          "alarm-hold-time": 6,
          "alarms-enable": false,
          "cpu-threshold": 0,
          "enable": false,
          "mem-threshold": 0,
          "sample-intvl": 30,
          "stats-intvl": 300,
          "trap-retx-time": 0
        },
        "tag-info": {
          "filter-info": {
            "filter-name": ""
          },
          "is-ap-misconfigured": false,
          "is-dtls-lsc-fbk-ap": false,
          "policy-tag-info": {
            "policy-tag-name": "iosap-policy-tag"
          },
          "resolved-tag-info": {
            "resolved-policy-tag": "iosap-policy-tag",
            "resolved-rf-tag": "default-rf-tag",
            "resolved-site-tag": "iosap-site-tag"
          },
          "rf-tag": {
            "rf-tag-name": "default-rf-tag"
          },
          "site-tag": {
            "ap-profile": "default-ap-profile",
            "flex-profile": "default-flex-profile",
            "site-tag-name": "iosap-site-tag"
          },
          "tag-source": "tag-source-static"
        },
        "tunnel": {
          "preferred-mode": "preferred-mode-ipv4",
          "udp-lite": "udplite-checksum-unconfigured"
        },
        "wtp-ip": "10.93.178.95"
      }
    }
  ]
}

message 2 which is correlated to the 1st message

------- 2023-03-13 20:45:06.97117944 +0000 UTC -------
Summary: GPB(common) Message [10.93.178.70:59841(wlc2.9840)/Cisco-IOS-XE-wireless-access-point-oper:wireless-access-point-oper:access-point-oper-data/capwap-data msg len: 142]
{
  "Source": "10.93.178.70:59841",
  "Telemetry": {
    "node_id_str": "wlc2.9840",
    "subscription_id_str": "128",
    "encoding_path": "Cisco-IOS-XE-wireless-access-point-oper:wireless-access-point-oper:access-point-oper-data/capwap-data",
    "collection_id": 12,
    "collection_start_time": 1678740019758,
    "msg_timestamp": 1678740019763,
    "collection_end_time": 1678740019763
  }
}

Actual behavior

I believe this is an issue with the cisco_telemetry_mdt input plugin: it only processes the consumable information (the measurement data) and not all of the messages/metadata that the device sends to the Telegraf collector.

An example of what I see in Telegraf (only 1 message) when sending the data to an outputs.file output. There is no collection start, no collection end, and no other message related to this input data:

$ tail -F /etc/telegraf/telegraf-grpc-mtls.log:

{
  "fields": {
    "antenna_monitor/detection_time": 12,
    "antenna_monitor/enabled": true,
    "ap_keepalive_state": true,
    "ap_lag_enabled": false,
    "ap_location/floor": 0,
    "ap_location/location": "default-location",
    "ap_mode_data/ap_fabric_data/is_fabric_ap": false,
    "ap_mode_data/ap_sub_mode": "ap-sub-mode-none",
    "ap_mode_data/home_ap_enabled": false,
    "ap_mode_data/wtp_mode": "local-mode",
    "ap_security_data/ap_cert_expiry_time": 1774841885,
    "ap_security_data/ap_cert_issuer_cn": "Cisco Manufacturing CA",
    "ap_security_data/ap_cert_policy": "ap-cert-policy-default",
    "ap_security_data/cert_type": "wireless-cert-mic",
    "ap_security_data/fips_enabled": false,
    "ap_security_data/lsc_ap_auth_type": "lsc-ap-auth-capwap-dtls",
    "ap_security_data/wlancc_enabled": false,
    "ap_services/ap_dhcp_server/is_dhcp_server_enabled": false,
    "ap_state/ap_admin_state": "adminstate-enabled",
    "ap_state/ap_operation_state": "registered",
    "ap_time_info/boot_time": 1677655070,
    "ap_time_info/join_time": 1678739616,
    "ap_time_info/join_time_taken": 1084545,
    "ap_vlan/vlan_tag_id": 0,
    "ap_vlan/vlan_tag_state": "vlan-tagging-disabled",
    "country_code": "US ",
    "device_detail/dynamic_info/led_flash_expiry": 0,
    "device_detail/dynamic_info/led_state_enabled": true,
    "device_detail/dynamic_info/reset_button_state": false,
    "device_detail/static_info/ap_models/model": "AIR-AP3802E-A-K9",
    "device_detail/static_info/board_data/ap_sys_info/mem_size": 0,
    "device_detail/static_info/board_data/wtp_enet_mac": "00:04:05:03:e9:f0",
    "device_detail/static_info/board_data/wtp_serial_num": "1140K1000",
    "device_detail/static_info/board_data_opt/join_priority": 1,
    "device_detail/static_info/num_slots": 2,
    "device_detail/static_info/wtp_model_type": 64,
    "device_detail/wtp_version/backup_sw_version/build": 0,
    "device_detail/wtp_version/backup_sw_version/maint": 0,
    "device_detail/wtp_version/backup_sw_version/release": 0,
    "device_detail/wtp_version/backup_sw_version/version": 0,
    "device_detail/wtp_version/boot_ver/build": 111,
    "device_detail/wtp_version/boot_ver/maint": 98,
    "device_detail/wtp_version/boot_ver/release": 5,
    "device_detail/wtp_version/boot_ver/version": 8,
    "device_detail/wtp_version/mini_ios_version/build": 3,
    "device_detail/wtp_version/mini_ios_version/maint": 0,
    "device_detail/wtp_version/mini_ios_version/release": 51,
    "device_detail/wtp_version/mini_ios_version/version": 0,
    "device_detail/wtp_version/sw_ver/build": 111,
    "device_detail/wtp_version/sw_ver/maint": 98,
    "device_detail/wtp_version/sw_ver/release": 5,
    "device_detail/wtp_version/sw_ver/version": 8,
    "disconnect_detail/disconnect_reason": "unkown",
    "external_module_data/usb_override": false,
    "external_module_data/xm_data/enable": false,
    "grpc_enabled": true,
    "hyperlocation_data/hyperlocation_method": "hyperlocation-method-local",
    "is_master": false,
    "local_dhcp": false,
    "mdns_group_id": 0,
    "mdns_group_method": "mdns-grp-ap-name",
    "name": "TEST-AP-1000",
    "num_radio_slots": 2,
    "sliding_window/multi_window_support": true,
    "sliding_window/window_size": 1,
    "stats_monitor/alarm_hold_time": 6,
    "stats_monitor/alarms_enable": false,
    "stats_monitor/cpu_threshold": 0,
    "stats_monitor/enable": false,
    "stats_monitor/mem_threshold": 0,
    "stats_monitor/sample_intvl": 30,
    "stats_monitor/stats_intvl": 300,
    "stats_monitor/trap_retx_time": 0,
    "tag_info/is_ap_misconfigured": false,
    "tag_info/is_dtls_lsc_fbk_ap": false,
    "tag_info/policy_tag_info/policy_tag_name": "default-policy-tag",
    "tag_info/resolved_tag_info/resolved_policy_tag": "default-policy-tag",
    "tag_info/resolved_tag_info/resolved_rf_tag": "default-rf-tag",
    "tag_info/resolved_tag_info/resolved_site_tag": "default-site-tag",
    "tag_info/rf_tag/rf_tag_name": "default-rf-tag",
    "tag_info/site_tag/ap_profile": "default-ap-profile",
    "tag_info/site_tag/flex_profile": "default-flex-profile",
    "tag_info/site_tag/site_tag_name": "default-site-tag",
    "tag_info/tag_source": "tag-source-default",
    "tunnel/preferred_mode": "preferred-mode-ipv4",
    "tunnel/udp_lite": "udplite-checksum-unconfigured",
    "wtp_ip": "192.168.195.234"
  },
  "name": "Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data",
  "tags": {
    "host": "collector1-wlc1.amazonaccountteam.com",
    "path": "Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data",
    "source": "wlc1.9840",
    "subscription": "102",
    "wtp_mac": "00:04:05:03:e9:00"
  },
  "timestamp": 1678745560
}

Additional info

I provided additional information about a sandbox device from developer.cisco.com that you can use in case you want to reproduce the problem with a collector. The issue seems to be very clear: the data is not coming in complete, with all the messages that involve the metadata of a grpc message.

powersj commented 1 year ago

Hi,

No collection start, collection end and any other message related to this input data:

Why are those fields important? In terms of timing, the timestamp is set based on the timestamp field of the data, if it exists.

What I see with the other collectors (2 messages rather than 1)

Telegraf generates metrics. I would be careful not to compare it to a data collector. If you take a step back, every metric produced by telegraf will use some set of fields. In general, we do not pass through all data collected in all plugins. Someone at some point made a decision about what is relevant information, what data to collect, and how to craft a metric out of that data.

The issue seems to be very clear, the data is not coming complete with all the messages that involves the metadata of a grpc message.

It would be far more helpful if you called out what additional fields you think are missing and need to be added, and more importantly why those fields need to be added.

Thanks!

rceara commented 1 year ago

Hi,

No collection start, collection end and any other message related to this input data:

Why are those fields important? In terms of timing, the timestamp is set based on the timestamp field if data if it exists.

What I see with the other collectors (2 messages rather than 1)

Telegraf generates metrics. I would be careful to not to compare it to a data collector. If you take a step back, every metric produced by telegraf will use some set of fields. In general, we do not pass through all data collected in all plugins. Someone at some point made a decision about what is relevant information, what data to collect, and how to craft a metric out of that data.

The issue seems to be very clear, the data is not coming complete with all the messages that involves the metadata of a grpc message.

It would be far more helpful if you called out what additional fields you think are missing and need to be added, and more importantly why those fields need to be added.

Thanks!

I just added a comment explaining what we are missing. Those fields are important because the device doesn't send all data in 1 message, so the collection start time and the collection end time are not in the same message. This raw data can be sent to an outbound file to verify whether the raw data is coming in complete and not in pieces. It's very hard to troubleshoot this with TCPDUMP (which we did) to verify that some data was missing when we dumped it to a text file on telegraf. We needed to use another collector to verify this was the case as well, and confirmed that telegraf was missing some important fields that were not populated in the text file.

What we are missing in the outbound message shown in Telegraf: the source of the original message with the port number ("Source": "10.93.178.70:59841"), the encoding_path of the 1st and last message (Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data), collection_start_time: 1678740019758, and collection_end_time: 1678740019763. Please notice that the metadata is coming in 2 different messages, so the 1st message doesn't have the collection_end_time.

powersj commented 1 year ago

The source of the original message with the port number: Source": "10.93.178.70:59841

This sounded familiar and a lot like https://github.com/influxdata/telegraf/issues/11920 Is that the same issue?

The encoding_path of the 1st and last message: Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data

This is stored as the tag path here, which is in your example above. Is your point that this second message does not have it? Is this message actually in the 2nd message?

collection_start_time: 1678740019758 collection_end_time: 1678740019763

Are these ever collected by telegraf in any message?

rceara commented 1 year ago

The source of the original message with the port number: Source": "10.93.178.70:59841

This sounded familiar and a lot like #11920 Is that the same issue?

The encoding_path of the 1st and last message: Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data

This is stored as the tag path here, which is in your example above. Is your point that this second message does not have it? Is this message actually in the 2nd message?

collection_start_time: 1678740019758 collection_end_time: 1678740019763

Are these ever collected by telegraf in any message?

I shared 2 different examples: one example from the telegraf collector and another example from another collector. The output that I sent from telegraf is different from the output I shared from the other collector, which shows the raw data that is coming via gRPC. So you can see the difference in the output and what is missing.

The issue in #11920 seems to be different, because what I'm explaining is that we are missing information that is not being populated in the output text file when telegraf processes the data. The Expected behavior (testing with the other collector) is what I verified using that collector (not telegraf), which brings all the raw data as it was received from the Cisco device.

Telegraf is not passing to the output text file all the information and messages received, as I explained above: the source of the original message with the port number ("Source": "10.93.178.70:59841"), the encoding_path of the 1st and last message (Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data), collection_start_time: 1678740019758, and collection_end_time: 1678740019763.

The collection_start_time and collection_end_time are coming in 2 different messages.

Please let me know if this is clear and makes sense.

powersj commented 1 year ago

Please let me know if is clear and make sense.

I am sorry, but I am not following at all. You had identified some fields that may or may not be missing and coming in different messages. I think you are making some assumption that you want telegraf to combine these messages or wait for the second to have all the data, but again, I am not following.

rceara commented 1 year ago

Please let me know if is clear and make sense.

I am sorry, but I am not following at all. You had identified some fields that may or may not be missing and coming in different messages. I think you are making some assumption that you want telegraf to combine these messages or wait for the second to have all the data, but again not following

It's pretty simple: in my original message I shared the output that telegraf is dumping to the output text file (Logs from Telegraf). If you compare the "Logs from Telegraf" with what I shared under "Expected behavior" from another collector I'm using (not the telegraf collector), you will notice that the telegraf output is missing: the source with the port number ("Source": "10.93.178.70:59841"), the encoding_path of the 1st and last message (Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data), collection_start_time: 1678740019758, and collection_end_time: 1678740019763. Does that make sense?

powersj commented 1 year ago

What I shared on the "Expected behavior" from another collector

Show me what you expect to get from Telegraf, not some other collector please.

rceara commented 1 year ago

What I shared on the "Expected behavior" from another collector

Show me what you expect to get from Telegraf, not some other collector please.

I'm expecting to see in the output.textfile the source with the port of the sender, the encoding_path from the 1st and last message that contains the data, the collection start and collection end time.

Example: the source of the original message with the port number ("Source": "10.93.178.70:59841"), the encoding_path of the 1st and last message (Cisco-IOS-XE-wireless-access-point-oper:access-point-oper-data/capwap-data), collection_start_time: 1678740019758, and collection_end_time: 1678740019763.

srebhan commented 1 year ago

Let me jump into this discussion... @rceara you are saying that your device sends one metric in two parts as two separate messages, the first one basically contains all data that is currently translated to a metric by Telegraf and a second message that contains some additional meta-data like the collection-end time. Is this understanding correct?

If so, I would be interested to learn how Telegraf can know that there is a second message following before it arrives. This is important for Telegraf to "hold-back" the first metric and fuse it with the second one (e.g. override the collection_end_time with the value of the second message).

Furthermore, how can Telegraf know which messages belong together? It seems like the msg_timestamp is different between the two messages while node_id_str, subscription_id_str and collection_id are identical. This also touches my previous question: How do we know we should fuse the two messages? When do we start a new metric?

rceara commented 1 year ago

Your statement and explanation are correct. We don't send all the data in one message but in multiple messages. Therefore, we start all messages with a collection_start_time that is the same, but don't close the collection_end_time until all messages of that specific group/collection are sent. Each message we send carries a timestamp, to track when that message was sent out of the device. The collection_start_time and collection_end_time are unique for each set/group of messages the device sends with the data. My question is: why doesn't telegraf write the source:port, collection_start_time and collection_end_time to the output text file if those fields are also part of the grpc message? Is it possible for telegraf to process all the data as received (raw data) and dump it to the outbound.textfile, so that if any troubleshooting needs to be done we can do it by verifying the timers of the collection_start_time and collection_end_time? As I said before, if I do a packet capture with tcpdump I can see all this information in the info header message of the tcp packet, but it is extremely difficult to troubleshoot a problem related to the collection of the data that way. It would be awesome if telegraf could show how the raw data is coming in, rather than showing consumable data (metrics) only. Maybe we can have a new plugin called outbound.rawdatafile showing how the raw data is coming to the collector? Just floating some ideas, so that we keep the existing outbound.file the same and create a new outbound plugin for that purpose :)

srebhan commented 1 year ago

@rceara I think you are somehow misinterpreting the intention or concept behind Telegraf. Let me clarify:

Telegraf is an agent for collecting, processing, aggregating, and writing metrics

This is the first line in the Readme. Telegraf is not a "collect whatever data-source you have and write the raw data to a file" agent. This being said, the idea is to query different data-sources (or provide listeners as for the cisco_telemetry_mdt input) and convert/standardize/transform it to a metric. This transformation might be lossy for some inputs but it allows a downstream user to compute using those metrics (e.g. computing statistics over different types of devices). So an outbound.rawdatafile plugin is out-of-scope for Telegraf. If you only require this for debugging, feel free to add a PR to enable debug logging the raw data.

This being said, we are currently missing the fields you mentioned because they are not extracted in the current code. You can correct this by opening a pull-request to add the missing fields to the metric created by Telegraf. I'm more than happy to review such a PR.

Regarding the merging of the multiple messages you mention, I'm not sure if your approach is the general approach for all devices supporting Cisco MDT. Can you elaborate on this? If this is the way all devices handle message splitting, I'd like to see a PR for fixing Telegraf. If not, you can still submit a PR and enable this type of message merging by adding a new option.

rceara commented 1 year ago

Ok, I understand your message and it sounds good to me! I will open a PR internally to enable debug logging of the raw data for cisco_telemetry_mdt. I think that will make sense. Again, the information you are collecting and sending to any output (Chronograf, influxdb, textfile) is correct, but we are missing some important fields (source:port, encoding_path, collection_start_time and collection_end_time) that are very important for debugging purposes.

telegraf-tiger[bot] commented 1 year ago

Hello! I am closing this issue due to inactivity. I hope you were able to resolve your problem; if not, please try posting this question in our Community Slack or Community Forums, or provide additional details in this issue and request that it be re-opened. Thank you!