influxdata / telegraf

Agent for collecting, processing, aggregating, and writing metrics, logs, and other arbitrary data.
https://influxdata.com/telegraf
MIT License
14.51k stars 5.55k forks source link

inputs.gnmi: Difference in metrics from input to output #9117

Closed sjwang90 closed 1 year ago

sjwang90 commented 3 years ago

Summary of behavior:

In the example input/output below, the following metrics are inconsistent between input and output. Input:

"adjacency-uptime":2492975,
"adjacency-holdtime":6,
"adjacency-checkpoint-object-id":1073753696,

Output:

"adjacency_checkpoint_object_id":1073753568,
"adjacency_holdtime":8,
"adjacency_uptime":1135367,

Relevant telegraf.conf:

[[inputs.gnmi]]
  ## gNMI gRPC server(s) to dial, given as "host:port"
  addresses = ["XXXXXX:57500"]

  ## Credentials presented to the gNMI server
  username = "admin"
  password = "cisco"

  ## Requested gNMI encoding (one of: "proto", "json", "json_ietf", "bytes")
  encoding = "proto"

  ## Delay before redialing after a failure
  redial = "10s"

  ## Subscriptions.
  ## Every subscription below uses subscription_mode = "sample"
  ## (one of: "target_defined", "sample", "on_change") together with
  ## sample_interval = "10s".

  [[inputs.gnmi.subscription]]
    name = "health_uptime"
    origin = "Cisco-IOS-XR-shellutil-oper"
    path = "/system-time/uptime"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "routing_isis_interface"
    origin = "Cisco-IOS-XR-clns-isis-oper"
    path = "/isis/instances/instance/interfaces/interface"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "interface_bundles_oper"
    origin = "Cisco-IOS-XR-bundlemgr-oper"
    path = "/bundle-information/bundle/bundle-bundles/bundle-bundle/bundle-bundle-descendant"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "health_cpu_utilization"
    origin = "Cisco-IOS-XR-wdsysmon-fd-oper"
    path = "/system-monitoring/cpu-utilization"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "health_node_summary"
    origin = "Cisco-IOS-XR-nto-misc-oper"
    path = "/memory-summary/nodes/node/summary"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "interfaces_summary"
    origin = "Cisco-IOS-XR-pfi-im-cmd-oper"
    path = "/interfaces/interface-summary"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "interface_operation_state_brief"
    origin = "Cisco-IOS-XR-pfi-im-cmd-oper"
    path = "/interfaces/interface-briefs/interface-brief"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "interface_statistics_data_rate"
    origin = "Cisco-IOS-XR-infra-statsd-oper"
    path = "/infra-statistics/interfaces/interface/latest/data-rate"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "interface_statistics_generic_counters"
    origin = "Cisco-IOS-XR-infra-statsd-oper"
    path = "/infra-statistics/interfaces/interface/latest/generic-counters"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "qos_input"
    origin = "Cisco-IOS-XR-qos-ma-oper"
    path = "/qos/nodes/node/policy-map/interface-table/interface/input/service-policy-names/service-policy-instance/statistics"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "qos_output"
    origin = "Cisco-IOS-XR-qos-ma-oper"
    path = "/qos/nodes/node/policy-map/interface-table/interface/output/service-policy-names/service-policy-instance/statistics"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "routing_adjacency"
    origin = "Cisco-IOS-XR-clns-isis-oper"
    path = "/isis/instances/instance/levels/level/adjacencies/adjacency"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "routing_neighbor"
    origin = "Cisco-IOS-XR-ipv4-bgp-oper"
    path = "/bgp/instances/instance/instance-active/vrfs/vrf/neighbors/neighbor"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "routing_default_vrf"
    origin = "Cisco-IOS-XR-ipv4-bgp-oper"
    path = "/bgp/instances/instance/instance-active/default-vrf/neighbors/neighbor"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "optics_info"
    origin = "Cisco-IOS-XR-controller-optics-oper"
    path = "/optics-oper/optics-ports/optics-port/optics-info"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "environments_pem_attributes"
    origin = "Cisco-IOS-XR-sysadmin-envmon-ui"
    path = "/environment/oper/power/location/pem_attributes"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "environments_sensor_attributes"
    origin = "Cisco-IOS-XR-sysadmin-envmon-ui"
    path = "/environment/oper/temperatures/location/sensor_attributes"
    subscription_mode = "sample"
    sample_interval = "10s"

  [[inputs.gnmi.subscription]]
    name = "environments_fan_attributes"
    origin = "Cisco-IOS-XR-sysadmin-envmon-ui"
    path = "/environment/oper/fan/location/fan_attributes"
    subscription_mode = "sample"
    sample_interval = "10s"

# A plugin that can transmit metrics over HTTP
[[outputs.http]]
  ## URL is the address to send metrics to
  url = "https://ah-1105918-001.sdi.corp.bankofamerica.com:9005/routertosplunk"

  ## Timeout for HTTP message
  # timeout = "5s"

  ## HTTP method, one of: "POST" or "PUT"
  method = "POST"

  ## Optional TLS Config
  # tls_ca = "/etc/telegraf/ca.pem"
  # tls_cert = "/etc/telegraf/cert.pem"
  # tls_key = "/etc/telegraf/key.pem"
  ## Use TLS but skip chain & host verification
  # insecure_skip_verify = false

  ## Data format to output.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
  data_format = "json"
  ## Unit used for the timestamps of the JSON output
  json_timestamp_units = "10ms"

  ## HTTP Content-Encoding for write request body, can be set to "gzip" to
  ## compress body or "identity" to apply no encoding.
  # content_encoding = "identity"

  ## Additional HTTP headers
  [outputs.http.headers]
    ## Should be set manually to "application/json" for json data_format
    Content-Type = "application/json"

[[outputs.file]]
  ## Files to write the metrics to
  files = ["/etc/telegraf/metrics.out"]

  ## Serialize metrics as JSON; same timestamp unit as configured here is
  ## what appears in the "timestamp" field of the written records
  data_format = "json"
  json_timestamp_units = "10ms"

System info:

Steps to reproduce:

  1. Source: ASR9910 running IOS-XR version 6.6.3
  2. Configuration on the router: dial-in mode
  3. Use the telegraf.conf shown above under "Relevant telegraf.conf".

Error messages: none. The input to Telegraf does not match the output of Telegraf.

Sample Input:

    "data_json":[
       {
          "timestamp":"1609952229481",
          "keys":[
             {
                "instance-name":"1"
             },
             {
                "level":"level2"
             },
             {
                "system-id":"0112.3200.1011"
             },
             {
                "interface-name":"Bundle-Ether1"
             }
          ],
          "content":{
             "adjacency-system-id":"0112.3200.1011",
             "adjacency-snpa":"0896.ad76.1e1d",
             "adjacency-interface":"Bundle-Ether1",
             "adjacency-media-type":"isis-media-class-p2p",
             "adjacency-state":"isis-adj-up-state",
             "adjacency-bfd-state":"isis-adj-bfd-no-state",
             "adjacency-ipv6bfd-state":"isis-adj-bfd-no-state",
             "adj-ipv4bfd-retry-running":false,
             "adj-ipv6bfd-retry-running":false,
             "adj-ipv4bfd-retry-exp":0,
             "adj-ipv6bfd-retry-exp":0,
             "adj-ipv4bfd-retry-count":0,
             "adj-ipv6bfd-retry-count":0,
             "adjacency-uptime-valid-flag":true,
             "adjacency-uptime":2492975,
             "adjacency-holdtime":6,
             "adjacency-checkpoint-object-id":1073753696,
             "adjacency-ietf-nsf-capable-flag":true,
             "adjacency-dispriority":0,
             "adjacency-neighbor-priority":0,
             "adjacency-local-priority":0,
             "local-dis-flag":false,
             "neighbor-dis-flag":false,
             "adjacency-area-address":[
                {
                   "entry":"49.1111"
                }
             ],
             "adjacency-topology":[
                {
                   "id":{
                      "af-name":"ipv4",
                      "saf-name":"unicast"
                   },
                   "topology-status":"isis-adj-topo-status-ok",
                   "parallel-p2p-link-suppressed-flag":false
                }
             ],
             "adjacency-per-address-family-data":[
                {
                   "af-name":"ipv4",
                   "ipv4":{
                      "next-hop":"11.231.254.22",
                      "interface-address":[
                         "11.231.254.22"
                      ],
                      "adjacency-sid":{
                         "adjacency-sid-value":24000,
                         "adjacency-sid-backup":{
                            "backup-label-stack-size":1,
                            "backup-label-stack":[
                               120111
                            ],
                            "backup-node-address":"11.232.1.11",
                            "backup-nexthop":"11.231.180.37",
                            "backup-interface":"Bundle-Ether3"
                         }
                      },
                      "non-frr-adjacency-sid":{
                         "adjacency-sid-value":24001
                      }
                   }
                }
             ],
             "nsr-standby":0
          }
       },

Sample Output:

{
   "fields":{
      "adj_ipv4bfd_retry_count":0,
      "adj_ipv4bfd_retry_exp":0,
      "adj_ipv4bfd_retry_running":false,
      "adj_ipv6bfd_retry_count":0,
      "adj_ipv6bfd_retry_exp":0,
      "adj_ipv6bfd_retry_running":false,
      "adjacency_area_address/entry":"49.1111",
      "adjacency_bfd_state":"isis-adj-bfd-no-state",
      "adjacency_checkpoint_object_id":1073753568,
      "adjacency_dispriority":0,
      "adjacency_holdtime":8,
      "adjacency_ietf_nsf_capable_flag":true,
      "adjacency_interface":"Bundle-Ether1",
      "adjacency_ipv6bfd_state":"isis-adj-bfd-no-state",
      "adjacency_local_priority":0,
      "adjacency_media_type":"isis-media-class-p2p",
      "adjacency_neighbor_priority":0,
      "adjacency_per_address_family_data/af_name":"ipv4",
      "adjacency_per_address_family_data/ipv4/adjacency_sid/adjacency_sid_backup/backup_interface":"Bundle-Ether3",
      "adjacency_per_address_family_data/ipv4/adjacency_sid/adjacency_sid_backup/backup_label_stack":120111,
      "adjacency_per_address_family_data/ipv4/adjacency_sid/adjacency_sid_backup/backup_label_stack_size":1,
      "adjacency_per_address_family_data/ipv4/adjacency_sid/adjacency_sid_backup/backup_nexthop":"11.231.180.37",
      "adjacency_per_address_family_data/ipv4/adjacency_sid/adjacency_sid_backup/backup_node_address":"11.232.1.11",
      "adjacency_per_address_family_data/ipv4/adjacency_sid/adjacency_sid_value":24000,
      "adjacency_per_address_family_data/ipv4/interface_address":"11.231.254.22",
      "adjacency_per_address_family_data/ipv4/next_hop":"11.231.254.22",
      "adjacency_per_address_family_data/ipv4/non_frr_adjacency_sid/adjacency_sid_value":24001,
      "adjacency_snpa":"0896.ad76.1e1d",
      "adjacency_state":"isis-adj-up-state",
      "adjacency_system_id":"0112.3200.1011",
      "adjacency_topology/id/af_name":"ipv4",
      "adjacency_topology/id/saf_name":"unicast",
      "adjacency_topology/parallel_p2p_link_suppressed_flag":false,
      "adjacency_topology/topology_status":"isis-adj-topo-status-ok",
      "adjacency_uptime":1135367,
      "adjacency_uptime_valid_flag":true,
      "local_dis_flag":false,
      "neighbor_dis_flag":false,
      "nsr_standby":0
   },
   "name":"routing_adjacency",
   "tags":{
      "host":"ah-1153718-001.sdi.corp.bankofamerica.com",
      "instance_name":"1",
      "interface_name":"Bundle-Ether1",
      "level":"level2",
      "path":"Cisco-IOS-XR-clns-isis-oper:/isis/instances/instance/levels/level/adjacencies/adjacency",
      "source":"USNYNYCXX01CSR001B",
      "system_id":"0112.3200.1011"
   },
   "timestamp":161419913872
}
sjwang90 commented 3 years ago

Hey @sbyx - a Telegraf user who has a pretty large setup was running into this problem of inputs.gnmi dropping metrics. Apparently this is something the Cisco IOS-XR team has run into themselves, which is why they're building their own collector. Apparently Telegraf may struggle to keep up with the volume of some of these routers due to it being a single process.

Do you have any insight on this? Are there any enhancements or fixes we need to make to the plugin?

doxuanquang commented 2 years ago

@sjwang90 I'm trying to achieve the same thing with an Arista switch, but my outputs are already in separate series, making it really difficult to add/combine tags/fields.

 [[inputs.gnmi.subscription]]
   ## Measurement name and the origin/path being subscribed to
   name = "bgp"
   origin = "openconfig"
   path = "/network-instances/network-instance/protocols/protocol/bgp/neighbors/neighbor/state/values"

   ## Subscription mode and interval
   subscription_mode = "sample"
   sample_interval = "10s"

[[outputs.file]]
  ## Files to write the metrics to
  files = ["/etc/telegraf/metrics.out"]

  ## Serialize metrics as JSON
  data_format = "json"
  json_timestamp_units = "10ms"

Here's what I got in metrics.out:

{"fields":{"description":"spn-onyc-002"},"name":"bgp","tags":{"/network-instances/network-instance/protocols/protocol/name":"BGP","host":"61de35803c82","identifier":"BGP","name":"default","neighbor_address":"10.253.35.130","path":"","source":"172.22.255.254"},"timestamp":163114634854}
{"fields":{"dynamically_configured":false},"name":"bgp","tags":{"/network-instances/network-instance/protocols/protocol/name":"BGP","host":"61de35803c82","identifier":"BGP","name":"default","neighbor_address":"10.253.35.130","path":"","source":"172.22.255.254"},"timestamp":163114635124}
{"fields":{"enabled":true},"name":"bgp","tags":{"/network-instances/network-instance/protocols/protocol/name":"BGP","host":"61de35803c82","identifier":"BGP","name":"default","neighbor_address":"10.253.35.130","path":"","source":"172.22.255.254"},"timestamp":163114635127}
{"fields":{"established_transitions":4},"name":"bgp","tags":{"/network-instances/network-instance/protocols/protocol/name":"BGP","host":"61de35803c82","identifier":"BGP","name":"default","neighbor_address":"10.253.35.130","path":"","source":"172.22.255.254"},"timestamp":163114634853}
{"fields":{"last_established":1626710982292103680},"name":"bgp","tags":{"/network-instances/network-instance/protocols/protocol/name":"BGP","host":"61de35803c82","identifier":"BGP","name":"default","neighbor_address":"10.253.35.130","path":"","source":"172.22.255.254"},"timestamp":163114634853}
{"fields":{"messages/received/UPDATE":2842554},"name":"bgp","tags":{"/network-instances/network-instance/protocols/protocol/name":"BGP","host":"61de35803c82","identifier":"BGP","name":"default","neighbor_address":"10.253.35.130","path":"","source":"172.22.255.254"},"timestamp":163226554820}
{"fields":{"messages/sent/UPDATE":455358},"name":"bgp","tags":{"/network-instances/network-instance/protocols/protocol/name":"BGP","host":"61de35803c82","identifier":"BGP","name":"default","neighbor_address":"10.253.35.130","path":"","source":"172.22.255.254"},"timestamp":163226412302}
{"fields":{"neighbor_address":"10.253.35.130"},"name":"bgp","tags":{"/network-instances/network-instance/protocols/protocol/name":"BGP","host":"61de35803c82","identifier":"BGP","name":"default","neighbor_address":"10.253.35.130","path":"","source":"172.22.255.254"},"timestamp":163114634853}
{"fields":{"peer_as":4210200800},"name":"bgp","tags":{"/network-instances/network-instance/protocols/protocol/name":"BGP","host":"61de35803c82","identifier":"BGP","name":"default","neighbor_address":"10.253.35.130","path":"","source":"172.22.255.254"},"timestamp":163114634853}
{"fields":{"peer_group":"WAN"},"name":"bgp","tags":{"/network-instances/network-instance/protocols/protocol/name":"BGP","host":"61de35803c82","identifier":"BGP","name":"default","neighbor_address":"10.253.35.130","path":"","source":"172.22.255.254"},"timestamp":163114634854}
{"fields":{"send_community":"NONE"},"name":"bgp","tags":{"/network-instances/network-instance/protocols/protocol/name":"BGP","host":"61de35803c82","identifier":"BGP","name":"default","neighbor_address":"10.253.35.130","path":"","source":"172.22.255.254"},"timestamp":163114635124}
{"fields":{"session_state":"ESTABLISHED"},"name":"bgp","tags":{"/network-instances/network-instance/protocols/protocol/name":"BGP","host":"61de35803c82","identifier":"BGP","name":"default","neighbor_address":"10.253.35.130","path":"","source":"172.22.255.254"},"timestamp":163114634853}

Do you have any suggestions on how to achieve output like the one you had above?

TechDawg commented 2 years ago

Hey @sbyx - a Telegraf user who has a pretty large setup was running into this problem of inputs.gnmi dropping metrics. Apparently this is something the Cisco IOS-XR team has run into themselves, which is why they're building their own collector. Apparently Telegraf may struggle to keep up with the volume of some of these routers due to it being a single process.

Do you have any insight on this? Are there any enhancements or fixes we need to make to the plugin?

Hi @sjwang90, could you please share any resources/information you may have about the scale limitations of the gNMI plugin?

srebhan commented 1 year ago

Does this problem still exist?

telegraf-tiger[bot] commented 1 year ago

Hello! I am closing this issue due to inactivity. I hope you were able to resolve your problem, if not please try posting this question in our Community Slack or Community Page. Thank you!