DataDog / integrations-core

Core integrations of the Datadog Agent
BSD 3-Clause "New" or "Revised" License
933 stars 1.4k forks source link

Ceph Integration not working for Octopus release #11896

Open nik-johnson-net opened 2 years ago

nik-johnson-net commented 2 years ago

https://github.com/DataDog/integrations-core/blob/2206d5030ee3949d8be07bad477894f22eb3d52d/ceph/datadog_checks/ceph/ceph.py#L62

"mon_status" has been moved to a daemon specific command, so instead the command must be issued like:

/usr/bin/ceph --cluster ceph daemon mon.node-a mon_status -fjson

A patch would need to locate the specific daemon to query.

TomekGl commented 4 months ago

Is there any chance of this being fixed? datadog-agent-7.54.1-1.x86_64 + Ceph Pacific doesn't produce any mon-related metrics, even though the monitors are the most crucial component to monitor in the cluster.

Some basic numbers (ceph.num_mons) could be extracted from ceph status; the more detailed metrics are available via ceph tell mon.2 mon_status -fjson. The command proposed by @nik-johnson-net doesn't work for me.

Error:

2024-07-04 11:13:21 UTC | CORE | WARN | (pkg/collector/python/datadog_agent.go:131 in LogMessage) | ceph:b57df6890fa03820 | (ceph.py:68) | Unable to parse data from cmd=mon_status: Expecting value: line 2 column 1 (char 1)

Sample output:

[root@ceph-mon-2 ~]# ceph status -fjson | jq .
{
  "fsid": "3b9bd3ca-ff4d-11eb-ada4-566fa99c0024",
  "health": {
    "status": "HEALTH_OK",
    "checks": {},
    "mutes": []
  },
  "election_epoch": 106590,
  "quorum": [
    0,
    1,
    2
  ],
  "quorum_names": [
    "ceph-mon-1",
    "ceph-mon-0",
    "ceph-mon-2"
  ],
  "quorum_age": 2652,
  "monmap": {
    "epoch": 57,
    "min_mon_release_name": "pacific",
    "num_mons": 3
  },
..
[root@ceph-mon-2 ~]# ceph tell mon.2 mon_status -fjson | jq 
{
  "name": "ceph-mon-2",
  "rank": 2,
  "state": "peon",
  "election_epoch": 106590,
  "quorum": [
    0,
    1,
    2
  ],
  "quorum_age": 2705,
  "features": {
    "required_con": "2449958747317026820",
    "required_mon": [
      "kraken",
      "luminous",
      "mimic",
      "osdmap-prune",
      "nautilus",
      "octopus",
      "pacific",
      "elector-pinging"
    ],
    "quorum_con": "4540138297136906239",
    "quorum_mon": [
      "kraken",
      "luminous",
      "mimic",
      "osdmap-prune",
      "nautilus",
      "octopus",
      "pacific",
      "elector-pinging"
    ]
  },
  "outside_quorum": [],
  "extra_probe_peers": [],
  "sync_provider": [],
  "monmap": {
    "epoch": 57,
    "fsid": "3b9bd3ca-ff4d-11eb-ada4-566fa99c0024",
    "modified": "2024-07-04T10:44:56.699904Z",
    "created": "2021-08-17T11:21:40.985975Z",
    "min_mon_release": 16,
    "min_mon_release_name": "pacific",
    "election_strategy": 1,
    "disallowed_leaders: ": "",
    "stretch_mode": false,
    "features": {
      "persistent": [
        "kraken",
        "luminous",
        "mimic",
        "osdmap-prune",
        "nautilus",
        "octopus",
        "pacific",
        "elector-pinging"
      ],
      "optional": []
    },
    "mons": [
      {
        "rank": 0,
        "name": "ceph-mon-1",
        "public_addrs": {
          "addrvec": [
            {
              "type": "v2",
              "addr": "10.20.1.51:3300",
              "nonce": 0
            },
            {
              "type": "v1",
              "addr": "10.20.1.51:6789",
              "nonce": 0
            }
          ]
        },
        "addr": "10.20.1.51:6789/0",
        "public_addr": "10.20.1.51:6789/0",
        "priority": 0,
        "weight": 0,
        "crush_location": "{}"
      },
      {
        "rank": 1,
        "name": "ceph-mon-0",
        "public_addrs": {
          "addrvec": [
            {
              "type": "v2",
              "addr": "10.20.1.50:3300",
              "nonce": 0
            },
            {
              "type": "v1",
              "addr": "10.20.1.50:6789",
              "nonce": 0
            }
          ]
        },
        "addr": "10.20.1.50:6789/0",
        "public_addr": "10.20.1.50:6789/0",
        "priority": 0,
        "weight": 0,
        "crush_location": "{}"
      },
      {
        "rank": 2,
        "name": "ceph-mon-2",
        "public_addrs": {
          "addrvec": [
            {
              "type": "v2",
              "addr": "10.20.1.52:3300",
              "nonce": 0
            },
            {
              "type": "v1",
              "addr": "10.20.1.52:6789",
              "nonce": 0
            }
          ]
        },
        "addr": "10.20.1.52:6789/0",
        "public_addr": "10.20.1.52:6789/0",
        "priority": 0,
        "weight": 0,
        "crush_location": "{}"
      }
    ]
  },
  "feature_map": {
    "mon": [
      {
        "features": "0x3f01cfb9fffdffff",
        "release": "luminous",
        "num": 1
      }
    ],
    "client": [
      {
        "features": "0x3f01cfb9fffdffff",
        "release": "luminous",
        "num": 3
      }
    ]
  },
  "stretch_mode": false
}