rfmoz / grafana-dashboards

Grafana dashboards
Apache License 2.0
1.1k stars 438 forks source link

Include Node Exporter `node_pressure_*` PSI metrics in Node Exporter Full dashboard #144

Closed SRv6d closed 4 months ago

SRv6d commented 1 year ago

It would be great to be able to see Pressure Stall Information within the Node Exporter Full dashboard, namely the following metrics:

- `node_pressure_cpu_waiting_seconds_total`
- `node_pressure_io_stalled_seconds_total`
- `node_pressure_io_waiting_seconds_total`
- `node_pressure_memory_stalled_seconds_total`
- `node_pressure_memory_waiting_seconds_total`

towolf commented 9 months ago

Here's the panel JSON for that thing, all in one graph:

{
  "datasource": {
    "uid": "000000001",
    "type": "prometheus"
  },
  "fieldConfig": {
    "defaults": {
      "custom": {
        "drawStyle": "line",
        "lineInterpolation": "linear",
        "barAlignment": 0,
        "lineWidth": 1,
        "fillOpacity": 10,
        "gradientMode": "none",
        "spanNulls": false,
        "insertNulls": false,
        "showPoints": "never",
        "pointSize": 5,
        "stacking": {
          "mode": "none",
          "group": "A"
        },
        "axisPlacement": "auto",
        "axisColorMode": "text",
        "scaleDistribution": {
          "type": "linear"
        },
        "axisCenteredZero": false,
        "hideFrom": {
          "tooltip": false,
          "viz": false,
          "legend": false
        },
        "thresholdsStyle": {
          "mode": "off"
        }
      },
      "color": {
        "mode": "palette-classic"
      },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          },
          {
            "color": "red",
            "value": 0.8
          }
        ]
      },
      "links": [],
      "min": 0,
      "unit": "percentunit",
      "decimals": 1
    },
    "overrides": [
      {
        "matcher": {
          "id": "byName",
          "options": "Memory some"
        },
        "properties": [
          {
            "id": "color",
            "value": {
              "mode": "fixed",
              "fixedColor": "dark-red"
            }
          }
        ]
      },
      {
        "matcher": {
          "id": "byName",
          "options": "Memory full"
        },
        "properties": [
          {
            "id": "color",
            "value": {
              "mode": "fixed",
              "fixedColor": "light-red"
            }
          }
        ]
      },
      {
        "matcher": {
          "id": "byName",
          "options": "I/O some"
        },
        "properties": [
          {
            "id": "color",
            "value": {
              "mode": "fixed",
              "fixedColor": "dark-blue"
            }
          }
        ]
      },
      {
        "matcher": {
          "id": "byName",
          "options": "I/O full"
        },
        "properties": [
          {
            "id": "color",
            "value": {
              "mode": "fixed",
              "fixedColor": "light-blue"
            }
          }
        ]
      }
    ]
  },
  "gridPos": {
    "h": 10,
    "w": 12,
    "x": 12,
    "y": 22
  },
  "id": 321,
  "links": [],
  "options": {
    "tooltip": {
      "mode": "multi",
      "sort": "none"
    },
    "legend": {
      "showLegend": true,
      "displayMode": "table",
      "placement": "bottom",
      "calcs": [
        "mean",
        "lastNotNull",
        "max",
        "min"
      ]
    }
  },
  "pluginVersion": "9.2.0",
  "targets": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "000000001"
      },
      "expr": "rate(node_pressure_cpu_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])",
      "format": "time_series",
      "intervalFactor": 1,
      "legendFormat": "CPU some",
      "refId": "CPU some",
      "step": 240,
      "editorMode": "code",
      "range": true
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "000000001"
      },
      "expr": "rate(node_pressure_memory_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])",
      "format": "time_series",
      "intervalFactor": 1,
      "legendFormat": "Memory some",
      "refId": "Memory some",
      "step": 240,
      "editorMode": "code",
      "range": true,
      "hide": false
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "000000001"
      },
      "expr": "rate(node_pressure_memory_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])",
      "format": "time_series",
      "intervalFactor": 1,
      "legendFormat": "Memory full",
      "refId": "Memory full",
      "step": 240,
      "editorMode": "code",
      "range": true,
      "hide": false
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "000000001"
      },
      "expr": "rate(node_pressure_io_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])",
      "format": "time_series",
      "intervalFactor": 1,
      "legendFormat": "I/O some",
      "refId": "I/O some",
      "step": 240,
      "editorMode": "code",
      "range": true,
      "hide": false
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "000000001"
      },
      "expr": "rate(node_pressure_io_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])",
      "format": "time_series",
      "intervalFactor": 1,
      "legendFormat": "I/O full",
      "refId": "I/O full",
      "step": 240,
      "editorMode": "code",
      "range": true,
      "hide": false
    }
  ],
  "title": "Pressure Stall Information",
  "type": "timeseries",
  "description": "https://docs.kernel.org/accounting/psi.html"
}

image

rfmoz commented 4 months ago

Added in #147 as a normal panel, and in commit d33a10c268e5081aa5de33ec8ffdfb2741109498 as a quick info box.