Bpazy / blog

我的博客,欢迎关注和讨论
https://github.com/Bpazy/blog/issues
MIT License
41 stars 2 forks source link

夜莺 (n9e) 的使用 #339

Open Bpazy opened 1 week ago

Bpazy commented 1 week ago

安装

P0 级系统,推荐二进制安装,遵循官方的安装方法即可: https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/install/binary/

注意配置好 mysql, redis。

最后配置一下 systemd 服务,以便通过 systemctl 管理:

cat > /usr/lib/systemd/system/n9e.service <<EOF
[Unit]
Description=Nightingale

[Service]
WorkingDirectory=/home/ziyuan/n9e
ExecStart=/home/ziyuan/n9e/n9e

[Install]
WantedBy=multi-user.target
EOF
Bpazy commented 4 days ago

告警规则

awesome-prometheus-rule

点击我查看详情 ```json [ { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [] } ], "name": "K8s Service 黑盒监控", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 0, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "queries": [ { "keys": { "labelKey": "", "metricKey": "", "valueKey": "" }, "prom_ql": "probe_success{job=\"blackbox_service\"} == 0", "severity": 2, "unit": "none" } ] }, "event_relabel_config": null, "prom_eval_interval": 30, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "00:00", "enable_etimes": [ "00:00" ], "enable_days_of_week": [ "0", "1", "2", "3", "4", "5", "6" ], "enable_days_of_weeks": [ [ "0", "1", "2", "3", "4", "5", "6" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 5, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": {}, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "KubernetesPodCrashLooping", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 0, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "event_relabel_config": [], "queries": [ { "keys": { "labelKey": "", "metricKey": "", "valueKey": "" }, "prom_ql": "increase(kube_pod_container_status_restarts_total[1m]) > 3", "severity": 2, "unit": "none" } ], "task_tpls": [] }, "event_relabel_config": [], "prom_eval_interval": 30, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "00:00", "enable_etimes": [ "00:00" ], "enable_days_of_week": [ "0", "1", "2", "3", "4", "5", "6" ], "enable_days_of_weeks": [ [ "0", "1", "2", "3", "4", "5", "6" 
] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 10, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": {}, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "KubernetesPodNotHealthy", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 0, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "event_relabel_config": [], "queries": [ { "keys": { "labelKey": "", "metricKey": "", "valueKey": "" }, "prom_ql": "sum by (namespace, pod) (kube_pod_status_phase{phase=~\"Pending|Unknown|Failed\"}) > 0", "severity": 2, "unit": "none" } ], "task_tpls": [] }, "event_relabel_config": [], "prom_eval_interval": 30, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "00:00", "enable_etimes": [ "00:00" ], "enable_days_of_week": [ "0", "1", "2", "3", "4", "5", "6" ], "enable_days_of_weeks": [ [ "0", "1", "2", "3", "4", "5", "6" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 10, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": {}, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "More than 60% of the connections in MySQL are in a running state", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "avg 
by (instance) (mysql_global_status_threads_running) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 60", "severity": 2 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [ "alertname=MysqlHighThreadsRunning" ], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "More than 80% of MySQL files open", "note": "More than 80% of MySQL files open", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "avg by (instance) (mysql_global_status_innodb_num_open_files) / avg by (instance)(mysql_global_variables_open_files_limit) * 100 > 80", "severity": 2 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [ "alertname=MysqlHighOpenFiles" ], "annotations": 
null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "MySQL connection count has exceeded 80%", "note": "More than 80% of MySQL connections are in use", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "avg by (instance) (mysql_global_status_threads_connected) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 80", "severity": 2 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [ "alertname=MysqlTooManyConnections" ], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "Mysql has just restarted. 
Please be advised - exporter", "note": "MySQL has just been restarted, less than one minute ago", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 3, "severities": [ 3 ], "disabled": 0, "prom_for_duration": 0, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "mysql_global_status_uptime < 60", "severity": 3 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [ "alertname=MysqlRestarted" ], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "MySQL server mysql has some new slow query", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 0, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "inhibit": false, "queries": [ { "keys": { "labelKey": "", "metricKey": "", "valueKey": "" }, "prom_ql": "increase(mysql_global_status_slow_queries[1m]) > 10", "severity": 2, "unit": "none" } ] }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], 
"notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [ "alertname=MysqlSlowQueries" ], "annotations": {}, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "MysqlInnodbLogWaits - exporter", "note": "MySQL innodb log writes stalling", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 0, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "rate(mysql_global_status_innodb_log_waits[15m]) > 10", "severity": 2 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [ "alertname=MysqlInnodbLogWaits" ], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "MysqlSlaveIoThreadNotRunning - exporter", "note": "MySQL Slave IO thread not running", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 1, "severities": [ 1 ], "disabled": 0, "prom_for_duration": 0, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "mysql_slave_status_master_server_id > 0 and ON (instance) 
mysql_slave_status_slave_io_running == 0", "severity": 1 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [ "alertname=MysqlSlaveIoThreadNotRunning" ], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "MysqlSlaveReplicationLag - exporter", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 1, "severities": [ 1 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "mysql_slave_status_master_server_id > 0 and ON (instance) (mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay) > 30", "severity": 1 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [ "alertname=MysqlSlaveReplicationLag" ], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": 
"prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "MysqlSlaveSqlThreadNotRunning - exporter", "note": "MySQL Slave SQL thread not running", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 1, "severities": [ 1 ], "disabled": 0, "prom_for_duration": 0, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "mysql_slave_status_master_server_id > 0 and ON (instance) mysql_slave_status_slave_sql_running == 0", "severity": 1 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [ "alertname=MysqlSlaveSqlThreadNotRunning" ], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "NodeMemoryUsage", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 0, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "event_relabel_config": [], "queries": [ { "keys": { "labelKey": "", "metricKey": "", "valueKey": "" }, "prom_ql": "(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes)) / node_memory_MemTotal_bytes * 100 > 95", "severity": 2, "unit": "none" } ], "task_tpls": [] }, "event_relabel_config": [], "prom_eval_interval": 30, "enable_stime": 
"00:00", "enable_stimes": [ "00:00" ], "enable_etime": "00:00", "enable_etimes": [ "00:00" ], "enable_days_of_week": [ "0", "1", "2", "3", "4", "5", "6" ], "enable_days_of_weeks": [ [ "0", "1", "2", "3", "4", "5", "6" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 10, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": {}, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "PodMemoryUsage", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 0, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "event_relabel_config": [], "queries": [ { "keys": { "labelKey": "", "metricKey": "", "valueKey": "" }, "prom_ql": "sum(container_memory_working_set_bytes{pod!=\"\"}) BY (instance, pod) / sum(container_spec_memory_limit_bytes{pod!=\"\"} > 0) BY (instance, pod) * 100 > 80", "severity": 2, "unit": "none" } ], "task_tpls": [] }, "event_relabel_config": [], "prom_eval_interval": 30, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "00:00", "enable_etimes": [ "00:00" ], "enable_days_of_week": [ "0", "1", "2", "3", "4", "5", "6" ], "enable_days_of_weeks": [ [ "0", "1", "2", "3", "4", "5", "6" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 10, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": {}, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "The MySQL instance is down", "note": "", "prod": "metric", 
"algorithm": "", "algo_params": null, "delay": 0, "severity": 1, "severities": [ 1 ], "disabled": 0, "prom_for_duration": 0, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "mysql_up == 0", "severity": 1 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [ "alertname=MysqlDown" ], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [] } ], "name": "Victoria Metrics 容量预警", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 0, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "queries": [ { "prom_ql": "sum(vm_data_size_bytes{}) > 20 * 1024 * 1024 * 1024", "severity": 2, "unit": "bytesIEC" } ] }, "event_relabel_config": null, "prom_eval_interval": 30, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "00:00", "enable_etimes": [ "00:00" ], "enable_days_of_week": [ "0", "1", "2", "3", "4", "5", "6" ], "enable_days_of_weeks": [ [ "0", "1", "2", "3", "4", "5", "6" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": {}, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": 
"prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "vm error logging rate 大于0 ", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "sum(rate(vm_log_messages_total{job=~\"$job\",instance=~\"$instance\", level!=\"info\"}[5m])) by (job, level) > 0", "severity": 2 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [ "https://api.flashcat.cloud/event/push/alert/n9e?integration_key=f1258018f5595ba7bf30572f2f44c1ac973" ], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "vm-insert 和 vm-storage 组件之间连接的饱和度大于9", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "max(rate(vm_rpc_send_duration_seconds_total{job_name=~\".+\"}[5m])) by(addr) / 1000 > 9", "severity": 2 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", 
"enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [ "https://api.flashcat.cloud/event/push/alert/n9e?integration_key=f1258018f5595ba7bf30572f2f44c1ac973" ], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "vm-insert 实例端口异常", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 1, "severities": [ 1 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "net_response_result_code{service=\"vm-insert\"}!=0", "severity": 1 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 120, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "vm-select 实例端口异常", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 1, "severities": [ 1 ], "disabled": 0, "prom_for_duration": 
60, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "net_response_result_code{service=\"vm-select\"}!=0", "severity": 1 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [ "https://api.flashcat.cloud/event/push/alert/n9e?integration_key=f1258018f5595ba7bf30572f2f44c1ac973" ], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "vm-select请求查询延迟超过15s", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "max(vm_request_duration_seconds{job_name=~\".+\"}) by (path) > 15", "severity": 2 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [ 
"https://api.flashcat.cloud/event/push/alert/n9e?integration_key=f1258018f5595ba7bf30572f2f44c1ac973" ], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "vm-storage 实例端口异常", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 1, "severities": [ 1 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "net_response_result_code{service=\"vm-storage\",env!=\"luke\",env!=\"guotai\"}!=0", "severity": 1 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 120, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "vminsert服务宕机", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "up{service=~\"vminsert.+\"} < 1", "severity": 2 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 60, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], 
"enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "vmselect服务宕机", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "up{service=~\"vmselect.+\"} < 1", "severity": 2 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 60, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "vmstorage服务宕机", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { 
"prom_ql": "up{service=~\"vmstorage.+\"} < 1", "severity": 2 } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 60, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "主机交换内存快满了", "note": "主机交换内存已满 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "(1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], 
"datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "主机入口网络吞吐量异常", "note": "主机异常网络吞吐量 入 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 300, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "sum by (instance) (rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "主机内存不足", "note": "节点内存不足 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ 
"23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "主机内存有压力", "note": "节点内存压力大 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "rate(node_vmstat_pgmajfault[1m]) > 1000", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "主机出口网络吞吐量异常", "note": "主机异常网络吞吐量 出 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, 
"prom_for_duration": 300, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "sum by (instance) (rate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "主机时钟不同步", "note": "主机时钟不同步 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 0, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "inhibit": false, "queries": [ { "keys": { "labelKey": "", "metricKey": "", "valueKey": "" }, "prom_ql": "min_over_time(node_timex_sync_status{instance!=\"192.168.31.20:9100\"}[1m]) == 0 and node_timex_maxerror_seconds >= 16", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ] }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" 
], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": {}, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "主机时钟偏差", "note": "主机时钟偏差 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 0, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "inhibit": false, "queries": [ { "keys": { "labelKey": "", "metricKey": "", "valueKey": "" }, "prom_ql": "(node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0) or (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0)", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ] }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": {}, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "主机磁盘写入延迟异常", "note": "主机异常磁盘写入延迟 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, 
"prom_ql": "", "queries": [ { "prom_ql": "rate(node_disk_write_time_seconds_total[1m]) / rate(node_disk_writes_completed_total[1m]) > 0.1 and rate(node_disk_writes_completed_total[1m]) > 0", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "主机磁盘写入速率异常", "note": "主机异常磁盘写入率 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, 
"callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "主机磁盘将在 24 小时内填满", "note": "主机磁盘将在 24 小时内填满 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "(node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) predict_linear(node_filesystem_avail_bytes{fstype!~\"tmpfs\"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "主机磁盘读取延迟异常", "note": "主机异常磁盘读取延迟 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 120, "prom_ql": "", "rule_config": { "algo_params": 
null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "rate(node_disk_read_time_seconds_total[1m]) / rate(node_disk_reads_completed_total[1m]) > 0.1 and rate(node_disk_reads_completed_total[1m]) > 0", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "主机磁盘读取速率异常", "note": "主机异常磁盘读取率 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 300, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, 
"recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "检测到 HostOomKill", "note": "检测到主机 OOM 终止 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 0, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "increase(node_vmstat_oom_kill[1m]) > 0", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 0, "op": "in", "values": [ 1 ] } ], "name": "物联网设备电量", "note": "", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 0, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 60, "prom_ql": "", "rule_config": { "event_relabel_config": [], "queries": [ { "keys": { "labelKey": "", "metricKey": "", "valueKey": "" }, "prom_ql": "haas_sensor_battery_percent{} < 50", "severity": 2, "unit": "percent" } ], "task_tpls": [] }, "event_relabel_config": [], "prom_eval_interval": 30, 
"enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "00:00", "enable_etimes": [ "00:00" ], "enable_days_of_week": [ "0", "1", "2", "3", "4", "5", "6" ], "enable_days_of_weeks": [ [ "0", "1", "2", "3", "4", "5", "6" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 10, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": {}, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" }, { "cate": "prometheus", "datasource_ids": [ 1 ], "datasource_queries": [ { "match_type": 2, "op": "in", "values": [ 0 ] } ], "name": "节点cpu负载高", "note": "主机 CPU 负载高 (instance {{ $labels.instance }})", "prod": "metric", "algorithm": "", "algo_params": null, "delay": 0, "severity": 2, "severities": [ 2 ], "disabled": 0, "prom_for_duration": 0, "prom_ql": "", "rule_config": { "algo_params": null, "inhibit": false, "prom_ql": "", "queries": [ { "prom_ql": "100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[2m])) * 100) > 80", "recover_config": { "judge_type": 0, "recover_exp": "" }, "severity": 2, "unit": "" } ], "severity": 0 }, "event_relabel_config": null, "prom_eval_interval": 15, "enable_stime": "00:00", "enable_stimes": [ "00:00" ], "enable_etime": "23:59", "enable_etimes": [ "23:59" ], "enable_days_of_week": [ "1", "2", "3", "4", "5", "6", "0" ], "enable_days_of_weeks": [ [ "1", "2", "3", "4", "5", "6", "0" ] ], "enable_in_bg": 0, "notify_recovered": 1, "notify_channels": [ "dingtalk" ], "notify_repeat_step": 60, "notify_max_number": 0, "recover_duration": 0, "callbacks": [], "runbook_url": "", "append_tags": [], "annotations": null, "extra_config": null, "uuid": 0, "cur_event_count": 0, "update_by_nickname": "超管" } ] ```