ccfos / nightingale

An all-in-one observability solution which aims to combine the advantages of Prometheus and Grafana. It manages alert rules and visualizes metrics, logs, and traces in a beautiful web UI.
https://flashcat.cloud/docs/
Apache License 2.0
9.8k stars 1.42k forks source link

When the machine identifier uses a fuzzy-match (=~) expression, its value can still only be selected one by one. #1774

Closed nondevops closed 3 months ago

nondevops commented 1 year ago

Relevant server.conf | webapi.conf

# Run mode. Accepted values: debug, release
RunMode = "release"

# # Custom i18n dict config (commented out)
# I18N = "./etc/i18n.json"

# # Custom i18n request header key (commented out)
# I18NHeaderKey = "X-Language"

# File with metrics descriptions
MetricsYamlFile = "./etc/metrics.yaml"

# Directories for the built-in alerts and dashboards
BuiltinAlertsDir = "./etc/alerts"
BuiltinDashboardsDir = "./etc/dashboards"

# Where the cluster list comes from. Accepted values: config | api
ClustersFrom = "config"

# Only used when ClustersFrom = "api"
ClustersFromAPIs = []

# Notification channels. Each [[NotifyChannels]] header appends one entry
# (a Label plus a Key) to the NotifyChannels array, in file order.
[[NotifyChannels]]
Label = "邮箱"
# do not change Key
Key = "email"

[[NotifyChannels]]
Label = "华为云语音"
# do not change Key
Key = "voice"

[[NotifyChannels]]
Label = "企业微信自建应用"
# do not change Key
Key = "weixinworkApp"

[[NotifyChannels]]
Label = "华为云短信"
# do not change Key
Key = "sms"

[[NotifyChannels]]
Label = "钉钉机器人"
# do not change Key
Key = "dingtalk"

[[NotifyChannels]]
Label = "企微机器人"
# do not change Key
Key = "wecom"

[[NotifyChannels]]
Label = "飞书机器人"
# do not change Key
Key = "feishu"

[[NotifyChannels]]
Label = "mm bot"
# do not change Key
Key = "mm"

[[NotifyChannels]]
Label = "telegram机器人"
# do not change Key
Key = "telegram"

# Contact keys. Each [[ContactKeys]] header appends one entry
# (a Label plus a Key) to the ContactKeys array, in file order.
[[ContactKeys]]
Label = "Wecom Robot Token"
# do not change Key
Key = "wecom_robot_token"

[[ContactKeys]]
Label = "Dingtalk Robot Token"
# do not change Key
Key = "dingtalk_robot_token"

[[ContactKeys]]
Label = "Feishu Robot Token"
# do not change Key
Key = "feishu_robot_token"

[[ContactKeys]]
Label = "MatterMost Webhook URL"
# do not change Key
Key = "mm_webhook_url"

[[ContactKeys]]
Label = "Telegram Robot Token"
# do not change Key
Key = "telegram_robot_token"

[Log]
# Directory log files are written to
Dir = "logs"
# Log level. Accepted values: DEBUG INFO WARNING ERROR
Level = "DEBUG"
# Log output. Accepted values: stdout, stderr, file
Output = "stdout"
# # Rotate by time
# KeepHours = 4
# # Rotate by size
# RotateNum = 3
# # Unit: MB
# RotateSize = 256

[HTTP]
# HTTP listening address
Host = "0.0.0.0"
# HTTP listening port
Port = 18000
# HTTPS cert file path
CertFile = ""
# HTTPS key file path
KeyFile = ""
# Whether to print the access log
PrintAccessLog = true
# Whether to enable pprof
PProf = false
# HTTP graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# Max content length: 64M (64 * 1024 * 1024 = 67108864 bytes)
MaxContentLength = 67108864
# HTTP server read timeout, unit: s
ReadTimeout = 20
# HTTP server write timeout, unit: s
WriteTimeout = 40
# HTTP server idle timeout, unit: s
IdleTimeout = 120

[JWTAuth]
# Signing key.
# NOTE(review): "xxx" looks like a redacted placeholder — confirm a real secret is set.
SigningKey = "xxx"
# Unit: min (1500 min = 25 h)
AccessExpired = 1500
# Unit: min (10080 min = 7 days)
RefreshExpired = 10080
# NOTE(review): presumably the prefix for JWT-related keys stored in Redis — confirm.
RedisKeyPrefix = "/jwt/"

[ProxyAuth]
# If proxy auth is enabled, JWT auth is disabled
Enable = false
# Name of the HTTP proxy header that carries the username
HeaderUserNameKey = "X-User-Name"
# NOTE(review): presumably the roles given to users arriving via proxy auth — confirm.
DefaultRoles = ["Standard"]

[BasicAuth]
# NOTE(review): entries look like username = "password" pairs (value redacted here) — confirm.
user001 = "xxxx"

[AnonymousAccess]
# NOTE(review): presumably toggles unauthenticated access to the named features;
# both are off here — confirm exact scope against the consumer.
PromQuerier = false
AlertDetail = false

[LDAP.Attributes]
# Attribute mappings; every value here is an LDAP attribute name.
# NOTE(review): TOML keys are case-sensitive, so Username and username below are
# two distinct keys, and the lowercase email sits alongside a commented-out
# Email. Confirm which spelling the consumer actually reads.
Nickname = "cn"
Phone = "homePhone"
#Email = "mail"
Username = "cn"
name = "cn"
surname = "cn"
username = "cn"
member_of = "memberOf"
email =  "mail"

[OIDC]
# OIDC login is disabled in this config
Enable = false
DisplayName = "OIDC登录"
RedirectURL = "http://n9e.com/callback"
SsoAddr = "http://sso.example.org"
# Client credentials are left empty here
ClientId = ""
ClientSecret = ""
# NOTE(review): presumably controls whether attributes from the identity
# provider overwrite the locally stored ones — confirm.
CoverAttributes = true
DefaultRoles = ["Standard"]

# Attribute name mapping for the OIDC table above
[OIDC.Attributes]
Nickname = "nickname"
Phone = "phone_number"
Email = "email"

[CAS]
# CAS login is disabled in this config
Enable = false
DisplayName = "CAS登录"
SsoAddr = "https://cas.example.com/cas/"
RedirectURL = "http://127.0.0.1:18000/callback/cas"
# NOTE(review): presumably controls whether attributes from the CAS server
# overwrite the locally stored ones — confirm.
CoverAttributes = false
# Default roles for CAS users
DefaultRoles = ["Standard"]

# Attribute name mapping for the CAS table above
[CAS.Attributes]
Nickname = "nickname"
Phone = "phone_number"
Email = "email"

[OAuth]
# OAuth2 login is disabled in this config
Enable = false
DisplayName = "OAuth2登录"
RedirectURL = "http://127.0.0.1:18000/callback/oauth"
SsoAddr = "https://sso.example.com/oauth2/authorize"
TokenAddr = "https://sso.example.com/oauth2/token"
UserInfoAddr = "https://api.example.com/api/v1/user/info"
# Accepted values: "header"  "querystring" "formdata"
TranTokenMethod = "header"
# Client credentials are left empty here
ClientId = ""
ClientSecret = ""
CoverAttributes = true
DefaultRoles = ["Standard"]
# UserinfoIsArray and UserinfoPrefix describe the shape of the user-info
# response; see the examples at the end of this section.
UserinfoIsArray = false
UserinfoPrefix = "data"
Scopes = ["profile", "email", "phone"]

[OAuth.Attributes]
# Username must be defined
Username = "username"
Nickname = "nickname"
Phone = "phone_number"
Email = "email"

# Examples of user-info responses and the matching settings
# # nested : UserinfoIsArray=false, UserinfoPrefix="data"
# # {"data":{"username":"123456","nickname":"name"},"code":0,"message":"ok"}
# # nested and array : UserinfoIsArray=true, UserinfoPrefix="data"
# # {"data":[{"username":"123456","nickname":"name"}],"code":0,"message":"ok"}
# # flat : UserinfoIsArray=false, UserinfoPrefix=""
# # {"username":"123456","nickname":"name"}
# # flat and array : UserinfoIsArray=true, UserinfoPrefix=""
# # [{"username":"123456","nickname":"name"}]

[Redis]
# Address: ip:port, or ip1:port,ip2:port for cluster and sentinel (SentinelAddrs)
Address = "127.0.0.1:16379"
# Username = ""
# NOTE(review): "xxx" looks like a redacted placeholder.
Password = "xxx"
# DB = 0
# UseTLS = false
# TLSMinVersion = "1.2"
# Accepted values: standalone cluster sentinel
RedisType = "standalone"
# Master name, for the sentinel type
# MasterName = "mymaster"
# SentinelUsername = ""
# SentinelPassword = ""

[DB]
# Data source name; the user and password parts appear redacted here
DSN="xxx:xxxx@tcp(127.0.0.1:3306)/n9e_v5?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
# Whether to enable debug mode
Debug = false
# Database type. Accepted values: mysql postgres
DBType = "mysql"
# Unit: s (7200 s = 2 h)
MaxLifetime = 7200
# Max open connections
MaxOpenConns = 150
# Max idle connections
MaxIdleConns = 50
# Table prefix
TablePrefix = ""
# Whether to enable auto migrate
# EnableAutoMigrate = false

[[Clusters]]
# Cluster name (this entry is named after VictoriaMetrics)
Name = "shuitu-VictoriaMetrics"
# Prometheus-style APIs base url
Prom = "http://127.0.0.1:8428"

# VictoriaMetrics APIs base url (commented-out sample; same address as Prom above)
#Prom = "http://127.0.0.1:8428"

# Basic auth username
BasicAuthUser = ""
# Basic auth password
BasicAuthPass = ""
# Timeout settings, unit: ms
Timeout = 30000
DialTimeout = 3000
MaxIdleConnsPerHost = 100
# NOTE(review): looks like a flat list of alternating header name and value
# (X-From: n9e) — confirm against the consumer.
Headers = ["X-From", "n9e"]

[[Clusters]]
# Prometheus cluster name
Name = "shuitu-Prometheus"
# Prometheus APIs base url (host redacted here)
Prom = "http://xxx:9090"

# VictoriaMetrics APIs base url (commented-out sample)
#Prom = "http://127.0.0.1:8428"

# Basic auth username
BasicAuthUser = ""
# Basic auth password
BasicAuthPass = ""
# Timeout settings, unit: ms
Timeout = 30000
DialTimeout = 3000
MaxIdleConnsPerHost = 100
# NOTE(review): looks like a flat list of alternating header name and value
# (X-From: n9e) — confirm against the consumer.
Headers = ["X-From", "n9e"]

[Ibex]
# Base address of the Ibex service
Address = "http://127.0.0.1:10090"
# Basic auth credentials
BasicAuthUser = "ibex"
BasicAuthPass = "ibex"
# Unit: ms
Timeout = 3000

[TargetMetrics]
# PromQL query templates. The '''...''' literal strings keep the embedded
# double quotes unescaped. %s and %d are format placeholders.
# NOTE(review): presumably %s receives an ident regex alternation and %d a
# look-back window in minutes (it is followed by "m") — confirm against the consumer.
TargetUp = '''max(max_over_time(target_up{ident=~"(%s)"}[%dm])) by (ident)'''
LoadPerCore = '''max(max_over_time(system_load_norm_1{ident=~"(%s)"}[%dm])) by (ident)'''
MemUtil = '''100-max(max_over_time(mem_available_percent{ident=~"(%s)"}[%dm])) by (ident)'''
DiskUtil = '''max(max_over_time(disk_used_percent{ident=~"(%s)", path="/"}[%dm])) by (ident)'''

Relevant logs

前端问题,暂不提供后端日志。

System info

v6.5.0

Steps to reproduce

机器标识使用泛匹配(=~)的表达式,其值还是只能一个一个选择 1、点击创建告警规则 2、选择host 3、选择机器标识 4、选择=~ 5、选择主机列表

备注: 手动输入主机列表,例如172.20.*,不清楚能不能识别到。 还请大哥们指导下,是程序逻辑就是这么设计的还是玩法姿势不对。

需求: 能支持业务组、标签的泛匹配那是最棒的。急需这功能。这功能一旦上线,将提升配置效率。现在配置告警规则相对来说还是比较死板的。

Expected behavior

创建告警规则

Actual behavior

创建告警规则

Additional info

No response

710leo commented 1 year ago

=~ 支持配置 172.20.*

nondevops commented 12 months ago

@710leo 可以支持下业务组、标签的泛匹配吗?谢谢