flashcatcloud / categraf

one-stop telemetry collector for nightingale
https://flashcat.cloud/docs/
MIT License
758 stars 237 forks source link

对于docker里运行的服务能和supervisord实现一样的监控效果吗? #960

Open waittingsummer opened 4 weeks ago

waittingsummer commented 4 weeks ago

What would you like to be added:

需要一个指标,专门用来监控 docker 里服务的运行状态,效果和 supervisord 一样。也有可能是我对 docker 插件不太熟悉,没配置对。(截图:截图_选择区域_20240607102911、截图_选择区域_20240607102332)

Why is this needed: 对于docker的运维更加直观便捷

Describe the solution you'd like

Additional context

# Collection interval.
# NOTE(review): the unit is presumably seconds — confirm against the collector docs.
interval = 15

[[instances]]
# Extra labels appended to the series of this instance.
labels = { region = "cloud", product = "n9e" }

# Effective interval = global.interval * interval_times
# interval_times = 1

# Docker endpoint.
#   TCP:                   endpoint = "tcp://[ip]:[port]"
#   Environment variables: endpoint = "ENV"  (ie, docker-machine)
# endpoint = "unix:///var/run/docker.sock"
endpoint = "unix:///var/run/docker.sock"

# Set to true to collect Swarm metrics (desired_replicas, running_replicas).
gather_services = false
gather_extend_memstats = false

container_id_label_enable = true
container_id_label_short_style = true

# Container names to include / exclude. Globs accepted.
# Leaving both arrays empty includes all containers.
container_name_include = []
container_name_exclude = []

# Container states to include / exclude. Globs accepted.
# When empty, only containers in the "running" state are captured.
# example: container_state_include = ["created", "restarting", "running", "removing", "paused", "exited", "dead"]
# example: container_state_exclude = ["created", "restarting", "running", "removing", "paused", "exited", "dead"]
container_state_include = []
container_state_exclude = []

# Timeout for the docker list, info, and stats commands.
timeout = "5s"

# Classes for which a per-device metric is issued.
# Possible values: 'cpu' (cpu0, cpu1, ...), 'blkio' (8:0, 8:1, ...) and 'network' (eth0, eth1, ...).
# Note: this setting has no effect if 'perdevice' is set to 'true'.
perdevice_include = []

# Classes for which a total metric is issued. A total is an aggregate of the 'perdevice' values.
# Possible values: 'cpu', 'blkio' and 'network'.
# Total 'cpu' is reported directly by the Docker daemon; 'network' and 'blkio' totals are aggregated by this plugin.
# Note: this setting has no effect if 'total' is set to 'false'.
total_include = ["cpu", "blkio", "network"]

# Environment variables to use as tags.
# tag_env = ["JAVA_HOME", "HEAP_SIZE"]

# Docker labels to include / exclude as tags. Globs accepted.
# Leaving both arrays empty includes all labels as tags.
docker_label_include = []
docker_label_exclude = [
    "annotation*",
    "io.kubernetes*",
    "*description*",
    "*maintainer*",
    "*hash",
    "*author*",
    "*org_*",
    "*date*",
    "*url*",
    "*docker_compose*",
]

# Optional TLS config.
# NOTE(review): the certificate paths below point under /etc/telegraf; they are
# presumably unused while use_tls = false — confirm the paths before enabling TLS.
use_tls = false
tls_ca = "/etc/telegraf/ca.pem"
tls_cert = "/etc/telegraf/cert.pem"
tls_key = "/etc/telegraf/key.pem"
# Use TLS but skip chain & host verification.
insecure_skip_verify = false
kongfei605 commented 3 weeks ago

没太了解,原始需求是什么?

waittingsummer commented 3 weeks ago

没太了解,原始需求是什么?

就是需要一个指标,能专门监控 docker 里运行的服务的状态。比如 docker 里跑了 nginx,那么我监控 docker 的同时,就会自动产生 docker_nginx_up 这个指标,反映 nginx 的运行状态。当然我也说了,可能是我对 docker 插件不太熟悉,没配置对。

UlricQin commented 3 weeks ago

没太了解,原始需求是什么?

就是需要一个指标,能专门监控 docker 里运行的服务的状态。比如 docker 里跑了 nginx,那么我监控 docker 的同时,就会自动产生 docker_nginx_up 这个指标,反映 nginx 的运行状态。当然我也说了,可能是我对 docker 插件不太熟悉,没配置对。

docker 和 supervisor 的逻辑是不同的,不支持你这个想法