docker-flow / docker-flow-monitor

MIT License
87 stars 38 forks source link

no targets and jobs under scrape_configs appear #61

Closed dorsany closed 6 years ago

dorsany commented 6 years ago

Hi, I am having trouble configuring Docker Flow Monitor in my swarm cluster. For some reason I don't get any targets in Prometheus.

I'm not sure why the scrape_configs section does not exist and why I don't have any jobs or targets under it.

Here is part of my swarm Docker Compose file, which includes the proxy, monitor, and exporters:

# Swarm stack file (Compose file format 3.3) defining the proxy, monitoring and exporter services.
version: '3.3'
# Named volumes. grafana_data and txt_file_exporter_data are mounted by services below;
# prometheus_data and swarm-endpoints are not referenced by any service shown in this excerpt.
volumes:
  prometheus_data:
  grafana_data:
  swarm-endpoints:
  txt_file_exporter_data:
# All three networks are declared external, so this stack attaches to them rather than defining them.
networks:
  monitor:
    external: true
  proxy:
    external: true
  prod:
    external: true
# File-backed configs; each one is mounted into alert-manager, blackbox or grafana further down.
configs:
  alert_manager_config:
    file: ./monitor/alertmanager/config.yml
  blackbox_exporter_config:
    file: ./monitor/blackbox-exporter/blackbox.yml
  grafana_ini_config:
    file: ./monitor/grafana/grafana.ini
  grafana_dashboard_allhosts_config:
    file: ./monitor/grafana/dashboards/monitor_all_hosts_rev1.json
  grafana_dashboard_application_config:
    file: ./monitor/grafana/dashboards/application_monitoring_rev1.json
  grafana_dashboard_system_config:
    file: ./monitor/grafana/dashboards/system_docker_monitoring_rev2.json
  grafana_provisioning_dashboard_config:
    file: ./monitor/grafana/provisioning/dashboards/provisioning_config_file.yml
  grafana_provisioning_datasources_config:
    file: ./monitor/grafana/provisioning/datasources/datasource.yml
# File-backed secret; it is attached to the monitor service.
secrets:
  prometheus_scraps_config:
    file: ./monitor/prometheus/scrape_swarm_prometheus.yml
services:
  # Docker Flow Proxy: publishes ports 80 and 443 and joins the external
  # proxy network under the alias "proxy".
  proxy:
    image: dockerflow/docker-flow-proxy:18.07.18-74
    ports:
      - "80:80"
      - "443:443"
      #- "3001:3001"
    networks:
      proxy:
        aliases:
          - proxy
    environment:
      # The proxy is pointed at the swarm-listener service defined in this file.
      - LISTENER_ADDRESS=swarm-listener
      - MODE=swarm
      - DEBUG=true
    deploy:
#      replicas: 1
      # Global mode combined with the manager constraint: one task per manager node.
      mode: global
      placement:
        constraints: [node.role == manager]
      restart_policy:
        delay: 5s
    logging:
      options:
        max-size: 1g
  # Docker Flow Swarm Listener: mounts the Docker socket and is configured with
  # the URLs it should notify about service and node changes.
  swarm-listener:
    image: dockerflow/docker-flow-swarm-listener:18.07.03-28
    privileged: true
    networks:
      - proxy
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    environment:
      # NOTE(review): DF_NOTIFY_CREATE_SERVICE_URL and DF_NOTIFY_REMOVE_SERVICE_URL
      # are each listed twice (proxy first, then monitor). The listener log in this
      # thread shows service notifications going only to the monitor URLs, and the
      # maintainer's reply says these variables take a comma-separated list of URLs.
      - DF_NOTIFY_CREATE_SERVICE_URL=http://proxy:8080/v1/docker-flow-proxy/reconfigure
      - DF_NOTIFY_REMOVE_SERVICE_URL=http://proxy:8080/v1/docker-flow-proxy/remove
      - DF_NOTIFY_CREATE_SERVICE_URL=http://monitor:8080/v1/docker-flow-monitor/reconfigure
      - DF_NOTIFY_REMOVE_SERVICE_URL=http://monitor:8080/v1/docker-flow-monitor/remove
      - DF_NOTIFY_CREATE_NODE_URL=http://monitor:8080/v1/docker-flow-monitor/node/reconfigure
      - DF_NOTIFY_REMOVE_NODE_URL=http://monitor:8080/v1/docker-flow-monitor/node/remove
      - DF_INCLUDE_NODE_IP_INFO=true
    deploy:
      # Single replica pinned to a manager node.
      replicas: 1
      placement:
        constraints: [node.role == manager]
      restart_policy:
        delay: 5s
    logging:
      options:
        max-size: 1g
  # Docker Flow Monitor. The monitor log in this thread shows it writing
  # prometheus.yml / alert.rules and starting Prometheus itself.
  # This service is indented with 3 spaces, unlike its siblings (still valid YAML).
  monitor: # this service also runs Prometheus
   image: dockerflow/docker-flow-monitor
   environment:
     - LISTENER_ADDRESS=swarm-listener
     - DF_GET_NODES_URL=http://swarm-listener:8080/v1/docker-flow-swarm-listener/get-nodes
     # NOTE(review): a scrape interval is set twice, here as GLOBAL_SCRAPE_INTERVAL=10s
     # and below as GLOBAL__SCRAPE_INTERVAL=60s. The generated prometheus.yml quoted
     # in this thread shows scrape_interval: 1m.
     - GLOBAL_SCRAPE_INTERVAL=10s
     #- ARG_WEB_ROUTE-PREFIX=/monitor
     - ARG_ALERTMANAGER_URL=http://alert-manager:9093
     - ARG_CONFIG_FILE=/etc/prometheus/prometheus.yml
     - ARG_STORAGE_TSDB_PATH=/prometheus
     - ARG_STORAGE_TSDB_RETENTION=10d
     # NOTE(review): both flags below have empty values. The Prometheus command line
     # in the monitor log contains --web.enable-lifecycle but not
     # --web.enable-admin-api — confirm the second one takes effect.
     - ARG_WEB_ENABLE-LIFECYCLE=
     - ARG_WEB_ENABLE-ADMIN-API=
     - GLOBAL__SCRAPE_INTERVAL=60s
     - GLOBAL__evaluation_interval=60s
     - GLOBAL__scrape_timeout=60s
     - DF_SCRAPE_TARGET_LABELS=metricType,url_healthcheck
     #- DF_NODE_TARGET_LABELS=aws_region,role
   secrets:
     - source: prometheus_scraps_config
       target: /run/secrets/scrape_swarm_prometheus.yml
       #uid: "0"
       # NOTE(review): the Compose file reference describes `mode` in octal notation
       # (e.g. 0444); 444 is a plain decimal integer here — TODO confirm the
       # resulting file permissions.
       mode: 444
   networks:
     - monitor
     - proxy
   ports:
     - 9090:9090
   deploy:
     replicas: 1
     placement:
       constraints: [node.role == manager]
     restart_policy:
       delay: 5s
   logging:
     options:
       max-size: 1g
   # NOTE(review): this `labels` key sits at service level, not under `deploy`.
   # The cadvisor example that the maintainer reports working in this thread
   # declares its com.df.* labels under deploy.labels — verify which one the
   # swarm listener reads.
   labels:
     com.df.notify: 'true'
  # Alertmanager, reachable only on the monitor network (host port is commented out).
  alert-manager:
    image: prom/alertmanager:v0.15.2
    configs:
      - source: alert_manager_config
        target: /etc/alertmanager/config.yml
        # NOTE(review): the Compose file reference describes `mode` in octal
        # notation (e.g. 0444); 444 is decimal here — TODO confirm permissions.
        mode: 444
    command:
      # Points Alertmanager at the config mounted above.
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'
    #ports:
    #  - 9093:9093
    networks:
      - monitor
    environment:
      # Both values fall back to "admin" when the variable is unset or empty.
      - ADMIN_USER=${ADMIN_USER:-admin}
      - ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
    logging:
      options:
        max-size: 1g
    deploy:
      replicas: 1
      placement:
        constraints: [node.role == manager]
    # NOTE(review): service-level labels; the working cadvisor example in this
    # thread puts com.df.* labels under deploy.labels instead — verify.
    labels:
      com.df.notify: 'true'
  # Grafana with its ini file, three dashboards and two provisioning files
  # mounted from the stack-level configs.
  grafana:
    image: grafana/grafana:5.2.2
    volumes:
      - grafana_data:/var/lib/grafana:rw
    configs:
      # NOTE(review): every `mode: 444` below is a decimal integer, while the
      # Compose file reference describes `mode` in octal notation (e.g. 0444) —
      # TODO confirm the resulting permissions.
      - source: grafana_ini_config
        target: /etc/grafana/grafana.ini
        mode: 444
      - source: grafana_dashboard_allhosts_config
        target: /etc/grafana/dashboards/monitor_all_hosts_rev1.json
        mode: 444
      - source: grafana_dashboard_application_config
        target: /etc/grafana/dashboards/application_monitoring_rev1.json
        mode: 444
      - source: grafana_dashboard_system_config
        target: /etc/grafana/dashboards/system_docker_monitoring_rev2.json
        mode: 444
      - source: grafana_provisioning_dashboard_config
        target: /etc/grafana/provisioning/dashboards/provisioning_config_file.yml
        mode: 444
      - source: grafana_provisioning_datasources_config
        target: /etc/grafana/provisioning/datasources/datasource.yml
        mode: 444
    environment:
      # Admin credentials fall back to "admin" when the variables are unset or empty.
      - GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
    ports:
      # Presumably grafana.ini (not shown) makes Grafana listen on 3001 — confirm.
      - 3001:3001
    networks:
      - monitor
      - proxy
    deploy:
      replicas: 1
      placement:
        constraints: [node.role == manager]
    logging:
      options:
        max-size: 1g
    # NOTE(review): service-level labels; the working cadvisor example in this
    # thread puts com.df.* labels under deploy.labels instead — verify.
    labels:
      com.df.notify: 'true'
      com.df.servicePath: "/monitor"
      com.df.reqPathSearchReplace: "/monitor,"
      com.df.port: 3001
  # Blackbox exporter, attached to both the monitor and prod networks.
  blackbox:
    image: prom/blackbox-exporter:v0.12.0
    #ports:
    #  - "9115:9115"
    networks:
      - monitor
      - prod
    configs:
      - source: blackbox_exporter_config
        target: /config/blackbox.yml
        # NOTE(review): the Compose file reference describes `mode` in octal
        # notation (e.g. 0444); 444 is decimal here — TODO confirm permissions.
        mode: 444
    command:
      # Points the exporter at the config mounted above.
      - '--config.file=/config/blackbox.yml'
      - '--log.level=debug'
    deploy:
      replicas: 1
      placement:
        constraints: [node.role == manager]
      resources:
        limits:
          cpus: '0.1'
          memory: '1gb'
    logging:
      options:
        max-size: 1g
    # NOTE(review): service-level labels. The monitor log in this thread lists no
    # blackbox service among the processed ones, and the working cadvisor example
    # puts com.df.* labels under deploy.labels instead — verify.
    labels:
      com.df.notify: 'true'
      com.df.scrapePort: 9115
      com.df.scrapeNetwork: monitor
      com.df.metricType: url_healthcheck
  # Node exporter, one task per node (global mode), with host /proc, /sys and /
  # mounted read-only.
  nodeexporter:
    image: prom/node-exporter:v0.16.0
    user: root
    privileged: true
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
      - /etc/hostname:/etc/host_hostname
      # Backs the textfile collector directory passed in `command` below.
      - txt_file_exporter_data:/etc/node-exporter:ro
    environment:
      - HOST_HOSTNAME=/etc/host_hostname
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc|docker|tmpfs)($$|/)'
      - '--collector.filesystem.ignored-fs-types=^/(aufs|cgroup|devpts|mqueue|nsfs|sysfs|proc|tmpfs|loop|shm|none|overlay)($$|/)'
      - '--collector.textfile.directory=/etc/node-exporter'
    # NOTE(review): `restart` is set alongside deploy.restart_policy; the Compose
    # v3 reference says `restart` is ignored by `docker stack deploy` — confirm.
    restart: always
    ports:
      - 9100:9100
    networks:
      - monitor
    deploy:
      mode: global
      restart_policy:
        delay: 5s
      resources:
        limits:
          cpus: '0.1'
          memory: '1gb'
    logging:
      options:
        max-size: 1g
    # NOTE(review): service-level labels. The monitor log in this thread lists no
    # nodeexporter service among the processed ones, and the working cadvisor
    # example puts com.df.* labels under deploy.labels instead — verify.
    labels:
      com.df.notify: 'true'
      com.df.scrapeNetwork: monitor
      com.df.scrapePort: 9100
      com.df.metricType: system
  # cAdvisor, one task per node (global mode), started on port 9101 via `command`.
  cadvisor:
    image: google/cadvisor:v0.30.2
    privileged: true
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    # NOTE(review): `restart` is set alongside deploy.restart_policy; the Compose
    # v3 reference says `restart` is ignored by `docker stack deploy` — confirm.
    restart: always
    ports:
      - 9101:9101
    networks:
      - monitor
    command:
      # Matches the published port and the com.df.scrapePort label below.
      - '--port=9101'
    deploy:
      mode: global
      restart_policy:
        delay: 5s
      resources:
        limits:
          cpus: '0.1'
          memory: '1gb'
    logging:
      options:
        max-size: 1g
    # NOTE(review): service-level labels. The maintainer's working cadvisor
    # example in this thread declares the same com.df.* labels under
    # deploy.labels instead — verify which location the swarm listener reads.
    labels:
      com.df.notify: 'true'
      com.df.scrapeNetwork: monitor
      com.df.scrapePort: 9101
      com.df.metricType: system

example of one app:

  # Example application service: nginx routed through the proxy by SNI.
  # Unlike the monitoring services in the stack file, its labels are under deploy.labels.
  nginx:
    image: nginx
    networks:
      proxy:
      site01:
        aliases:
         - nginx-site01.domain.local
    volumes:
      - /storage:/opt/nginx/html:ro
      - /etc/localtime:/etc/localtime:ro
      - /etc/timezone:/etc/timezone:ro
    deploy:
      mode: replicated
      replicas: 1
      endpoint_mode: dnsrr
      placement:
        constraints:
          - node.labels.site==site01
      restart_policy:
        delay: 5s
      labels:
        - com.df.notify='true'
        - com.df.healthurl=nginx-site01.domain.local
        # NOTE(review): no com.df.scrapePort label is set for this service — confirm
        # whether a scrape target is expected from it.
        - com.df.scrapeNetwork=monitor
        - com.df.metricType=url_healthcheck
        - com.df.alertName=mem_limit
        - com.df.alertIf=@service_mem_limit:0.8
        - com.df.alertFor=5s
        - com.df.scaleMin=2
        - com.df.scaleMax=4
        - com.df.port=443
        - com.df.srcPort=443
        - com.df.reqMode=sni
        # NOTE(review): `-` is used here where the other labels use `=`. The logs in
        # this thread show it arriving as a key named
        # `pathType-"req.ssl_sni -i -m reg"` with an empty value.
        - com.df.pathType-"req.ssl_sni -i -m reg"
        # NOTE(review): in this list form the double quotes and doubled backslash
        # are passed through literally; the monitor log shows servicePath as
        # "\"^(nginx-site01\\\\.)\"", whereas the apigateway services arrive unquoted.
        - com.df.servicePath="^(nginx-site01\\.)"

Here is the Prometheus configuration file at /etc/prometheus/prometheus.yml:

# Prometheus configuration as found at /etc/prometheus/prometheus.yml.
# It has global, alerting and rule_files sections only; there is no
# scrape_configs section, which is the problem this issue reports.
global:
  scrape_interval: 1m
  scrape_timeout: 1m
  evaluation_interval: 1m
alerting:
  alertmanagers:
  # Single static Alertmanager target, matching the alert-manager service name.
  - static_configs:
    - targets:
      - alert-manager:9093
    scheme: http
    timeout: 10s
rule_files:
- /etc/prometheus/alert.rules

some logs from monitor:

docker logs monitor_monitor.1.l8ydgmdj2iyawtaa89ee35x2l
2018/08/27 11:16:51 Requesting services from Docker Flow Swarm Listener
2018/08/27 11:16:56 Processing: [{"distribute":"true","port":"8080","replicas":"1","reqPathSearchReplace":"\"/site01,\"","serviceName":"site01_app01","servicePath":"\"/site01\""},{"distribute":"true","port":"8080","replicas":"1","reqPathSearchReplace":"\"/site02,\"","serviceName":"site02_app01","servicePath":"\"/site02\""},{"distribute":"true","pathType":"req.ssl_sni -i -m reg","port":"9443","replicas":"1","reqMode":"sni","serviceName":"site01_apigateway","servicePath":"^(apigateway-site01\\.)","srcPort":"9443"},{"distribute":"true","pathType":"req.ssl_sni -i -m reg","port":"9443","replicas":"1","reqMode":"sni","serviceName":"site02_apigateway","servicePath":"^(apigateway-site02\\.)","srcPort":"9443"},{"alertFor":"5s","alertIf":"@service_mem_limit:0.8","alertName":"mem_limit","distribute":"true","healthurl":"nginx-site01.domain.local","metricType":"url_healthcheck","pathType-\"req.ssl_sni -i -m reg\"":"","port":"443","replicas":"1","reqMode":"sni","scaleMax":"4","scaleMin":"2","scrapeNetwork":"monitor","serviceName":"site01_nginx","servicePath":"\"^(nginx-site01\\\\.)\"","srcPort":"443"},{"alertFor":"5s","alertIf":"@service_mem_limit:0.8","alertName":"mem_limit","distribute":"true","healthurl":"nginx-site02.domain.local","metricType":"url_healthcheck","pathType-\"req.ssl_sni -i -m reg\"":"","port":"443","replicas":"1","reqMode":"sni","scaleMax":"4","scaleMin":"2","scrapeNetwork":"monitor","serviceName":"site02_nginx","servicePath":"\"^(nginx-site02\\\\.)\"","srcPort":"443"}]
2018/08/27 11:16:56 Requesting nodes from Docker Flow Swarm Listener
2018/08/27 11:16:56 Processing: [{"address":"10.132.0.10","availability":"active","hostname":"swarm-worker-3","id":"0abczxkaqmgvscwm7r0xafut2","role":"worker","state":"ready","versionIndex":"477591"},{"address":"10.132.0.8","availability":"active","hostname":"swarm-worker-1","id":"4a61xthy8rnd8tu08e48pabx6","role":"worker","state":"ready","versionIndex":"477591"},{"address":"0.0.0.0","availability":"active","hostname":"swarm-manager-2","id":"hn0r5pmdr2gruu6haneqo1b72","role":"manager","state":"ready","versionIndex":"477591"},{"address":"10.132.0.5","availability":"active","hostname":"swarm-manager-1","id":"qg5z99jfzvw1lbcfe7vbqfyom","role":"manager","state":"ready","versionIndex":"477591"},{"address":"10.132.0.7","availability":"active","hostname":"swarm-worker-2","id":"vnjn8uj8mptck9ik291muyqzf","role":"worker","state":"ready","versionIndex":"477591"}]
2018/08/27 11:16:56 Writing to alert.rules
2018/08/27 11:16:56 Writing to prometheus.yml
2018/08/27 11:16:56 Starting Prometheus
2018/08/27 11:16:56 /bin/sh -c prometheus --config.file="/etc/prometheus/prometheus.yml" --storage.tsdb.path="/prometheus" --storage.tsdb.retention="10d" --web.enable-lifecycle --web.console.libraries="/usr/share/prometheus/console_libraries" --web.console.templates="/usr/share/prometheus/consoles"
2018/08/27 11:16:56 Starting Docker Flow Monitor
level=info ts=2018-08-27T11:16:56.48316236Z caller=main.go:222 msg="Starting Prometheus" version="(version=2.3.2, branch=HEAD, revision=71af5e29e815795e9dd14742ee7725682fa14b7b)"
level=info ts=2018-08-27T11:16:56.483243102Z caller=main.go:223 build_context="(go=go1.10.3, user=root@5258e0bd9cc1, date=20180712-14:02:52)"
level=info ts=2018-08-27T11:16:56.483262247Z caller=main.go:224 host_details="(Linux 4.15.0-1018-gcp #19-Ubuntu SMP Thu Aug 16 13:38:55 UTC 2018 x86_64 95303c893d07 (none))"
level=info ts=2018-08-27T11:16:56.483277801Z caller=main.go:225 fd_limits="(soft=1048576, hard=1048576)"
level=info ts=2018-08-27T11:16:56.484625376Z caller=web.go:415 component=web msg="Start listening for connections" address=0.0.0.0:9090
level=info ts=2018-08-27T11:16:56.484599804Z caller=main.go:533 msg="Starting TSDB ..."
level=info ts=2018-08-27T11:16:56.490405807Z caller=main.go:543 msg="TSDB started"
level=info ts=2018-08-27T11:16:56.490472545Z caller=main.go:603 msg="Loading configuration file" filename=/etc/prometheus/prometheus.yml
level=info ts=2018-08-27T11:16:56.492077907Z caller=main.go:629 msg="Completed loading of configuration file" filename=/etc/prometheus/prometheus.yml
level=info ts=2018-08-27T11:16:56.492137538Z caller=main.go:502 msg="Server is ready to receive web requests."

logs from listener:

docker service logs management_df-swarm-listener
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:28 Starting Docker Flow: Swarm Listener
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:28 Using Docker Client API version: 1.37
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:28 Sending notifications for running services and nodes
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:28 Listening to Docker Service Events
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:28 Listening to Docker Node Events
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:28 Sending node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=0.0.0.0&availability=active&hostname=swarm-manager-2&id=hn0r5pmdr2gruu6haneqo1b72&role=manager&state=ready&versionIndex=477482
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:28 Sending node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.132.0.10&availability=active&hostname=swarm-worker-3&id=0abczxkaqmgvscwm7r0xafut2&role=worker&state=ready&versionIndex=477482
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:28 Sending node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.132.0.7&availability=active&hostname=swarm-worker-2&id=vnjn8uj8mptck9ik291muyqzf&role=worker&state=ready&versionIndex=477482
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:28 Sending node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.132.0.5&availability=active&hostname=swarm-manager-1&id=qg5z99jfzvw1lbcfe7vbqfyom&role=manager&state=ready&versionIndex=477482
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:28 Sending node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.132.0.8&availability=active&hostname=swarm-worker-1&id=4a61xthy8rnd8tu08e48pabx6&role=worker&state=ready&versionIndex=477482
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:33 Sending service created notification to http://monitor:8080/v1/docker-flow-monitor/reconfigure?distribute=true&port=8080&replicas=1&reqPathSearchReplace=%22%2Fsite02%2C%22&serviceName=site02_app01&servicePath=%22%2Fsite02%22
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:33 Sending service created notification to http://monitor:8080/v1/docker-flow-monitor/reconfigure?distribute=true&pathType=req.ssl_sni+-i+-m+reg&port=9443&replicas=1&reqMode=sni&serviceName=site02_apigateway&servicePath=%5E%28apigateway-site02%5C.%29&srcPort=9443
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:33 Sending service created notification to http://monitor:8080/v1/docker-flow-monitor/reconfigure?alertFor=5s&alertIf=%40service_mem_limit%3A0.8&alertName=mem_limit&distribute=true&healthurl=nginx-site02.domain.local&metricType=url_healthcheck&pathType-%22req.ssl_sni+-i+-m+reg%22=&port=443&replicas=1&reqMode=sni&scaleMax=4&scaleMin=2&scrapeNetwork=monitor&serviceName=site02_nginx&servicePath=%22%5E%28nginx-site02%5C%5C.%29%22&srcPort=443
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:33 Sending service created notification to http://monitor:8080/v1/docker-flow-monitor/reconfigure?distribute=true&port=8080&replicas=1&reqPathSearchReplace=%22%2Fsite01%2C%22&serviceName=site01_app01&servicePath=%22%2Fsite01%22
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:33 Sending service created notification to http://monitor:8080/v1/docker-flow-monitor/reconfigure?alertFor=5s&alertIf=%40service_mem_limit%3A0.8&alertName=mem_limit&distribute=true&healthurl=nginx-site01.domain.local&metricType=url_healthcheck&pathType-%22req.ssl_sni+-i+-m+reg%22=&port=443&replicas=1&reqMode=sni&scaleMax=4&scaleMin=2&scrapeNetwork=monitor&serviceName=site01_nginx&servicePath=%22%5E%28nginx-site01%5C%5C.%29%22&srcPort=443
management_df-swarm-listener.1.f0v58zj92u1l@swarm-manager-1    | 2018/08/27 11:06:33 Sending service created notification to http://monitor:8080/v1/docker-flow-monitor/reconfigure?distribute=true&pathType=req.ssl_sni+-i+-m+reg&port=9443&replicas=1&reqMode=sni&serviceName=site01_apigateway&servicePath=%5E%28apigateway-site01%5C.%29&srcPort=9443

any idea?

Thanks!

vfarcic commented 6 years ago

Can you take a look @thomasjpfan ?

thomasjpfan commented 6 years ago

@dorsanyvision The DFSL configuration needs to be adjusted a little. The DF_NOTIFY_* variables are comma-separated lists of URLs:

  # Suggested swarm-listener fragment: each DF_NOTIFY_*_SERVICE_URL variable is
  # defined once and carries both endpoints (proxy, then monitor) separated by a comma.
  swarm-listener:
    image: dockerflow/docker-flow-swarm-listener:18.09.06-6
    networks:
      - proxy
    environment:
      - DF_NOTIFY_CREATE_SERVICE_URL=http://proxy:8080/v1/docker-flow-proxy/reconfigure,http://monitor:8080/v1/docker-flow-monitor/reconfigure
      - DF_NOTIFY_REMOVE_SERVICE_URL=http://proxy:8080/v1/docker-flow-proxy/remove,http://monitor:8080/v1/docker-flow-monitor/remove

Based on the monitor logs, it seems to me that the cadvisor target is not added. I went through your example and created a smaller one that works on my system:

# Reduced stack that the maintainer reports working: proxy, swarm-listener,
# monitor, alert-manager and a single exporter (cadvisor).
version: '3.3'
# Both networks are external, so this stack attaches to them rather than defining them.
networks:
  monitor:
    external: true
  proxy:
    external: true
services:
  proxy:
    image: dockerflow/docker-flow-proxy:18.09.05-6
    ports:
      - 80:80
      - 443:443
    networks:
      - proxy
    environment:
      - LISTENER_ADDRESS=swarm-listener
      - DEBUG=true
    deploy:
      mode: global
  swarm-listener:
    image: dockerflow/docker-flow-swarm-listener:18.09.06-6
    networks:
      - proxy
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    environment:
      # Each service-notification variable is defined once, with the proxy and
      # monitor endpoints separated by a comma.
      - DF_NOTIFY_CREATE_SERVICE_URL=http://proxy:8080/v1/docker-flow-proxy/reconfigure,http://monitor:8080/v1/docker-flow-monitor/reconfigure
      - DF_NOTIFY_REMOVE_SERVICE_URL=http://proxy:8080/v1/docker-flow-proxy/remove,http://monitor:8080/v1/docker-flow-monitor/remove
      # Node notifications go to the monitor only.
      - DF_NOTIFY_CREATE_NODE_URL=http://monitor:8080/v1/docker-flow-monitor/node/reconfigure
      - DF_NOTIFY_REMOVE_NODE_URL=http://monitor:8080/v1/docker-flow-monitor/node/remove
      - DF_INCLUDE_NODE_IP_INFO=true
    deploy:
      replicas: 1
  # This service is indented with 3 spaces, unlike its siblings (still valid YAML).
  monitor: # this service also runs Prometheus
   image: dockerflow/docker-flow-monitor
   environment:
     - LISTENER_ADDRESS=swarm-listener
     - DF_GET_NODES_URL=http://swarm-listener:8080/v1/docker-flow-swarm-listener/get-nodes
     # NOTE(review): a scrape interval is set twice, as GLOBAL_SCRAPE_INTERVAL=10s
     # here and GLOBAL__SCRAPE_INTERVAL=60s below — confirm which one is intended.
     - GLOBAL_SCRAPE_INTERVAL=10s
     - ARG_ALERTMANAGER_URL=http://alert-manager:9093
     - ARG_STORAGE_TSDB_PATH=/prometheus
     - ARG_STORAGE_TSDB_RETENTION=10d
     - GLOBAL__SCRAPE_INTERVAL=60s
     - GLOBAL__evaluation_interval=60s
     - GLOBAL__scrape_timeout=60s
     - DF_SCRAPE_TARGET_LABELS=metricType,url_healthcheck
   networks:
     - monitor
     - proxy
   ports:
     - 9090:9090
   deploy:
     replicas: 1
  # No config file is mounted or passed here; only the storage path is set.
  alert-manager:
    image: prom/alertmanager:v0.15.2
    command:
      - '--storage.path=/alertmanager'
    networks:
      - monitor
  cadvisor:
    image: google/cadvisor:v0.30.2
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    ports:
      - 9101:9101
    networks:
      - monitor
    command:
      # Matches the published port and the com.df.scrapePort label below.
      - '--port=9101'
    deploy:
      mode: global
      # The com.df.* labels are declared under deploy.labels in this example.
      labels:
        - com.df.notify=true
        - com.df.scrapeNetwork=monitor
        - com.df.scrapePort=9101
        - com.df.metricType=system

Can you test this out and see if you see the exporter_cadvisor in the prometheus targets?

dorsany commented 6 years ago

Hi @thomasjpfan and @vfarcic ,

Thank you for your assistance; it is much appreciated.

With the above configuration, cadvisor is working: I can see its targets, and the scrape_configs section looks OK.

But when I try to add the node exporter (like here), I don't see a new job created for it under scrape_configs:

  # Node exporter added on top of the working example; this is the service for
  # which no scrape job appears.
  node-exporter:
    image: prom/node-exporter:v0.16.0
    #user: root
    #privileged: true
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
      - /etc/hostname:/etc/host_hostname
        #- txt_file_exporter_data:/etc/node-exporter:ro
    environment:
      - HOST_HOSTNAME=/etc/host_hostname
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc|docker|tmpfs)($$|/)'
      - '--collector.filesystem.ignored-fs-types=^/(aufs|cgroup|devpts|mqueue|nsfs|sysfs|proc|tmpfs|loop|shm|none|overlay)($$|/)'
      # NOTE(review): the textfile directory is still passed although the volume
      # that would back /etc/node-exporter is commented out above.
      - '--collector.textfile.directory=/etc/node-exporter'
    # NOTE(review): `restart` is set alongside deploy.restart_policy; the Compose
    # v3 reference says `restart` is ignored by `docker stack deploy` — confirm.
    restart: always
    ports:
      - 9100:9100
    networks:
      - monitor
    deploy:
      mode: global
      restart_policy:
        delay: 5s
      resources:
        limits:
          cpus: '0.1'
          memory: '1gb'
    logging:
      options:
        max-size: 1g
    # NOTE(review): these labels are at service level, whereas the cadvisor
    # service that does get a scrape job declares its com.df.* labels under
    # deploy.labels. The monitor and listener logs that follow mention only
    # management_cadvisor. Verify the label location.
    labels:
      - com.df.notify='true'
      - com.df.scrapeNetwork=monitor
      - com.df.scrapePort=9100
      - com.df.metricType=system

The node exporter metrics are reachable from within the monitor container:

docker exec -it management_monitor.1.mhdfsfsdfdf4353d3cz sh
/prometheus $ wget -qO- node-exporter:9100/metrics
# HELP go_gc_duration_seconds A summary of the GC invocation durations.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 0
go_gc_duration_seconds{quantile="0.25"} 0
go_gc_duration_seconds{quantile="0.5"} 0
go_gc_duration_seconds{quantile="0.75"} 0
go_gc_duration_seconds{quantile="1"} 0
go_gc_duration_seconds_sum 0
go_gc_duration_seconds_count 0
# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 6
# HELP go_info Information about the Go environment.
# TYPE go_info gauge
go_info{version="go1.9.6"} 1
# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use.
.......

logs:

docker service logs management_monitor
root@dor-swarm-manager-1:~/docker-compose/swarm# docker service logs management_monitor
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:48 Requesting services from Docker Flow Swarm Listener
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:48 Writing to prometheus.yml
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:48 Starting Docker Flow Monitor
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:48 Starting Prometheus
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:48 /bin/sh -c prometheus --storage.tsdb.path="/prometheus" --storage.tsdb.retention="10d" --config.file="/etc/prometheus/prometheus.yml" --web.console.libraries="/usr/share/prometheus/console_libraries" --web.console.templates="/usr/share/prometheus/consoles"
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:48.689489597Z caller=main.go:222 msg="Starting Prometheus" version="(version=2.3.2, branch=HEAD, revision=71af5e29e815795e9dd14742ee7725682fa14b7b)"
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:48.689551636Z caller=main.go:223 build_context="(go=go1.10.3, user=root@5258e0bd9cc1, date=20180712-14:02:52)"
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:48.689566963Z caller=main.go:224 host_details="(Linux 4.15.0-1009-gcp #9-Ubuntu SMP Fri May 25 08:48:52 UTC 2018 x86_64 a74bfb67e8de (none))"
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:48.689579861Z caller=main.go:225 fd_limits="(soft=1048576, hard=1048576)"
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:48.690241962Z caller=main.go:533 msg="Starting TSDB ..."
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:48.690282559Z caller=web.go:415 component=web msg="Start listening for connections" address=0.0.0.0:9090
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:48.695334826Z caller=main.go:543 msg="TSDB started"
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:48.695383944Z caller=main.go:603 msg="Loading configuration file" filename=/etc/prometheus/prometheus.yml
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:48.696145164Z caller=main.go:629 msg="Completed loading of configuration file" filename=/etc/prometheus/prometheus.yml
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:48.696174478Z caller=main.go:502 msg="Server is ready to receive web requests."
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Processing /v1/docker-flow-monitor/node/reconfigure?address=10.138.0.7&availability=active&hostname=dor-swarm-worker-1&id=jmhzbamahaatmbwwjkrdbp07a&role=worker&state=ready&versionIndex=27010
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Writing to prometheus.yml
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Reloading Prometheus
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 pkill -HUP prometheus
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:56.5367742Z caller=main.go:603 msg="Loading configuration file" filename=/etc/prometheus/prometheus.yml
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Prometheus was reloaded
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Processing /v1/docker-flow-monitor/node/reconfigure?address=10.138.0.6&availability=active&hostname=dor-swarm-manager-1&id=u948dq0gdh9rgn5j1xpufeusd&role=manager&state=ready&versionIndex=27011
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Writing to prometheus.yml
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:56.537120297Z caller=main.go:629 msg="Completed loading of configuration file" filename=/etc/prometheus/prometheus.yml
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Reloading Prometheus
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 pkill -HUP prometheus
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:56.53783312Z caller=main.go:603 msg="Loading configuration file" filename=/etc/prometheus/prometheus.yml
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Prometheus was reloaded
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:56.538059277Z caller=main.go:629 msg="Completed loading of configuration file" filename=/etc/prometheus/prometheus.yml
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Processing /v1/docker-flow-monitor/reconfigure?distribute=true&metricType=system&nodeInfo=%!B(MISSING)%!B(MISSING)%!d(MISSING)or-swarm-worker-1%2C%!.(MISSING)4.56%2C%!j(MISSING)mhzbamahaatmbwwjkrdbp07a%5D%!C(MISSING)%!B(MISSING)%!d(MISSING)or-swarm-manager-1%2C%!.(MISSING)4.55%2C%!u(MISSING)948dq0gdh9rgn5j1xpufeusd%5D%!D(MISSING)&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Adding scrape management_cadvisor
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | {  0xc420254050 9101   management_cadvisor map[{dor-swarm-manager-1 10.0.4.55 u948dq0gdh9rgn5j1xpufeusd}:{} {dor-swarm-worker-1 10.0.4.56 jmhzbamahaatmbwwjkrdbp07a}:{}]}
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Writing to prometheus.yml
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Reloading Prometheus
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 pkill -HUP prometheus
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | 2018/09/12 13:12:56 Prometheus was reloaded
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:56.676495039Z caller=main.go:603 msg="Loading configuration file" filename=/etc/prometheus/prometheus.yml
management_monitor.1.zpfxi13qwj38@dor-swarm-worker-1    | level=info ts=2018-09-12T13:12:56.677065301Z caller=main.go:629 msg="Completed loading of configuration file" filename=/etc/prometheus/prometheus.yml
root@dor-swarm-manager-1:~/docker-compose/swarm# docker service logs management_swarm-listener
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Starting Docker Flow: Swarm Listener
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Using Docker Client API version: 1.37
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Sending notifications for running services and nodes
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Listening to Docker Service Events
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Listening to Docker Node Events
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Sending node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.6&availability=active&hostname=dor-swarm-manager-1&id=u948dq0gdh9rgn5j1xpufeusd&role=manager&state=ready&versionIndex=27011
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Sending node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.7&availability=active&hostname=dor-swarm-worker-1&id=jmhzbamahaatmbwwjkrdbp07a&role=worker&state=ready&versionIndex=27010
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Retrying node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.7&availability=active&hostname=dor-swarm-worker-1&id=jmhzbamahaatmbwwjkrdbp07a&role=worker&state=ready&versionIndex=27010 (1 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Retrying node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.6&availability=active&hostname=dor-swarm-manager-1&id=u948dq0gdh9rgn5j1xpufeusd&role=manager&state=ready&versionIndex=27011 (1 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:46 Retrying node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.7&availability=active&hostname=dor-swarm-worker-1&id=jmhzbamahaatmbwwjkrdbp07a&role=worker&state=ready&versionIndex=27010 (2 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:46 Retrying node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.6&availability=active&hostname=dor-swarm-manager-1&id=u948dq0gdh9rgn5j1xpufeusd&role=manager&state=ready&versionIndex=27011 (2 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:46 Sending service created notification to http://proxy:8080/v1/docker-flow-proxy/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:46 Sending service created notification to http://monitor:8080/v1/docker-flow-monitor/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:46 Retrying service created notification to http://monitor:8080/v1/docker-flow-monitor/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor (1 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:46 Retrying service created notification to http://proxy:8080/v1/docker-flow-proxy/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor (1 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:51 Retrying node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.6&availability=active&hostname=dor-swarm-manager-1&id=u948dq0gdh9rgn5j1xpufeusd&role=manager&state=ready&versionIndex=27011 (3 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:51 Retrying node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.7&availability=active&hostname=dor-swarm-worker-1&id=jmhzbamahaatmbwwjkrdbp07a&role=worker&state=ready&versionIndex=27010 (3 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:51 Retrying service created notification to http://monitor:8080/v1/docker-flow-monitor/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor (2 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:51 Retrying service created notification to http://proxy:8080/v1/docker-flow-proxy/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor (2 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:56 Retrying service created notification to http://proxy:8080/v1/docker-flow-proxy/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor (3 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:13:01 Retrying service created notification to http://proxy:8080/v1/docker-flow-proxy/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor (4 try)

current scrape file:

global:
  scrape_interval: 1m
  scrape_timeout: 1m
  evaluation_interval: 1m
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - alert-manager:9093
    scheme: http
    timeout: 10s
scrape_configs:
- job_name: management_cadvisor
  scrape_interval: 1m
  scrape_timeout: 1m
  metrics_path: /metrics
  scheme: http
  file_sd_configs:
  - files:
    - /etc/prometheus/file_sd/management_cadvisor.json
    refresh_interval: 5m
thomasjpfan commented 6 years ago

@dorsanyvision Can you post the Docker Flow Swarm Listener (DFSL) logs as well? I want to make sure that the node-exporter notification was sent to Docker Flow Monitor (DFM).

dorsany commented 6 years ago

Hi @thomasjpfan, I already attached them above (under: docker service logs management_swarm-listener), but here are fresh ones:

docker service logs management_swarm-listener
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Starting Docker Flow: Swarm Listener
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Using Docker Client API version: 1.37
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Sending notifications for running services and nodes
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Listening to Docker Service Events
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Listening to Docker Node Events
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Sending node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.6&availability=active&hostname=dor-swarm-manager-1&id=u948dq0gdh9rgn5j1xpufeusd&role=manager&state=ready&versionIndex=27011
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Sending node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.7&availability=active&hostname=dor-swarm-worker-1&id=jmhzbamahaatmbwwjkrdbp07a&role=worker&state=ready&versionIndex=27010
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Retrying node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.7&availability=active&hostname=dor-swarm-worker-1&id=jmhzbamahaatmbwwjkrdbp07a&role=worker&state=ready&versionIndex=27010 (1 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:41 Retrying node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.6&availability=active&hostname=dor-swarm-manager-1&id=u948dq0gdh9rgn5j1xpufeusd&role=manager&state=ready&versionIndex=27011 (1 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:46 Retrying node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.7&availability=active&hostname=dor-swarm-worker-1&id=jmhzbamahaatmbwwjkrdbp07a&role=worker&state=ready&versionIndex=27010 (2 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:46 Retrying node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.6&availability=active&hostname=dor-swarm-manager-1&id=u948dq0gdh9rgn5j1xpufeusd&role=manager&state=ready&versionIndex=27011 (2 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:46 Sending service created notification to http://proxy:8080/v1/docker-flow-proxy/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:46 Sending service created notification to http://monitor:8080/v1/docker-flow-monitor/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:46 Retrying service created notification to http://monitor:8080/v1/docker-flow-monitor/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor (1 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:46 Retrying service created notification to http://proxy:8080/v1/docker-flow-proxy/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor (1 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:51 Retrying node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.6&availability=active&hostname=dor-swarm-manager-1&id=u948dq0gdh9rgn5j1xpufeusd&role=manager&state=ready&versionIndex=27011 (3 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:51 Retrying node created notification to http://monitor:8080/v1/docker-flow-monitor/node/reconfigure?address=10.138.0.7&availability=active&hostname=dor-swarm-worker-1&id=jmhzbamahaatmbwwjkrdbp07a&role=worker&state=ready&versionIndex=27010 (3 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:51 Retrying service created notification to http://monitor:8080/v1/docker-flow-monitor/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor (2 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:51 Retrying service created notification to http://proxy:8080/v1/docker-flow-proxy/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor (2 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:12:56 Retrying service created notification to http://proxy:8080/v1/docker-flow-proxy/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor (3 try)
management_swarm-listener.1.1esk4551p2xo@dor-swarm-manager-1    | 2018/09/12 13:13:01 Retrying service created notification to http://proxy:8080/v1/docker-flow-proxy/reconfigure?distribute=true&metricType=system&nodeInfo=%5B%5B%22dor-swarm-worker-1%22%2C%2210.0.4.56%22%2C%22jmhzbamahaatmbwwjkrdbp07a%22%5D%2C%5B%22dor-swarm-manager-1%22%2C%2210.0.4.55%22%2C%22u948dq0gdh9rgn5j1xpufeusd%22%5D%5D&scrapeNetwork=monitor&scrapePort=9101&serviceName=management_cadvisor (4 try)

Here is the output of docker service ls:

docker service ls
ID                  NAME                        MODE                REPLICAS            IMAGE                                              PORTS
hd1t9vizzy1a        management_alert-manager    replicated          1/1                 prom/alertmanager:v0.15.2
838ii8wg6gam        management_cadvisor         global              2/2                 google/cadvisor:v0.30.2                            *:9101->9101/tcp
slfbkrtfh802        management_monitor          replicated          1/1                 dockerflow/docker-flow-monitor:latest              *:9090->9090/tcp
r30v808e8akg        management_node-exporter    global              2/2                 prom/node-exporter:v0.16.0                         *:9100->9100/tcp
ewfz4mkjlech        management_proxy            global              2/2                 dockerflow/docker-flow-proxy:18.09.05-6            *:80->80/tcp, *:443->443/tcp
nld5mf7ts9q0        management_swarm-listener   replicated          1/1                 dockerflow/docker-flow-swarm-listener:18.09.06-6
thomasjpfan commented 6 years ago

@dorsanyvision Your node-exporter configuration assigns the labels as container labels. They need to be service labels, which means placing them under deploy:

    deploy:
      ...
      labels:
        com.df.notify: "true"
        com.df.scrapeNetwork: monitor
        com.df.scrapePort: 9100
        com.df.metricType: system