Closed antsbean closed 5 years ago
We've seen the same issue running a cluster of 3 nodes, using rabbitmq_peer_discovery_k8s plugin
root@rabbitmq-1:/# rabbitmq-diagnostics status
Status of node rabbit@rabbitmq-1.rabbitmq-cluster.web.svc.cluster.local ...
[{pid,399},
{running_applications,
[{prometheus_rabbitmq_exporter,
"RabbitMQ Prometheus.io metrics exporter","3.7.2.5"},
{prometheus_cowboy,[],"0.1.7"},
{rabbitmq_peer_discovery_k8s,
"Kubernetes-based RabbitMQ peer discovery backend","3.7.13"},
{rabbitmq_peer_discovery_common,
"Modules shared by various peer discovery backends","3.7.13"},
{rabbitmq_management,"RabbitMQ Management Console","3.7.13"},
{amqp_client,"RabbitMQ AMQP Client","3.7.13"},
{rabbitmq_management_agent,"RabbitMQ Management Agent","3.7.13"},
{rabbitmq_web_dispatch,"RabbitMQ Web Dispatcher","3.7.13"},
{rabbit,"RabbitMQ","3.7.13"},
{rabbit_common,
"Modules shared by rabbitmq-server and rabbitmq-erlang-client",
"3.7.13"},
{os_mon,"CPO CXC 138 46","2.4.7"},
{sysmon_handler,"Rate-limiting system_monitor event handler","1.1.0"},
{cowboy,"Small, fast, modern HTTP server.","2.6.1"},
{ranch,"Socket acceptor pool for TCP protocols.","1.7.1"},
{ssl,"Erlang/OTP SSL application","9.2.1"},
{public_key,"Public key infrastructure","1.6.5"},
{asn1,"The Erlang ASN1 compiler version 5.0.8","5.0.8"},
{prometheus_httpd,"Prometheus.io inets httpd exporter","2.1.10"},
{mnesia,"MNESIA CXC 138 12","4.15.6"},
{recon,"Diagnostic tools for production use","2.3.6"},
{prometheus,"Prometheus.io client in Erlang","4.2.2"},
{cowlib,"Support library for manipulating Web protocols.","2.7.0"},
{crypto,"CRYPTO","4.4.1"},
{xmerl,"XML parser","1.3.19"},
{accept,"Accept header(s) for Erlang/Elixir","0.3.5"},
{jsx,"a streaming, evented json parsing toolkit","2.9.0"},
{inets,"INETS CXC 138 49","7.0.6"},
{lager,"Erlang logging framework","3.6.5"},
{goldrush,"Erlang event stream processor","0.1.9"},
{compiler,"ERTS CXC 138 10","7.3.2"},
{syntax_tools,"Syntax tools","2.1.7"},
{sasl,"SASL CXC 138 11","3.3"},
{stdlib,"ERTS CXC 138 10","3.8"},
{kernel,"ERTS CXC 138 10","6.3"}]},
{os,{unix,linux}},
{erlang_version,
"Erlang/OTP 21 [erts-10.3] [source] [64-bit] [smp:8:8] [ds:8:8:10] [async-threads:128] [hipe]\n"},
{memory,
[{connection_readers,0},
{connection_writers,0},
{connection_channels,0},
{connection_other,19612},
{queue_procs,692244},
{queue_slave_procs,2869552},
{plugins,3775844},
{other_proc,24334256},
{metrics,215500},
{mgmt_db,2015960},
{mnesia,426992},
{other_ets,3401864},
{binary,2163152},
{msg_index,283248},
{code,32007583},
{atom,1517737},
{other_system,33918208},
{allocated_unused,30688360},
{reserved_unallocated,0},
{strategy,rss},
{total,[{erlang,107641752},{rss,128757760},{allocated,138330112}]}]},
{alarms,[]},
{listeners,[{clustering,25672,"::"},{amqp,5672,"::"},{http,15672,"::"}]},
{vm_memory_calculation_strategy,rss},
{vm_memory_high_watermark,0.4},
{vm_memory_limit,12649049292},
{disk_free_limit,50000000},
{disk_free,9902882816},
{file_descriptors,
[{total_limit,1048476},
{total_used,24},
{sockets_limit,943626},
{sockets_used,0}]},
{processes,[{limit,1048576},{used,620}]},
{run_queue,1},
{uptime,981},
{kernel,{net_ticktime,60}}]
2019-03-22 16:42:37.648 [error] <0.9263.0> CRASH REPORT Process <0.9263.0> with 0 neighbours crashed with reason: no case clause matching syncing in prometheus_rabbitmq_queues_collector:'-collect_mf/2-fun-3-'/1 line 75
2019-03-22 16:42:37.649 [error] <0.9262.0> Ranch listener rabbit_web_dispatch_sup_15672, connection process <0.9262.0>, stream 1 had its request process <0.9263.0> exit with reason {case_clause,syncing} and stacktrace [{prometheus_rabbitmq_queues_collector,'-collect_mf/2-fun-3-',1,[{file,"src/collectors/prometheus_rabbitmq_queues_collector.erl"},{line,75}]},{prometheus_rabbitmq_queues_collector,'-collect_metrics/2-lc$^1/1-0-',3,[{file,"src/collectors/prometheus_rabbitmq_queues_collector.erl"},{line,102}]},{prometheus_rabbitmq_queues_collector,'-collect_metrics/2-lc$^1/1-0-',3,[{file,"src/collectors/prometheus_rabbitmq_queues_collector.erl"},{line,102}]},{prometheus_model_helpers,create_mf,5,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/model/prometheus_model_helpers.erl"},{line,127}]},{prometheus_rabbitmq_queues_collector,mf,3,[{file,"src/collectors/prometheus_rabbitmq_queues_collector.erl"},{line,94}]},{prometheus_rabbitmq_queues_collector,'-collect_mf/2-lc$^4/1-2-',3,[{file,"src/collectors/prometheus_rabbitmq_queues_collector.erl"},{line,75}]},{prometheus_rabbitmq_queues_collector,collect_mf,2,[{file,"src/collectors/prometheus_rabbitmq_queues_collector.erl"},{line,75}]},{prometheus_collector,collect_mf,3,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/prometheus_collector.erl"},{line,141}]}]
I had the same issue,
I'm running a two-node RabbitMQ cluster with Docker, using the base image "rabbitmq:3-management". After running the performance test for a few minutes (on average 20 minutes) the plugin stops responding. Analyzing the logs I found the following message.
2019-04-03 20:18:27.872 [error] <0.3601.1> CRASH REPORT Process <0.3601.1> with 0 neighbours crashed with reason: no function clause matching proplists:get_value(get, 0, undefined) line 215
2019-04-03 20:18:27.873 [error] <0.3600.1> Ranch listener rabbit_web_dispatch_sup_15672, connection process <0.3600.1>, stream 1 had its request process <0.3601.1> exit with reason function_clause and stacktrace [{proplists,get_value,[get,0,undefined],[{file,"proplists.erl"},{line,215}]},{prometheus_rabbitmq_core_metrics_collector,'-collect_metrics/2-lc$^0/1-0-',3,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,177}]},{prometheus_model_helpers,create_mf,5,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/model/prometheus_model_helpers.erl"},{line,127}]},{prometheus_rabbitmq_core_metrics_collector,'-mf/3-lc$^2/1-1-',3,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,169}]},{prometheus_rabbitmq_core_metrics_collector,'-collect_mf/2-lc$^0/1-0-',2,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,153}]},{prometheus_rabbitmq_core_metrics_collector,collect_mf,2,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,154}]},{prometheus_collector,collect_mf,3,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/prometheus_collector.erl"},{line,141}]},{prometheus_registry,'-collect/2-lc$^0/1-0-',3,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/prometheus_registry.erl"},{line,86}]}]
I ran the following command:
docker run -it --rm --network host pivotalrabbitmq/perf-test:latest --uri amqp://username:password@localhost:5672 -x 1 -y 2 -u "throughput-test-12" --id "test-12" -f persistent --rate 5000 --consumer-rate 2000
Rabbitmq Status
[{pid,158},
{running_applications,
[{prometheus_rabbitmq_exporter,
"RabbitMQ Prometheus.io metrics exporter","3.7.2.5"},
{prometheus_cowboy,[],"0.1.7"},
{rabbitmq_management,"RabbitMQ Management Console","3.7.14"},
{amqp_client,"RabbitMQ AMQP Client","3.7.14"},
{rabbitmq_peer_discovery_aws,
"AWS-based RabbitMQ peer discovery backend","3.7.14"},
{rabbitmq_peer_discovery_common,
"Modules shared by various peer discovery backends","3.7.14"},
{rabbitmq_management_agent,"RabbitMQ Management Agent","3.7.14"},
{rabbitmq_web_dispatch,"RabbitMQ Web Dispatcher","3.7.14"},
{rabbit,"RabbitMQ","3.7.14"},
{rabbit_common,
"Modules shared by rabbitmq-server and rabbitmq-erlang-client",
"3.7.14"},
{cowboy,"Small, fast, modern HTTP server.","2.6.1"},
{ranch,"Socket acceptor pool for TCP protocols.","1.7.1"},
{rabbitmq_aws,
"A minimalistic AWS API interface used by rabbitmq-autocluster (3.6.x) and other RabbitMQ plugins",
"3.7.14"},
{ssl,"Erlang/OTP SSL application","9.2.1"},
{public_key,"Public key infrastructure","1.6.5"},
{asn1,"The Erlang ASN1 compiler version 5.0.8","5.0.8"},
{os_mon,"CPO CXC 138 46","2.4.7"},
{cowlib,"Support library for manipulating Web protocols.","2.7.0"},
{crypto,"CRYPTO","4.4.1"},
{prometheus_httpd,"Prometheus.io inets httpd exporter","2.1.10"},
{recon,"Diagnostic tools for production use","2.4.0"},
{prometheus,"Prometheus.io client in Erlang","4.2.2"},
{xmerl,"XML parser","1.3.20"},
{mnesia,"MNESIA CXC 138 12","4.15.6"},
{sysmon_handler,"Rate-limiting system_monitor event handler","1.1.0"},
{jsx,"a streaming, evented json parsing toolkit","2.9.0"},
{accept,"Accept header(s) for Erlang/Elixir","0.3.5"},
{inets,"INETS CXC 138 49","7.0.6"},
{prometheus_process_collector,
"Prometheus.io process collector\n Collector exports the current state of process metrics including cpu, memory,\n file descriptor usage and native threads count as well as the process start and up times.",
"1.4.3"},
{lager,"Erlang logging framework","3.6.9"},
{goldrush,"Erlang event stream processor","0.1.9"},
{compiler,"ERTS CXC 138 10","7.3.2"},
{syntax_tools,"Syntax tools","2.1.7"},
{sasl,"SASL CXC 138 11","3.3"},
{stdlib,"ERTS CXC 138 10","3.8.1"},
{kernel,"ERTS CXC 138 10","6.3.1"}]},
{os,{unix,linux}},
{erlang_version,
"Erlang/OTP 21 [erts-10.3.2] [source] [64-bit] [smp:2:2] [ds:2:2:10] [async-threads:64] [hipe]\n"},
{memory,
[{connection_readers,104372},
{connection_writers,198040},
{connection_channels,10296856},
{connection_other,190268},
{queue_procs,16604332},
{queue_slave_procs,0},
{plugins,3143612},
{other_proc,31609940},
{metrics,224724},
{mgmt_db,886056},
{mnesia,91248},
{other_ets,2943056},
{binary,4557368},
{msg_index,29104},
{code,28375249},
{atom,1213657},
{other_system,23213310},
{allocated_unused,32851544},
{reserved_unallocated,120737792},
{strategy,rss},
{total,[{erlang,123681192},{rss,277270528},{allocated,156532736}]}]},
{alarms,[]},
{listeners,[{clustering,25672,"::"},{amqp,5672,"::"},{http,15672,"::"}]},
{vm_memory_calculation_strategy,rss},
{vm_memory_high_watermark,0.4},
{vm_memory_limit,3345183539},
{disk_free_limit,16725917696},
{disk_free,49809350656},
{file_descriptors,
[{total_limit,1048476},
{total_used,6},
{sockets_limit,943626},
{sockets_used,3}]},
{processes,[{limit,1048576},{used,434}]},
{run_queue,1},
{uptime,8778},
{kernel,{net_ticktime,60}}]
I got the same issue today. When I first configured the containers, everything was fine. Then I ran my application, which generates some messages inside RabbitMQ, so I can see some metrics in Grafana (with Prometheus). After this I was getting HTTP 500 when I tried the /api/metrics endpoint of RabbitMQ (and Prometheus showed the metrics target as down for RabbitMQ). If I stop the Docker container and start it again, everything is fine. The moment I send something to RabbitMQ it crashes again. Any insights about this?
The error for me:
2019-04-08 00:41:47 =CRASH REPORT====
crasher:
initial call: cowboy_stream_h:request_process/3
pid: <0.1107.0>
registered_name: []
exception error: {function_clause,[{proplists,get_value,[get,0,undefined],[{file,"proplists.erl"},{line,215}]},{prometheus_rabbitmq_core_metrics_collector,'-collect_metrics/2-lc$^0/1-0-',3,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,177}]},{prometheus_model_helpers,create_mf,5,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/model/prometheus_model_helpers.erl"},{line,127}]},{prometheus_rabbitmq_core_metrics_collector,'-mf/3-lc$^2/1-1-',3,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,169}]},{prometheus_rabbitmq_core_metrics_collector,'-collect_mf/2-lc$^0/1-0-',2,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,153}]},{prometheus_rabbitmq_core_metrics_collector,collect_mf,2,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,154}]},{prometheus_collector,collect_mf,3,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/prometheus_collector.erl"},{line,141}]},{prometheus_registry,'-collect/2-lc$^0/1-0-',3,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/prometheus_registry.erl"},{line,86}]}]}
ancestors: [<0.1106.0>,<0.563.0>,<0.562.0>,rabbit_web_dispatch_sup,<0.515.0>]
message_queue_len: 0
messages: []
links: [<0.1106.0>,#Port<0.144>]
dictionary: []
trap_exit: false
status: running
heap_size: 4185
stack_size: 27
reductions: 49340
neighbours:
2019-04-08 00:41:47 =ERROR REPORT====
Ranch listener rabbit_web_dispatch_sup_15672, connection process <0.1106.0>, stream 1 had its request process <0.1107.0> exit with reason function_clause and stacktrace [{proplists,get_value,[get,0,undefined],[{file,"proplists.erl"},{line,215}]},{prometheus_rabbitmq_core_metrics_collector,'-collect_metrics/2-lc$^0/1-0-',3,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,177}]},{prometheus_model_helpers,create_mf,5,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/model/prometheus_model_helpers.erl"},{line,127}]},{prometheus_rabbitmq_core_metrics_collector,'-mf/3-lc$^2/1-1-',3,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,169}]},{prometheus_rabbitmq_core_metrics_collector,'-collect_mf/2-lc$^0/1-0-',2,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,153}]},{prometheus_rabbitmq_core_metrics_collector,collect_mf,2,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,154}]},{prometheus_collector,collect_mf,3,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/prometheus_collector.erl"},{line,141}]},{prometheus_registry,'-collect/2-lc$^0/1-0-',3,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/prometheus_registry.erl"},{line,86}]}]
Rabbitmq status:
[{pid,345},
{running_applications,
[{prometheus_rabbitmq_exporter,
"RabbitMQ Prometheus.io metrics exporter","3.7.2.5"},
{rabbitmq_management,"RabbitMQ Management Console","3.7.13"},
{rabbitmq_management_agent,"RabbitMQ Management Agent","3.7.13"},
{rabbitmq_web_dispatch,"RabbitMQ Web Dispatcher","3.7.13"},
{cowboy,"Small, fast, modern HTTP server.","2.6.1"},
{rabbit,"RabbitMQ","3.7.13"},
{mnesia,"MNESIA CXC 138 12","4.15.6"},
{amqp_client,"RabbitMQ AMQP Client","3.7.13"},
{rabbit_common,
"Modules shared by rabbitmq-server and rabbitmq-erlang-client",
"3.7.13"},
{ranch,"Socket acceptor pool for TCP protocols.","1.7.1"},
{ssl,"Erlang/OTP SSL application","9.2.1"},
{public_key,"Public key infrastructure","1.6.5"},
{asn1,"The Erlang ASN1 compiler version 5.0.8","5.0.8"},
{prometheus_cowboy,[],"0.1.7"},
{prometheus_httpd,"Prometheus.io inets httpd exporter","2.1.10"},
{cowlib,"Support library for manipulating Web protocols.","2.7.0"},
{os_mon,"CPO CXC 138 46","2.4.7"},
{prometheus,"Prometheus.io client in Erlang","4.2.2"},
{recon,"Diagnostic tools for production use","2.3.6"},
{jsx,"a streaming, evented json parsing toolkit","2.9.0"},
{inets,"INETS CXC 138 49","7.0.6"},
{xmerl,"XML parser","1.3.20"},
{sysmon_handler,"Rate-limiting system_monitor event handler","1.1.0"},
{accept,"Accept header(s) for Erlang/Elixir","0.3.5"},
{crypto,"CRYPTO","4.4.1"},
{lager,"Erlang logging framework","3.6.5"},
{goldrush,"Erlang event stream processor","0.1.9"},
{compiler,"ERTS CXC 138 10","7.3.2"},
{syntax_tools,"Syntax tools","2.1.7"},
{sasl,"SASL CXC 138 11","3.3"},
{stdlib,"ERTS CXC 138 10","3.8"},
{kernel,"ERTS CXC 138 10","6.3"}]},
{os,{unix,linux}},
{erlang_version,
"Erlang/OTP 21 [erts-10.3.1] [source] [64-bit] [smp:2:2] [ds:2:2:10] [async-threads:64] [hipe]\n"},
{memory,
[{connection_readers,109956},
{connection_writers,5940},
{connection_channels,20480},
{connection_other,174748},
{queue_procs,40856},
{queue_slave_procs,0},
{plugins,1624148},
{other_proc,26681212},
{metrics,223484},
{mgmt_db,308632},
{mnesia,90744},
{other_ets,2838744},
{binary,902336},
{msg_index,58208},
{code,28127455},
{atom,1180881},
{other_system,22955632},
{allocated_unused,14988064},
{reserved_unallocated,0},
{strategy,rss},
{total,[{erlang,85343456},{rss,90222592},{allocated,100331520}]}]},
{alarms,[]},
{listeners,[{clustering,25672,"::"},{amqp,5672,"::"},{http,15672,"::"}]},
{vm_memory_calculation_strategy,rss},
{vm_memory_high_watermark,0.4},
{vm_memory_limit,830613094},
{disk_free_limit,50000000},
{disk_free,50668847104},
{file_descriptors,
[{total_limit,1048476},
{total_used,6},
{sockets_limit,943626},
{sockets_used,1}]},
{processes,[{limit,1048576},{used,429}]},
{run_queue,0},
{uptime,2374},
{kernel,{net_ticktime,60}}]
Thank you for the reports, will look. @wesley-ramos, can you share your docker compose/config?
@deadtrickster I don't know if mine helps, but here you go: In my docker compose
# … elided for brevity …
rabbitmq1:
image: rabbitmq:3-management
ports:
- 15672:15672
- 5672:5672
environment:
RABBITMQ_ERLANG_COOKIE: SWQOKODSQALRPCLNMEQG
RABBITMQ_DEFAULT_USER: rabbitmq
RABBITMQ_DEFAULT_PASS: rabbitmq
RABBITMQ_DEFAULT_VHOST: /
volumes:
- "./rabbitmq/enabled_plugins:/etc/rabbitmq/enabled_plugins"
- "./rabbitmq/accept-0.3.5.ez:/opt/rabbitmq/plugins/accept-0.3.5.ez"
- "./rabbitmq/prometheus_cowboy-0.1.7.ez:/opt/rabbitmq/plugins/prometheus_cowboy-0.1.7.ez"
- "./rabbitmq/prometheus_httpd-2.1.10.ez:/opt/rabbitmq/plugins/prometheus_httpd-2.1.10.ez"
- "./rabbitmq/prometheus_process_collector-1.4.3.ez:/opt/rabbitmq/plugins/pluginsprometheus_process_collector-1.4.3.ez"
- "./rabbitmq/prometheus_rabbitmq_exporter-3.7.2.5.ez:/opt/rabbitmq/plugins/prometheus_rabbitmq_exporter-3.7.2.5.ez"
- "./rabbitmq/prometheus-4.2.2.ez:/opt/rabbitmq/plugins/prometheus-4.2.2.ez"
prometheus:
image: prom/prometheus
ports:
- 9090:9090
volumes:
- "./prometheus/config.yml:/etc/prometheus/prometheus.yml"
command: "--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus"
depends_on:
- rabbitmq1
My configs: rabbitmq/enabled_plugins
[rabbitmq_management, prometheus_rabbitmq_exporter].
prometheus/config.yml
# my global config
global:
scrape_interval: 60s # By default, scrape targets every 15 seconds.
evaluation_interval: 60s # By default, scrape targets every 15 seconds.
# scrape_timeout is set to the global default (10s).
# Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
external_labels:
monitor: 'c4m-prometheus-monitor'
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first.rules"
# - "second.rules"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
# Override the global default
scrape_interval: 30s
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ['localhost:9090']
#Monitoring rabbitmq:
- job_name: rabbitmq
scrape_interval: 30s
scrape_timeout: 30s
metrics_path: /api/metrics
static_configs:
- targets: ['rabbitmq1:15672']
Thank you for the reports, will look. @wesley-ramos, can you share your docker compose/config?
Dockerfile
FROM rabbitmq:3-management
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates wget
RUN cd /plugins && \
wget https://github.com/deadtrickster/prometheus_rabbitmq_exporter/releases/download/v3.7.2.5/accept-0.3.5.ez && \
wget https://github.com/deadtrickster/prometheus_rabbitmq_exporter/releases/download/v3.7.2.5/prometheus-4.2.2.ez && \
wget https://github.com/deadtrickster/prometheus_rabbitmq_exporter/releases/download/v3.7.2.5/prometheus_cowboy-0.1.7.ez && \
wget https://github.com/deadtrickster/prometheus_rabbitmq_exporter/releases/download/v3.7.2.5/prometheus_httpd-2.1.10.ez && \
wget https://github.com/deadtrickster/prometheus_rabbitmq_exporter/releases/download/v3.7.2.5/prometheus_process_collector-1.4.3.ez && \
wget https://github.com/deadtrickster/prometheus_rabbitmq_exporter/releases/download/v3.7.2.5/prometheus_rabbitmq_exporter-3.7.2.5.ez
COPY rabbitmq.conf /etc/rabbitmq/rabbitmq.conf
RUN rabbitmq-plugins --offline enable rabbitmq_peer_discovery_aws && \
rabbitmq-plugins enable prometheus_rabbitmq_exporter && \
rabbitmq-plugins enable prometheus_process_collector
EXPOSE 15672 5672 4369 25672
rabbitmq.conf
default_vhost = /
default_user = user
default_pass = password
cluster_formation.peer_discovery_backend = rabbit_peer_discovery_aws
cluster_formation.aws.region = us-east-1
cluster_formation.aws.access_key_id = my-access-key
cluster_formation.aws.secret_key = my-secret
cluster_formation.aws.instance_tags.application = rabbitmq
vm_memory_high_watermark_paging_ratio = 0.5
vm_memory_high_watermark.relative = 0.4
disk_free_limit.relative = 2.0
I suspected the error was caused by the large number of metrics, so I limited some metrics using the configuration below. But that did not solve
rabbitmq.config
[
{rabbit, [
{default_vhost,<<"/">>},
{default_user, <<"admin">>},
{vm_memory_high_watermark, 0.4},
{disk_free_limit, {mem_relative, 2.0}}
]
},
{prometheus,[
{collectors,[
prometheus_vm_statistics_collector,
prometheus_vm_memory_collector,
prometheus_process_collector,
prometheus_rabbitmq_queues_collector,
prometheus_rabbitmq_core_metrics_collector,
prometheus_rabbitmq_overview_collector]},
{rabbitmq_exporter,[
{queue_messages_stat,[messages_published_total,messages_delivered_total,messages_redelivered_total]},
{exchange_messages_stat,[]}]}
]
}
].
I'm also running into the same problem. I've got 3 nodes running in Kubernetes and 1 node gives the normal results, but the other 2 nodes give a code 500 with the same error message as above.
Any ideas where to look for a solution?
For now the only workaround is excluding the core collector from the collectors list; at least this allows continuing with the rest. Still, for me the full docker compose/setup for the cluster would help. Plus a load generator. Something allowing me to just run compose up, see it fail, and then go to the container's shell.
The same issue here, for me, disabling HiPE solves it. This is the configuration used:
Dockerfile
FROM rabbitmq:3.7.13-management
COPY prometheus_rabbitmq_exporter/*.ez /opt/rabbitmq/plugins/
RUN rabbitmq-plugins enable --offline rabbitmq_peer_discovery_k8s rabbitmq_shovel rabbitmq_shovel_management prometheus_rabbitmq_exporter prometheus_process_collector rabbitmq_top
enabled_plugins
[rabbitmq_management,rabbitmq_peer_discovery_k8s,rabbitmq_shovel,rabbitmq_shovel_management,prometheus_rabbitmq_exporter,prometheus_process_collector,rabbitmq_top].
rabbitmq.config
cluster_formation.peer_discovery_backend = rabbit_peer_discovery_k8s
cluster_formation.k8s.address_type = hostname
cluster_formation.node_cleanup.interval = 10
cluster_formation.node_cleanup.only_log_warning = true
cluster_partition_handling = autoheal
loopback_users.guest = false
hipe_compile = true
vm_memory_high_watermark.relative = 0.7
log.syslog = true
log.syslog.transport = udp
log.syslog.port = 5140
Disabling HiPE helped me.
I'm still seeing the issue with hipe compilation disabled
2019-04-26 08:40:55.534 [error] <0.28015.10> CRASH REPORT Process <0.28015.10> with 0 neighbours crashed with reason: no function clause matching proplists:get_value(get, 0, undefined) line 215
2019-04-26 08:40:55.535 [error] <0.28014.10> Ranch listener rabbit_web_dispatch_sup_15672, connection process <0.28014.10>, stream 1 had its request process <0.28015.10> exit with reason function_clause and stacktrace [{proplists,get_value,[get,0,undefined],[{file,"proplists.erl"},{line,215}]},{prometheus_rabbitmq_core_metrics_collector,'-collect_metrics/2-lc$^0/1-0-',3,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,177}]},{prometheus_model_helpers,create_mf,5,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/model/prometheus_model_helpers.erl"},{line,127}]},{prometheus_rabbitmq_core_metrics_collector,'-mf/3-lc$^2/1-1-',3,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,169}]},{prometheus_rabbitmq_core_metrics_collector,'-collect_mf/2-lc$^0/1-0-',2,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,153}]},{prometheus_rabbitmq_core_metrics_collector,collect_mf,2,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,154}]},{prometheus_collector,collect_mf,3,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/prometheus_collector.erl"},{line,141}]},{prometheus_registry,'-collect/2-lc$^0/1-0-',3,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/prometheus_registry.erl"},{line,86}]}]
2019-04-26 08:41:25.536 [error] <0.28062.10> CRASH REPORT Process <0.28062.10> with 0 neighbours crashed with reason: no function clause matching proplists:get_value(get, 0, undefined) line 215
2019-04-26 08:41:25.537 [error] <0.28061.10> Ranch listener rabbit_web_dispatch_sup_15672, connection process <0.28061.10>, stream 1 had its request process <0.28062.10> exit with reason function_clause and stacktrace [{proplists,get_value,[get,0,undefined],[{file,"proplists.erl"},{line,215}]},{prometheus_rabbitmq_core_metrics_collector,'-collect_metrics/2-lc$^0/1-0-',3,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,177}]},{prometheus_model_helpers,create_mf,5,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/model/prometheus_model_helpers.erl"},{line,127}]},{prometheus_rabbitmq_core_metrics_collector,'-mf/3-lc$^2/1-1-',3,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,169}]},{prometheus_rabbitmq_core_metrics_collector,'-collect_mf/2-lc$^0/1-0-',2,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,153}]},{prometheus_rabbitmq_core_metrics_collector,collect_mf,2,[{file,"src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"},{line,154}]},{prometheus_collector,collect_mf,3,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/prometheus_collector.erl"},{line,141}]},{prometheus_registry,'-collect/2-lc$^0/1-0-',3,[{file,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/deps/prometheus/src/prometheus_registry.erl"},{line,86}]}]
Actually it worked briefly but crashed shortly after with the same error: 2019-04-26 09:21:38.323 [error] <0.1351.0> CRASH REPORT Process <0.1351.0> with 0 neighbours crashed with reason: no function clause matching proplists:get_value(get, 0, undefined) line 215
Actually it worked briefly but crashed shortly after with the same error: 2019-04-26 09:21:38.323 [error] <0.1351.0> CRASH REPORT Process <0.1351.0> with 0 neighbours crashed with reason: no function clause matching proplists:get_value(get, 0, undefined) line 215
Do you have a solution?
@deadtrickster not sure if this is helpful, but here is what i've been able to dig with dbg:
(<0.1674.0>) call prometheus_rabbitmq_core_metrics_collector:collect_metrics(["rabbitmq_core_",<<"channel_queue_get">>],{counter,#Fun<prometheus_rabbitmq_core_metrics_collector.0.62683835>,
[{{<0.1559.0>,
{resource,<<"/">>,queue,<<"amq.gen-v2dNE23_D7AoufGCm2AW4w">>}},
0,0,258707,0,0,258704,0}]})
Seems like it fails right after this trace message
Yep, Fun is called and fails.
I think it's Fun = fun(D) -> proplists:get_value(Key, element(Index, D)) end,
(line 168). Or, expanded: Fun = fun(D) -> proplists:get_value(get, element(2, D)) end
and data {{<0.1559.0>, {resource,<<"/">>,queue,<<"amq.gen-v2dNE23_D7AoufGCm2AW4w">>}}, 0,0,258707,0,0,258704,0}
.
Maybe version mismatch, @gerhard ?
I used your own image, so I hope it's not a version mismatch then.
Will look into this shortly.
@wesley-ramos can you put your config into a docker-compose.yml
that we can all share? You can use this as a starting point: https://github.com/rabbitmq/rabbitmq-prometheus/blob/master/docker/docker-compose.yml
Today ran away, will pick up next week.
These crashes were due to incorrect mappings to metrics stored in ETS tables.
I just want to make sure that everyone is aware that https://github.com/deadtrickster/prometheus_rabbitmq_exporter/releases/tag/v3.7.2.5 requires Erlang/OTP v21.0.2 or newer. The beam files were produced with compiler-7.2.2
which was introduced in https://github.com/erlang/otp/releases/tag/OTP-21.0.2
wget https://github.com/deadtrickster/prometheus_rabbitmq_exporter/releases/download/v3.7.2.5/prometheus_rabbitmq_exporter-3.7.2.5.ez
unzip prometheus_rabbitmq_exporter-3.7.2.5.ez
cd prometheus_rabbitmq_exporter-3.7.2.5/ebin
erl -pa $PWD
3> {_, Beam, _} = code:get_object_code(prometheus_rabbitmq_core_metrics_collector).
{prometheus_rabbitmq_core_metrics_collector,<<70,79,82,49,
0,0,63,68,
66,69,65,77,
65,116,85,
56,0,0,2,
235,0,0,0,
61,42,112,
114,...>>,
"/Users/gerhard/Downloads/prometheus_rabbitmq_exporter-3.7.2.5/ebin/prometheus_rabbitmq_core_metrics_collector.beam"}
4> beam_lib:chunks(Beam, [compile_info]).
{ok,{prometheus_rabbitmq_core_metrics_collector,[{compile_info,[{version,"7.2.2"},
{options,[debug_info,
{i,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/include"},
{lager_extra_sinks,[rabbit_log,rabbit_log_channel,
rabbit_log_connection,rabbit_log_ldap,rabbit_log_mirroring,
rabbit_log_queue,rabbit_log_federation,rabbit_log_upgrade]},
{parse_transform,lager_transform},
warn_obsolete_guard,warn_shadow_vars,warn_export_vars]},
{source,"/home/dead/Projects/rabbitmq/prometheus_rabbitmq_exporter/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl"}]}]}}
The reason I mention this is that everyone on this thread is using Docker. The first Docker image that has a compatible Erlang/OTP version is rabbitmq:3.7.9:
docker run -it --rm rabbitmq:3.7.9 -- cat /usr/local/lib/erlang/releases/21/OTP_VERSION
21.2.5
# rabbitmq:3.7.8 ships with Erlang/OTP 20.3.8.5
# which will fail to run prometheus_rabbitmq_exporter-3.7.2.5.ez
docker run -it --rm rabbitmq:3.7.8 -- cat /usr/lib/erlang/releases/20/OTP_VERSION
20.3.8.5
The above explains why https://github.com/rabbitmq/prometheus_rabbitmq_exporter/commit/138a863b2a36546154ac67fb7df0147955d58502 was necessary.
@deadtrickster you might want to bump the version to 3.7.9.x
since it might catch users that try to run this plugin with Docker images rabbitmq:3.7.{2..8}
We have a fix in Docker image pivotalrabbitmq/prometheus_rabbitmq_exporter:80
Who wants to try it out and confirm that it works?
Anyone?
I have deployed the custom docker image to our staging environment and no longer see the errors that I mentioned previously
merging then?
Sounds good to me!
Just tested the metrics endpoint with plugins from the container and no errors occurred. BTW my setup was not dockerized; I used Erlang 21.3 (erlang-base-hipe) from the Erlang Solutions repository on a Debian 9 VM running RabbitMQ 3.7.14
I used Erlang 21.3 (erlang-base-hipe) from Erlang-Solutions repository on a Debian 9 VM running with RabbitMQ 3.7.14
FWIW, we also maintain a Debian repo for Erlang.
rabbitmq about info
but prometheus_rabbitmq_exporter crashed