rabbitmq / rabbitmq-server

Open source RabbitMQ: core server and tier 1 (built-in) plugins
https://www.rabbitmq.com/
Other
12.27k stars 3.91k forks source link

Random generic server crashes in the log #38

Closed jnordberg closed 10 years ago

jnordberg commented 10 years ago

I'm getting crashes when publishing messages, it seems to happen every 20th message i publish or so. Maybe i'm doing something wrong but I can't make heads or tails of the error logs.

==> /var/log/rabbitmq/rabbit@safetyos.log <==

=INFO REPORT==== 13-Aug-2014::14:23:04 ===
accepting AMQP connection <0.25428.0> (10.0.2.2:58391 -> 10.0.2.15:5672)

=ERROR REPORT==== 13-Aug-2014::14:23:19 ===
** Generic server <0.25434.0> terminating
** Last message in was {'$gen_cast',
                           {method,
                               {'basic.publish',0,<<"test-exchange">>,
                                   <<"routingKey">>,true,false},
                               {content,60,none,
                                   <<144,0,16,97,112,112,108,105,99,97,116,105,
                                     111,110,47,106,115,111,110,2,0>>,
                                   rabbit_framing_amqp_0_9_1,
                                   [<<"{\"idx\":15,\"prio\":0}">>]},
                               flow}}
** When Server state == {ch,running,rabbit_framing_amqp_0_9_1,1,<0.25428.0>,
                         <0.25432.0>,<0.25428.0>,
                         <<"10.0.2.2:58391 -> 10.0.2.15:5672">>,
                         {lstate,<0.25433.0>,false},
                         none,1,
                         {[],[]},
                         {user,<<"guest">>,
                          [administrator],
                          rabbit_auth_backend_internal,
                          {internal_user,<<"guest">>,
                           <<35,182,104,22,26,135,171,44,2,196,216,67,159,241,
                             160,42,233,1,84,197>>,
                           [administrator]}},
                         <<"/">>,<<>>,
                         {dict,1,16,16,8,80,48,
                          {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},
                          {{[],[],[],[],[],[],[],[],[],[],
                            [[<0.25439.0>|
                              {resource,<<"/">>,queue,<<"my-queue">>}]],
                            [],[],[],[],[]}}},
                         {state,
                          {dict,1,16,16,8,80,48,
                           {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},
                           {{[],[],[],[],[],[],[],[],[],[],
                             [[<0.25439.0>|#Ref<0.0.0.153346>]],
                             [],[],[],[],[]}}},
                          erlang},
                         {dict,0,16,16,8,80,48,
                          {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},
                          {{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}}},
                         {dict,0,16,16,8,80,48,
                          {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},
                          {{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}}},
                         {set,0,16,16,8,80,48,
                          {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},
                          {{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}}},
                         <0.25429.0>,
                         {state,fine,5000,#Ref<0.0.0.153503>},
                         true,15,
                         {{0,nil},{0,nil}},
                         [],
                         {{0,nil},{0,nil}},
                         [],none,0}
** Reason for termination ==
** {{badmatch,<<0>>},
    [{rabbit_framing_amqp_0_9_1,decode_properties,2,[]},
     {rabbit_binary_parser,ensure_content_decoded,1,[]},
     {rabbit_channel,handle_method,3,[]},
     {rabbit_channel,handle_cast,2,[]},
     {gen_server2,handle_msg,2,[]},
     {proc_lib,wake_up,3,[{file,"proc_lib.erl"},{line,249}]}]}

==> /var/log/rabbitmq/rabbit@safetyos-sasl.log <==

=CRASH REPORT==== 13-Aug-2014::14:23:19 ===
  crasher:
    initial call: gen:init_it/6
    pid: <0.25434.0>
    registered_name: []
    exception exit: {{badmatch,<<0>>},
                     [{rabbit_framing_amqp_0_9_1,decode_properties,2,[]},
                      {rabbit_binary_parser,ensure_content_decoded,1,[]},
                      {rabbit_channel,handle_method,3,[]},
                      {rabbit_channel,handle_cast,2,[]},
                      {gen_server2,handle_msg,2,[]},
                      {proc_lib,wake_up,3,
                                [{file,"proc_lib.erl"},{line,249}]}]}
      in function  gen_server2:terminate/3
    ancestors: [<0.25431.0>,<0.25430.0>,<0.25427.0>,<0.25426.0>,
                  rabbit_tcp_client_sup,rabbit_sup,<0.139.0>]
    messages: []
    links: [<0.25431.0>]
    dictionary: [{process_name,
                      {rabbit_channel,
                          {<<"10.0.2.2:58391 -> 10.0.2.15:5672">>,1}}},
                  {{credit_to,<0.25428.0>},35},
                  {{queue_exchange_stats,
                       {{resource,<<"/">>,queue,<<"my-queue">>},
                        {resource,<<"/">>,exchange,<<"test-exchange">>}}},
                   [{publish,14}]},
                  {{xtype_to_module,topic},rabbit_exchange_type_topic},
                  {pause_minority_guard,not_minority_mode},
                  {{exchange_stats,
                       {resource,<<"/">>,exchange,<<"test-exchange">>}},
                   [{confirm,14},{publish,14}]},
                  {permission_cache,
                      [{{resource,<<"/">>,exchange,<<"test-exchange">>},
                        write}]},
                  {{credit_from,<0.25439.0>},186},
                  {guid,{{4231563882,87170283,4179743796,3585566879},13}}]
    trap_exit: true
    status: running
    heap_size: 1598
    stack_size: 27
    reductions: 10968
  neighbours:

=SUPERVISOR REPORT==== 13-Aug-2014::14:23:19 ===
     Supervisor: {<0.25431.0>,rabbit_channel_sup}
     Context:    child_terminated
     Reason:     {{badmatch,<<0>>},
                  [{rabbit_framing_amqp_0_9_1,decode_properties,2,[]},
                   {rabbit_binary_parser,ensure_content_decoded,1,[]},
                   {rabbit_channel,handle_method,3,[]},
                   {rabbit_channel,handle_cast,2,[]},
                   {gen_server2,handle_msg,2,[]},
                   {proc_lib,wake_up,3,[{file,"proc_lib.erl"},{line,249}]}]}
     Offender:   [{pid,<0.25434.0>},
                  {name,channel},
                  {mfargs,
                      {rabbit_channel,start_link,
                          [1,<0.25428.0>,<0.25432.0>,<0.25428.0>,
                           <<"10.0.2.2:58391 -> 10.0.2.15:5672">>,
                           rabbit_framing_amqp_0_9_1,
                           {user,<<"guest">>,
                               [administrator],
                               rabbit_auth_backend_internal,
                               {internal_user,<<"guest">>,
                                   <<35,182,104,22,26,135,171,44,2,196,216,67,
                                     159,241,160,42,233,1,84,197>>,
                                   [administrator]}},
                           <<"/">>,[],<0.25429.0>,<0.25433.0>]}},
                  {restart_type,intrinsic},
                  {shutdown,4294967295},
                  {child_type,worker}]

=SUPERVISOR REPORT==== 13-Aug-2014::14:23:19 ===
     Supervisor: {<0.25431.0>,rabbit_channel_sup}
     Context:    shutdown
     Reason:     reached_max_restart_intensity
     Offender:   [{pid,<0.25434.0>},
                  {name,channel},
                  {mfargs,
                      {rabbit_channel,start_link,
                          [1,<0.25428.0>,<0.25432.0>,<0.25428.0>,
                           <<"10.0.2.2:58391 -> 10.0.2.15:5672">>,
                           rabbit_framing_amqp_0_9_1,
                           {user,<<"guest">>,
                               [administrator],
                               rabbit_auth_backend_internal,
                               {internal_user,<<"guest">>,
                                   <<35,182,104,22,26,135,171,44,2,196,216,67,
                                     159,241,160,42,233,1,84,197>>,
                                   [administrator]}},
                           <<"/">>,[],<0.25429.0>,<0.25433.0>]}},
                  {restart_type,intrinsic},
                  {shutdown,4294967295},
                  {child_type,worker}]

==> /var/log/rabbitmq/rabbit@safetyos.log <==

=ERROR REPORT==== 13-Aug-2014::14:23:19 ===
AMQP connection <0.25428.0> (running), channel 1 - error:
{{badmatch,<<0>>},
 [{rabbit_framing_amqp_0_9_1,decode_properties,2,[]},
  {rabbit_binary_parser,ensure_content_decoded,1,[]},
  {rabbit_channel,handle_method,3,[]},
  {rabbit_channel,handle_cast,2,[]},
  {gen_server2,handle_msg,2,[]},
  {proc_lib,wake_up,3,[{file,"proc_lib.erl"},{line,249}]}]}

=WARNING REPORT==== 13-Aug-2014::14:23:19 ===
Non-AMQP exit reason '{{badmatch,<<0>>},
                       [{rabbit_framing_amqp_0_9_1,decode_properties,2,[]},
                        {rabbit_binary_parser,ensure_content_decoded,1,[]},
                        {rabbit_channel,handle_method,3,[]},
                        {rabbit_channel,handle_cast,2,[]},
                        {gen_server2,handle_msg,2,[]},
                        {proc_lib,wake_up,3,
                                  [{file,"proc_lib.erl"},{line,249}]}]}'

=INFO REPORT==== 13-Aug-2014::14:23:19 ===
closing AMQP connection <0.25428.0> (10.0.2.2:58391 -> 10.0.2.15:5672)

=INFO REPORT==== 13-Aug-2014::14:23:20 ===
accepting AMQP connection <0.25447.0> (10.0.2.2:58401 -> 10.0.2.15:5672)

Running CentOS 6 in VirtualBox

$ erl
Erlang/OTP 17 [erts-6.1] [source-d2a4c20] [smp:2:2] [async-threads:10] [kernel-poll:false]
michaelklishin commented 10 years ago

@jnordberg can you please post your publishing code (in particular the message metadata/BasicProperties) and the output of rabbitmq report? Thanks.

jnordberg commented 10 years ago
Reporting server status on {{2014,8,13},{14,33,50}}

 ...
Status of node rabbit@safetyos ...
[{pid,1401},
 {running_applications,
     [{rabbitmq_management,"RabbitMQ Management Console","3.3.5"},
      {rabbitmq_web_dispatch,"RabbitMQ Web Dispatcher","3.3.5"},
      {webmachine,"webmachine","1.10.3-rmq3.3.5-gite9359c7"},
      {mochiweb,"MochiMedia Web Server","2.7.0-rmq3.3.5-git680dba8"},
      {rabbitmq_management_agent,"RabbitMQ Management Agent","3.3.5"},
      {rabbit,"RabbitMQ","3.3.5"},
      {os_mon,"CPO  CXC 138 46","2.2.15"},
      {inets,"INETS  CXC 138 49","5.10.2"},
      {mnesia,"MNESIA  CXC 138 12","4.12.1"},
      {amqp_client,"RabbitMQ AMQP Client","3.3.5"},
      {xmerl,"XML parser","1.3.7"},
      {sasl,"SASL  CXC 138 11","2.4"},
      {stdlib,"ERTS  CXC 138 10","2.1"},
      {kernel,"ERTS  CXC 138 10","3.0.1"}]},
 {os,{unix,linux}},
 {erlang_version,
     "Erlang/OTP 17 [erts-6.1] [source-d2a4c20] [smp:2:2] [async-threads:30] [kernel-poll:true]\n"},
 {memory,
     [{total,23947552},
      {connection_procs,2776},
      {queue_procs,2776},
      {plugins,240824},
      {other_proc,8844672},
      {mnesia,31272},
      {mgmt_db,59388},
      {msg_index,12544},
      {other_ets,553828},
      {binary,198488},
      {code,10566398},
      {atom,531937},
      {other_system,2902649}]},
 {alarms,[]},
 {listeners,[{clustering,25672,"::"},{amqp,5672,"::"}]},
 {vm_memory_high_watermark,0.4},
 {vm_memory_limit,794127564},
 {disk_free_limit,50000000},
 {disk_free,13210066944},
 {file_descriptors,
     [{total_limit,924},{total_used,3},{sockets_limit,829},{sockets_used,1}]},
 {processes,[{limit,1048576},{used,179}]},
 {run_queue,0},
 {uptime,91}]

Cluster status of node rabbit@safetyos ...
[{nodes,[{disc,[rabbit@safetyos]}]},
 {running_nodes,[rabbit@safetyos]},
 {cluster_name,<<"rabbit@safetyos">>},
 {partitions,[]}]

Application environment of node rabbit@safetyos ...
[{auth_backends,[rabbit_auth_backend_internal]},
 {auth_mechanisms,['PLAIN','AMQPLAIN']},
 {backing_queue_module,rabbit_variable_queue},
 {channel_max,0},
 {cluster_nodes,{[],disc}},
 {cluster_partition_handling,ignore},
 {collect_statistics,fine},
 {collect_statistics_interval,5000},
 {default_permissions,[<<".*">>,<<".*">>,<<".*">>]},
 {default_user,<<"guest">>},
 {default_user_tags,[administrator]},
 {default_vhost,<<"/">>},
 {delegate_count,16},
 {disk_free_limit,50000000},
 {enabled_plugins_file,"/etc/rabbitmq/enabled_plugins"},
 {error_logger,{file,"/var/log/rabbitmq/rabbit@safetyos.log"}},
 {frame_max,131072},
 {halt_on_upgrade_failure,true},
 {heartbeat,580},
 {hipe_compile,false},
 {hipe_modules,[rabbit_reader,rabbit_channel,gen_server2,rabbit_exchange,
                rabbit_command_assembler,rabbit_framing_amqp_0_9_1,
                rabbit_basic,rabbit_event,lists,queue,priority_queue,
                rabbit_router,rabbit_trace,rabbit_misc,rabbit_binary_parser,
                rabbit_exchange_type_direct,rabbit_guid,rabbit_net,
                rabbit_amqqueue_process,rabbit_variable_queue,
                rabbit_binary_generator,rabbit_writer,delegate,gb_sets,lqueue,
                sets,orddict,rabbit_amqqueue,rabbit_limiter,gb_trees,
                rabbit_queue_index,rabbit_exchange_decorator,gen,dict,ordsets,
                file_handle_cache,rabbit_msg_store,array,
                rabbit_msg_store_ets_index,rabbit_msg_file,
                rabbit_exchange_type_fanout,rabbit_exchange_type_topic,mnesia,
                mnesia_lib,rpc,mnesia_tm,qlc,sofs,proplists,credit_flow,pmon,
                ssl_connection,tls_connection,ssl_record,tls_record,gen_fsm,
                ssl]},
 {included_applications,[]},
 {log_levels,[{connection,info}]},
 {loopback_users,[]},
 {msg_store_file_size_limit,16777216},
 {msg_store_index_module,rabbit_msg_store_ets_index},
 {plugins_dir,"/usr/lib/rabbitmq/lib/rabbitmq_server-3.3.5/sbin/../plugins"},
 {plugins_expand_dir,"/var/lib/rabbitmq/mnesia/rabbit@safetyos-plugins-expand"},
 {queue_index_max_journal_entries,65536},
 {reverse_dns_lookups,false},
 {sasl_error_logger,{file,"/var/log/rabbitmq/rabbit@safetyos-sasl.log"}},
 {server_properties,[]},
 {ssl_apps,[asn1,crypto,public_key,ssl]},
 {ssl_cert_login_from,distinguished_name},
 {ssl_listeners,[]},
 {ssl_options,[]},
 {tcp_listen_options,[binary,
                      {packet,raw},
                      {reuseaddr,true},
                      {backlog,128},
                      {nodelay,true},
                      {linger,{true,0}},
                      {exit_on_close,false}]},
 {tcp_listeners,[5672]},
 {trace_vhosts,[]},
 {vm_memory_high_watermark,0.4},
 {vm_memory_high_watermark_paging_ratio,0.5}]

Connections:

Channels:

Queues on /:

Exchanges on /:
name    type    durable auto_delete internal    arguments   policy
    direct  true    false   false   []
amq.direct  direct  true    false   false   []
amq.fanout  fanout  true    false   false   []
amq.headers headers true    false   false   []
amq.match   headers true    false   false   []
amq.rabbitmq.log    topic   true    false   true    []
amq.rabbitmq.trace  topic   true    false   true    []
amq.topic   topic   true    false   false   []
test-exchange   topic   true    false   false   []

Bindings on /:

Consumers on /:

Permissions on /:
user    configure   write   read
guest   .*  .*  .*

Policies on /:

Parameters on /:

...done.
jnordberg commented 10 years ago

I just figured it out when trying to write a small code example that could reproduce it, in some cases I was sending priority: 0 which causes this error.

var amqp = require('amqp')
var connection = amqp.createConnection({host: 'localhost'})

connection.on('error', function(error) {
  console.log('connection error', error)
})

connection.once('ready', function() {
  console.log('connection ready')

  var exchangeOpts = {
    durable: true,
    autoDelete: false,
    confirm: true
  }

  connection.exchange('test-exchange', exchangeOpts, function(exchange) {
    console.log('exchange open')
    var msg = {hello: 'world'}
    var opts = {priority: 0}
    console.log('publish message', msg);
    exchange.publish('test', msg, opts, function() {
      console.log('message published', msg, arguments)
    })
  })

  connection.queue('my-queue', function(queue) {
    console.log('queue ready')
    queue.bind('test-exchange', 'test')
    queue.subscribe(function(message) {
      console.log('got message', message);
    })
  })

})
$ node queue-crash.js
connection ready
exchange open
publish message { hello: 'world' }
queue ready
Unhandled connection error: INTERNAL_ERROR
connection error { [Error: INTERNAL_ERROR] code: 541 }
michaelklishin commented 10 years ago

@jnordberg is this using node-amqp? node-amqp is known to have issues and is not maintained, so I'd recommend trying with amqp.lib.

In general, priority = 0 should be fine but I cannot reproduce the issue so far with Ruby and Java clients. So my guess is that node-amqp simply does not serialize things properly when priority equals 0.

jnordberg commented 10 years ago

@michaelklishin yep, node-amqp was the culprit. Sorry for the hassle and thanks for the amqp.lib tip, setting priority to 0 there causes no problems.