Open agalera opened 7 years ago
I doubt you'd be able to use a thread for that, as it wouldn't be safe to read from/write to the amqp socket from multiple threads. You'd need to use a mutex for every socket read/write, which would slow things way down. You'd really have to use something like eventlet, gevent, asyncio or similar for this to work optimally.
identical exception with eventlet, any ideas?
amqp.exceptions.ConnectionForced: Too many heartbeats missed
it seems I'm not reading the answer RabbitMQ not?
"DEBUG:amqp:heartbeat_tick: sending heartbeat for connection 9ba9740dd54142399895c4d8f792b90b"
Thanks for your help
source:
from kombu import Connection, Exchange, Queue
from kombu.common import eventloop
import logging
import time
import sys
import weakref
import eventlet
from eventlet import spawn_after
eventlet.monkey_patch()
logging.basicConfig(level=10, stream=sys.stdout)
durable = True
name = 'rocket1'
def callback(body, message):
for x in range(22):
print(x)
time.sleep(1)
print("finish task")
message.ack()
return True
def monitor_heartbeats(connection, rate=2):
"""Function to send heartbeat checks to RabbitMQ. This keeps the
connection alive over long-running processes."""
interval = connection.heartbeat / 2.0
cref = weakref.ref(connection)
logging.info("Starting heartbeat monitor.")
def heartbeat_check():
conn = cref()
if conn is not None and conn.connected:
try:
conn.heartbeat_check(rate=rate)
logging.info("Heartbeat ok")
except:
logging.error("Heartbeat failed")
spawn_after(interval, heartbeat_check)
return spawn_after(interval, heartbeat_check)
with Connection('amqp://guest:guest@%s//?heartbeat=10' % 'localhost', heartbeat=10) as conn:
conn.ensure_connection()
monitor_heartbeats(conn)
exchange = Exchange(name, 'direct', durable=durable)
queue = Queue(name=name,
exchange=exchange,
durable=durable, routing_key=name)
queue(conn).declare()
logging.info("create queue: %s durable: %s" % (name, durable))
with conn.Consumer(queue, callbacks=[callback]) as consumer:
consumer.qos(prefetch_count=1)
for _ in eventloop(conn, timeout=1, ignore_timeouts=True):
pass
DEBUG:amqp:Start from server, version: 0.9, properties: {'cluster_name': 'rabbit@sb1', 'platform': 'Erlang/OTP', 'information': 'Licensed under the MPL. See http://www.rabbitmq.com/', 'product': 'RabbitMQ', 'capabilities': {'publisher_confirms': True, 'consumer_priorities': True, 'connection.blocked': True, 'consumer_cancel_notify': True, 'per_consumer_qos': True, 'authentication_failure_close': True, 'exchange_exchange_bindings': True, 'basic.nack': True}, 'copyright': 'Copyright (C) 2007-2014 GoPivotal, Inc.', 'version': '3.3.5'}, mechanisms: ['AMQPLAIN', 'PLAIN'], locales: ['en_US']
INFO:root:Starting heartbeat monitor.
DEBUG:amqp:using channel_id: 1
DEBUG:amqp:Channel open
INFO:root:create queue: rocket1 durable: True
DEBUG:amqp:heartbeat_tick : for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick : Prev sent/recv: None/None, now - 12/12, monotonic - 6904829.985433596, last_heartbeat_sent - 6904829.985424093, heartbeat int. - 10.0 for connection 568036154faf4da9a7108bd36df760e3
INFO:root:Heartbeat ok
DEBUG:amqp:heartbeat_tick : for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick : Prev sent/recv: 12/12, now - 12/12, monotonic - 6904832.487990192, last_heartbeat_sent - 6904829.985424093, heartbeat int. - 10.0 for connection 568036154faf4da9a7108bd36df760e3
INFO:root:Heartbeat ok
0
1
DEBUG:amqp:heartbeat_tick : for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick : Prev sent/recv: 12/12, now - 12/15, monotonic - 6904834.990522305, last_heartbeat_sent - 6904829.985424093, heartbeat int. - 10.0 for connection 568036154faf4da9a7108bd36df760e3
INFO:root:Heartbeat ok
2
3
4
DEBUG:amqp:heartbeat_tick : for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick : Prev sent/recv: 12/15, now - 12/15, monotonic - 6904837.493593533, last_heartbeat_sent - 6904829.985424093, heartbeat int. - 10.0 for connection 568036154faf4da9a7108bd36df760e3
INFO:root:Heartbeat ok
5
6
DEBUG:amqp:heartbeat_tick : for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick : Prev sent/recv: 12/15, now - 12/15, monotonic - 6904839.997124459, last_heartbeat_sent - 6904829.985424093, heartbeat int. - 10.0 for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick: sending heartbeat for connection 568036154faf4da9a7108bd36df760e3
INFO:root:Heartbeat ok
7
8
9
DEBUG:amqp:heartbeat_tick : for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick : Prev sent/recv: 12/15, now - 13/15, monotonic - 6904842.500985768, last_heartbeat_sent - 6904842.500978223, heartbeat int. - 10.0 for connection 568036154faf4da9a7108bd36df760e3
INFO:root:Heartbeat ok
10
11
DEBUG:amqp:heartbeat_tick : for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick : Prev sent/recv: 13/15, now - 13/15, monotonic - 6904845.0045556, last_heartbeat_sent - 6904842.500978223, heartbeat int. - 10.0 for connection 568036154faf4da9a7108bd36df760e3
INFO:root:Heartbeat ok
12
13
14
DEBUG:amqp:heartbeat_tick : for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick : Prev sent/recv: 13/15, now - 13/15, monotonic - 6904847.506937645, last_heartbeat_sent - 6904842.500978223, heartbeat int. - 10.0 for connection 568036154faf4da9a7108bd36df760e3
INFO:root:Heartbeat ok
15
16
DEBUG:amqp:heartbeat_tick : for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick : Prev sent/recv: 13/15, now - 13/15, monotonic - 6904850.009044969, last_heartbeat_sent - 6904842.500978223, heartbeat int. - 10.0 for connection 568036154faf4da9a7108bd36df760e3
INFO:root:Heartbeat ok
17
18
19
DEBUG:amqp:heartbeat_tick : for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick : Prev sent/recv: 13/15, now - 13/15, monotonic - 6904852.512543026, last_heartbeat_sent - 6904842.500978223, heartbeat int. - 10.0 for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick: sending heartbeat for connection 568036154faf4da9a7108bd36df760e3
INFO:root:Heartbeat ok
20
21
DEBUG:amqp:heartbeat_tick : for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick : Prev sent/recv: 13/15, now - 14/15, monotonic - 6904855.017000154, last_heartbeat_sent - 6904855.016993846, heartbeat int. - 10.0 for connection 568036154faf4da9a7108bd36df760e3
INFO:root:Heartbeat failed
22
finish task
DEBUG:amqp:heartbeat_tick : for connection 568036154faf4da9a7108bd36df760e3
DEBUG:amqp:heartbeat_tick : Prev sent/recv: 16/15, now - 17/20, monotonic - 6904885.06410167, last_heartbeat_sent - 6904885.064089782, heartbeat int. - 10.0 for connection 568036154faf4da9a7108bd36df760e3
INFO:root:Heartbeat ok
How long does it take before you get the error?
I'm trying here with kombu 4.0 (dev) and it's been working fine so far with heartbeat=10 and heartbeat=1
Your code has been running for 2 hours with heartbeat=10 here now.
I used the "4.0-devel" branch and it seems the error persists.
When I launch a task from the callback if the work lasts longer than 20 seconds, the heartbeat begins to fail.
So that the code above fails, you must add a message to the queue named "rocket1"
DEBUG:amqp:heartbeat_tick : for connection 5aae7adcedf9417faf304f82d5cfafbd
DEBUG:amqp:heartbeat_tick : Prev sent/recv: 15/16, now - 15/16, monotonic - 9156374.278685136, last_heartbeat_sent - 9156369.272508236, heartbeat int. - 10.0 for connection 5aae7adcedf9417faf304f82d5cfafbd
HEARTBEAT FAILED
Exception in thread Thread-1:
Traceback (most recent call last):
File "/usr/lib/python3.4/threading.py", line 920, in _bootstrap_inner
self.run()
File "/usr/lib/python3.4/threading.py", line 868, in run
self._target(*self._args, **self._kwargs)
File "server.py", line 28, in monitor_heartbeat
conn.heartbeat_check(rate)
File "/root/test123/test_kombu/kombu/kombu/connection.py", line 268, in heartbeat_check
return self.transport.heartbeat_check(self.connection, rate=rate)
File "/root/test123/test_kombu/kombu/kombu/transport/pyamqp.py", line 130, in heartbeat_check
return connection.heartbeat_tick(rate=rate)
File "/usr/local/lib/python3.4/dist-packages/amqp/connection.py", line 662, in heartbeat_tick
raise ConnectionForced('Too many heartbeats missed')
amqp.exceptions.ConnectionForced: Too many heartbeats missed
If the connection is closed when the callback spends more than 20 seconds, then it must mean that the callback is blocking the event loop from running!
You probably want to spawn that task into a separate greenlet thread.
But note also that when using eventlet/gevent any blocking call will block the eventloop, and that includes calculations like:
for i in long_list:
for j in another_long_list:
i ** j
gevent and eventlet uses cooperative scheduling, which means the threads need to voluntarily switch back into the event loop, such as by performing I/O or calling time.sleep:
for i in long_list:
for j in another_long_list:
i ** j
time.sleep(0)
Then there's also the problem with greenlet-incompatible code, such as C extensions that are not affected by the monkey patches.
What would be the right way to do with when using connection pools?
I found this error again and I found a solution (code below)
When I did this integration for the first time, I wanted to use the call that kombu does to my code, and this is a bad idea because we block the process with which rabbitmq communicates.
At first I thought that if I kept everything in an internal queue I would consume everything that was in rabbit, but it is not like that. If you set a prefetch of 10 (for example) it will only let you have 10 messages without you having returned the ack.
I leave the code here, I tried to keep it simple for the example.
Perhaps a similar example would be useful within the repository.
import logging
import sys
from queue import Queue as qq
from threading import Thread
from kombu import Connection, Exchange, Queue
from kombu.mixins import ConsumerMixin
logging.basicConfig(level=10, stream=sys.stdout)
durable = True
name = 'rocket1'
rabbit_url = "amqp://guest:guest@localhost:5672/"
class Worker(ConsumerMixin):
def __init__(self, connection, queues):
self.connection = connection
self.queues = queues
self.q = qq()
Thread(target=self.run_tasks).start()
def get_consumers(self, Consumer, channel):
return [Consumer(queues=self.queues,
callbacks=[self.on_message],
prefetch_count=1)]
def on_message(self, body, message):
print("new message to internal queue")
self.q.put((body, message))
def run_tasks(self):
while True:
try:
self.on_task(*self.q.get())
except Exception as ex:
logging.error(ex)
except KeyboardInterrupt:
break
def on_task(self, body, message):
print("run task")
import time
for x in range(50):
time.sleep(1)
print(x)
message.ack()
exchange = Exchange(name, type="direct", durable=durable)
queues = [Queue(name, exchange, routing_key=name)]
with Connection(rabbit_url, heartbeat=4) as conn:
worker = Worker(conn, queues)
worker.run()
@ask What do you think about this? Is there another more elegant way?
Hello, also facing this problem several years later, not sure what is the proper way to handle my rpc using kombu and RMQ, for long running tasks.
I cannot make the heartbeat too long, as the RMQ cluster is shared and disabling it does little favour to my application.
Trying to make use of the existing Connection
interface and using py-amqp
, i also centered around looping the heartbeat_check
method in the main thread and launching a second thread with the actual task, (but this thread must join to return a result).
I seems as if the heartbeat_check
method has no effect. I tried coupling it with drain_events
on a short timeout, but still I was not able to find a solution.
@ask It would be great to know what is the official way to deal with this. Should I not mess with the heartbeat, is it for internal use only? Is this issue planned to be solved in the future, or is it a non-issue/wont-fix? Am I missing the solution in one of the other bug reports?
The code, which did not work:
from kombu import Connection, Exchange, Queue
from kombu.mixins import ConsumerMixin
from threading import Thread, Event
import time
import logging
rabbit_url = "amqp://guest:guest@localhost:5672//"
exchange = Exchange("example-exchange", type="direct")
queue = Queue(name="example-queue", exchange=exchange, routing_key="BOB")
heartbeat_rate = 120
class ThreadReturningValue(Thread):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.result = None
def run(self):
self.result = self._target(*self._args, **self._kwargs)
def join(self, *args, **kwargs):
super().join(*args, **kwargs)
return self.result
def process_task(message, done_event):
print('Processing task that takes more than 2 * heartbeat seconds...')
time.sleep(300)
done_event.set()
print('event set')
return 'worker done'
class Worker(ConsumerMixin):
def __init__(self, connection, queues):
self.connection = connection
self.queues = queues
self.thread_done = Event()
self.worker_thread = None
def get_consumers(self, Consumer, channel):
return [Consumer(queues=self.queues, callbacks=[self.on_message])]
def on_message(self, body, message):
print('Received message: {0}'.format(body))
self.worker_thread = ThreadReturningValue(target=process_task, args=(message, self.thread_done))
self.worker_thread.start()
while not self.thread_done.is_set():
try:
self.connection.drain_events(timeout=0.1)
except Exception as e:
print(e)
self.connection.heartbeat_check()
print('heartbeat check')
time.sleep(1)
print('Task completed.')
self.worker_thread.join()
self.thread_done.clear()
message.ack()
print('Message acknowledged.')
reply = self.worker_thread.result
print('Reply: {0}'.format(reply))
with Connection(rabbit_url, heartbeat=heartbeat_rate) as conn:
conn.ensure_connection()
print('Connection supports heartbeats: {0}'.format(conn.supports_heartbeats))
worker = Worker(conn, [queue])
worker.run()
In the above snippet, for heartbeats * 2 < task-processing-time
, RMQ reschedules the message multiple times.
Versions:
I'm trying to send the heartbeat because sometimes I close the connection, I've been doing some tests and does not seem to work. what am I doing wrong?
Thx!
root@sb1:~/work/RocketTM/examples# python3 bla2.py
exception:
source: