def _connect_loop(self, retry):
    """Attempt a connection to each expanded host/port pair, in order.

    The host list comes from ``self._expand_client_hosts()``. When that
    list is empty, ``STOP_CONNECTING`` is returned immediately. Otherwise
    each ``(host, port)`` pair is handed to ``self._connect_attempt`` until
    either the client's ``_stopped`` event is set or an attempt reports
    ``STOP_CONNECTING``.

    Args:
        retry: Passed through unchanged to ``self._connect_attempt``.

    Returns:
        ``STOP_CONNECTING`` when no hosts are available.
        NOTE(review): the trailing ``...`` presumably stands for the rest
        of the method, which is not shown here, so the result after the
        loop cannot be stated from this excerpt.
    """
    # Iterate through the hosts a full cycle before starting over
    status = None
    host_ports = self._expand_client_hosts()
    # Check for an empty hostlist, indicating none resolved
    # NOTE(review): this early return is the behaviour the accompanying
    # report questions; it proposes raising ForceRetryError here instead.
    if len(host_ports) == 0:
        return STOP_CONNECTING
    for host, port in host_ports:
        # Stop as soon as the client has been asked to stop.
        if self.client._stopped.is_set():
            status = STOP_CONNECTING
            break
        status = self._connect_attempt(host, port, retry)
        # Identity check: STOP_CONNECTING is compared as a sentinel.
        if status is STOP_CONNECTING:
            break
    ...
class KazooRetry:
    """Callable helper that runs a function and reacts to retryable errors.

    Only ``__call__`` is shown in this excerpt; the ``...`` placeholders
    are kept exactly where the original had them.
    """
    ...

    def __call__(self, func, *args, **kwargs):
        """Call ``func(*args, **kwargs)`` and return its result.

        ``self.reset()`` is invoked once before the first attempt. When
        ``self.deadline`` is set and ``self._cur_stoptime`` is still
        ``None``, the stop time is set to the current time plus the
        deadline before ``func`` is called.

        Args:
            func: Callable to invoke.
            *args: Positional arguments forwarded to ``func``.
            **kwargs: Keyword arguments forwarded to ``func``.

        Returns:
            Whatever ``func`` returns.

        Raises:
            ConnectionClosedError: Re-raised unchanged, never retried.
            RetryFailedError: When an exception in
                ``self.retry_exceptions`` is caught and
                ``self._attempts`` equals ``self.max_tries``.
        """
        self.reset()
        while True:
            try:
                # Start the deadline clock only if it is not running yet.
                if self.deadline is not None and self._cur_stoptime is None:
                    self._cur_stoptime = time.time() + self.deadline
                return func(*args, **kwargs)
            except ConnectionClosedError:
                raise
            except self.retry_exceptions:
                # Note: max_tries == -1 means infinite tries.
                if self._attempts == self.max_tries:
                    raise RetryFailedError("Too many retry attempts")
                ...
    ...
When all sockets fail to reconnect, it retries only once and then returns STOP_CONNECTING.
So I suggest that if len(host_ports) == 0 (i.e. no ZooKeeper server can be reached at runtime), one of the RETRY_EXCEPTIONS should be raised instead, e.g.:
In connection.py, in `def _connect_loop(self, retry):`

    if len(host_ports) == 0:
        raise ForceRetryError()

instead of just returning STOP_CONNECTING.
Then the `except self.retry_exceptions` clause in retry.py will be triggered, forcing KazooRetry to keep retrying.
When the ZooKeeper servers return to normal, our services will reconnect to them.
Thanks.
See kazoo/protocol/connection.py: https://github.com/python-zk/kazoo/blob/master/kazoo/protocol/connection.py
See kazoo/retry.py: https://github.com/python-zk/kazoo/blob/master/kazoo/retry.py