When a unit starts after an upgrade, `wait_for_active_shards=all` causes a timeout (since the upgrading unit is offline).
Example of the timeout:
```
unit-opensearch-2: 11:14:14 ERROR unit.opensearch/2.juju-log Error creating OpenSearch lock document
Traceback (most recent call last):
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/urllib3/connectionpool.py", line 467, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/urllib3/connectionpool.py", line 462, in _make_request
httplib_response = conn.getresponse()
File "/usr/lib/python3.10/http/client.py", line 1375, in getresponse
response.begin()
File "/usr/lib/python3.10/http/client.py", line 318, in begin
version, status, reason = self._read_status()
File "/usr/lib/python3.10/http/client.py", line 279, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/usr/lib/python3.10/socket.py", line 705, in readinto
return self._sock.recv_into(b)
File "/usr/lib/python3.10/ssl.py", line 1303, in recv_into
return self.read(nbytes, buffer)
File "/usr/lib/python3.10/ssl.py", line 1159, in read
return self._sslobj.read(len, buffer)
TimeoutError: The read operation timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/requests/adapters.py", line 486, in send
resp = conn.urlopen(
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/urllib3/connectionpool.py", line 799, in urlopen
retries = retries.increment(
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/urllib3/util/retry.py", line 550, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/urllib3/packages/six.py", line 770, in reraise
raise value
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/urllib3/connectionpool.py", line 715, in urlopen
httplib_response = self._make_request(
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/urllib3/connectionpool.py", line 469, in _make_request
self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/urllib3/connectionpool.py", line 358, in _raise_timeout
raise ReadTimeoutError(
urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='10.139.243.54', port=9200): Read timed out. (read timeout=5)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/var/lib/juju/agents/unit-opensearch-2/charm/lib/charms/opensearch/v0/opensearch_distro.py", line 272, in request
resp = call(urls[0])
File "/var/lib/juju/agents/unit-opensearch-2/charm/lib/charms/opensearch/v0/opensearch_distro.py", line 223, in call
for attempt in Retrying(
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/tenacity/__init__.py", line 347, in __iter__
do = self.iter(retry_state=retry_state)
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/tenacity/__init__.py", line 325, in iter
raise retry_exc.reraise()
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/tenacity/__init__.py", line 158, in reraise
raise self.last_attempt.result()
File "/usr/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/usr/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/var/lib/juju/agents/unit-opensearch-2/charm/lib/charms/opensearch/v0/opensearch_distro.py", line 250, in call
response = s.request(**request_kwargs)
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/requests/sessions.py", line 589, in request
resp = self.send(prep, **send_kwargs)
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/requests/sessions.py", line 703, in send
r = adapter.send(request, **kwargs)
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/requests/adapters.py", line 532, in send
raise ReadTimeout(e, request=request)
requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='10.139.243.54', port=9200): Read timed out. (read timeout=5)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/var/lib/juju/agents/unit-opensearch-2/charm/lib/charms/opensearch/v0/opensearch_locking.py", line 264, in acquired
response = self._opensearch.request(
File "/var/lib/juju/agents/unit-opensearch-2/charm/lib/charms/opensearch/v0/opensearch_distro.py", line 279, in request
raise OpenSearchHttpError(response_text=str(e))
charms.opensearch.v0.opensearch_exceptions.OpenSearchHttpError: HTTP error self.response_code=None
self.response_text="HTTPSConnectionPool(host='10.139.243.54', port=9200): Read timed out. (read timeout=5)"
```
#263 broke upgrades: whenever a unit checks whether it holds the OpenSearch lock, it runs
https://github.com/canonical/opensearch-operator/blob/2071f1946f1852b77c4a86b1ed055520728b79ed/lib/charms/opensearch/v0/opensearch_locking.py#L262-L280
When a unit starts after an upgrade, `wait_for_active_shards=all` causes a timeout (since the upgrading unit is offline).
Example of the timeout:
```
unit-opensearch-2: 11:14:14 ERROR unit.opensearch/2.juju-log Error creating OpenSearch lock document
Traceback (most recent call last):
File "/var/lib/juju/agents/unit-opensearch-2/charm/venv/urllib3/connectionpool.py", line 467, in _make_request
six.raise_from(e, None)
[... traceback truncated ...]
```
This could be worked around by checking whether a unit already holds the OpenSearch lock instead of trying to create it, but that would bypass this check:
https://github.com/canonical/opensearch-operator/blob/2071f1946f1852b77c4a86b1ed055520728b79ed/lib/charms/opensearch/v0/opensearch_locking.py#L282-L295
Potential solutions:
wait_for_active_shards=all