Open philipbjorge opened 9 months ago
Thank you for letting us know about this. I've let our product team know, we'll decide how we want to handle this issue.
This our workaround, we are a python application.
import os
from time import sleep
import httpx
class HerokuClient:
def __init__(self, app: str | None = None, api_key: str | None = None) -> None:
self.heroku_app = app or os.environ["HEROKU_APP"]
self.api_key = api_key or os.environ["HEROKU_API_KEY"]
self.client = httpx.Client(
base_url=f"https://api.heroku.com/apps/{self.heroku_app}",
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self.api_key}",
},
)
def retrieve_last_release(self):
previous_release = self.client.get(
"/releases",
headers={
"Accept": "application/vnd.heroku+json; version=3",
"Range": "version ..; max=2, order=desc",
},
)
previous_release.raise_for_status()
return previous_release.json()[0]
def release(self, processes: list[str]):
# Retrieve the previous release
last_release = self.retrieve_last_release()
# Trigger the release
print(f"🚀 releasing {', '.join(processes)}") # noqa: T201)
self.client.patch(
"/formation",
json={
"updates": [
{"type": target, "docker_image": self.latest_image_digest(target=target)}
for target in processes
]
},
headers={"Accept": "application/vnd.heroku+json; version=3.docker-releases"},
)
# Retrieve the new release
# We give Heroku a tiny bit of time between triggering the formation update and retrieving the current release
# to mitigate any race conditions between their microservices.
sleep(10)
current_release = self.retrieve_last_release()
# Ensure the release was actually created
if (not last_release and not current_release) or (
last_release and last_release["id"] == current_release["id"]
):
print("❌ failed to release - new release was not created") # noqa: T201
exit(1)
return current_release
def stream(self, url: str, retries=30):
range_header = {}
streamed_bytes = 0
print(f"attemping stream {url}") # noqa: T201
while True:
try:
with httpx.stream("GET", url, headers=range_header) as stream:
stream.raise_for_status()
for chunk in stream.iter_text():
print(chunk, end="") # noqa: T201
streamed_bytes += len(chunk)
break
except httpx.HTTPError as e:
if retries > 0:
retries -= 1
range_header["Range"] = f"bytes={streamed_bytes}"
if streamed_bytes > 0:
sleep(4)
else:
sleep(8)
else:
raise e
def latest_image_digest(self, target: str):
manifest_request = self.client.get(
f"https://registry.heroku.com/v2/{self.heroku_app}/{target}/manifests/latest",
auth=httpx.BasicAuth("_", self.api_key),
headers={
"Accept": "application/vnd.docker.distribution.manifest.v2+json",
},
)
manifest_request.raise_for_status()
return manifest_request.json()["config"]["digest"]
def ensure_successful_release(self, retries=10):
max_sleep = 8 # Maximum sleep time in seconds
attempt = 0 # Current attempt number
while True:
try:
current_release = self.retrieve_last_release()
if current_release["status"] == "failed" or retries <= 0:
print( # noqa: T201
f"❌ failed to release / status={current_release['status']} / https://dashboard.heroku.com/apps/{self.heroku_app}"
)
exit(1)
elif current_release["status"] == "succeeded":
print("✅ successfully released") # noqa: T201
return
except Exception: # noqa: S110
pass
sleep_time = min(2**attempt, max_sleep)
sleep(sleep_time)
attempt += 1
retries -= 1
if __name__ == "__main__":
client = HerokuClient()
# Trigger the release
current_release = client.release(processes=PROCESSES)
# Stream the output
if current_release["status"] == "pending" and current_release["output_stream_url"]:
print( # noqa: T201
"🪵 the current release is pending -- attempting to stream release logs..."
)
print("===============================================\n\n") # noqa: T201
try:
client.stream(current_release["output_stream_url"])
except Exception:
print("❌ streaming release output failed; release is ongoing...") # noqa: T201
print("\n\n===============================================\n\n") # noqa: T201
else:
print( # noqa: T201
f"🤔 the current release is {current_release['status']} with output stream url {current_release['output_stream_url']} -- skipping log streaming..."
)
# Ensure the release phase completed successfully
client.ensure_successful_release()
In terms of impact, we see about 5-10% failure rate on deploys using the CLI implementation.
What is the current behavior?
Version:
8.7.1
When using
heroku container:release
, we periodically run into the following errors:ECONNRESET
404 Channel is not registered.
What is the expected behavior?
Streaming logs should be resilient to connection resets per the API docs:
Releasing should be resilient to the race condition on the underlying Heroku platform: