Status: Closed (mrocklin closed this issue 1 year ago)
I suspect that `protocol="wss"` support isn't fully released yet (hence this issue), but the following is failing in prod:
```python
import coiled

cluster = coiled.Cluster(protocol="wss")
```
```
OSError                                   Traceback (most recent call last)
<ipython-input-4-4414c0a82aa2> in <module>
----> 1 cluster = coiled.Cluster(protocol="wss", name="mrocklin-4c652147-f")

~/miniconda/lib/python3.8/site-packages/coiled/cluster.py in __init__(self, name, n_workers, configuration, software, worker_cpu, worker_gpu, worker_memory, worker_class, worker_options, scheduler_cpu, scheduler_memory, scheduler_class, scheduler_options, asynchronous, cloud, account, shutdown_on_close, backend_options, credentials, timeout, environ, protocol)
    351
    352         if not self.asynchronous:
--> 353             self.sync(self._start)
    354
    355     @property

~/miniconda/lib/python3.8/site-packages/coiled/cluster.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
    387             **kwargs,
    388         ) -> Union[_T, Awaitable[_T]]:
--> 389         return super().sync(
    390             func,
    391             *args,

~/workspace/distributed/distributed/deploy/cluster.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
    183             return future
    184         else:
--> 185             return sync(self.loop, func, *args, **kwargs)
    186
    187     def _log(self, log):

~/workspace/distributed/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
    324     if error[0]:
    325         typ, exc, tb = error[0]
--> 326         raise exc.with_traceback(tb)
    327     else:
    328         return result[0]

~/workspace/distributed/distributed/utils.py in f()
    307             if callback_timeout is not None:
    308                 future = asyncio.wait_for(future, callback_timeout)
--> 309             result[0] = yield future
    310         except Exception:
    311             error[0] = sys.exc_info()

~/workspace/tornado/tornado/gen.py in run(self)
    764
    765                 try:
--> 766                     value = future.result()
    767                 except Exception:
    768                     exc_info = sys.exc_info()

~/miniconda/lib/python3.8/site-packages/coiled/context.py in wrapper(*args, **kwargs)
     36     async def wrapper(*args, **kwargs):
     37         with operation_context(f"{func.__module__}.{func.__name__}"):
---> 38             return await func(*args, **kwargs)
     39
     40     return wrapper

~/miniconda/lib/python3.8/site-packages/coiled/cluster.py in _start(self)
    487                 raise
    488
--> 489         await super()._start()
    490
    491         # TODO: Come up with a better long-term solution. Below we raise an informative error message

~/workspace/distributed/distributed/deploy/cluster.py in _start(self)
     64
     65     async def _start(self):
---> 66         comm = await self.scheduler_comm.live_comm()
     67         await comm.write({"op": "subscribe_worker_status"})
     68         self.scheduler_info = SchedulerInfo(await comm.read())

~/workspace/distributed/distributed/core.py in live_comm(self)
    751                 del self.comms[s]
    752         if not open or comm.closed():
--> 753             comm = await connect(
    754                 self.address,
    755                 self.timeout,

~/workspace/distributed/distributed/comm/core.py in connect(addr, timeout, deserialize, handshake_overrides, **connection_args)
    308                 await asyncio.sleep(backoff)
    309             else:
--> 310                 raise OSError(
    311                     f"Timed out trying to connect to {addr} after {timeout} s"
    312                 ) from active_exception

OSError: Timed out trying to connect to wss://cloud.coiled.io/cluster/26550/ after 5 s
```
Also fails in staging. The cluster does seem to come up though.
I can also reproduce this. A week or so ago this worked for me, so I suspect a regression.
Protocol support has been deprecated.
I suspect that this isn't yet fully released, hence the issue, but this is failing in prod