coiled / feedback

A place to provide Coiled feedback
14 stars 3 forks source link

Running `condaforge/miniconda3` Docker image and `coiled.Cluster(use_magic=True)` exception #178

Closed costrouc closed 1 year ago

costrouc commented 2 years ago

Describe the bug A clear and concise description of the bug.

Reference Links, optional Is there a GitHub issue or Slack thread with more details?

In [1]: import coiled
c
In [2]: c = coiled.Cluster(use_magic=True)
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
Input In [2], in <cell line: 1>()
----> 1 c = coiled.Cluster(use_magic=True)

File /opt/conda/lib/python3.9/site-packages/coiled/_beta/cluster.py:365, in ClusterBeta.__init__(self, name, software, n_workers, worker_class, worker_options, worker_vm_types, worker_cpu, worker_memory, worker_disk_size, worker_gpu, worker_gpu_type, scheduler_class, scheduler_options, scheduler_vm_types, scheduler_cpu, scheduler_memory, asynchronous, cloud, account, shutdown_on_close, use_scheduler_public_ip, credentials, timeout, environ, tags, backend_options, show_widget, configure_logging, wait_for_workers, use_magic)
    363     error = e
    364     self.close()
--> 365     raise e
    366 finally:
    367     if error:

File /opt/conda/lib/python3.9/site-packages/coiled/_beta/cluster.py:347, in ClusterBeta.__init__(self, name, software, n_workers, worker_class, worker_options, worker_vm_types, worker_cpu, worker_memory, worker_disk_size, worker_gpu, worker_gpu_type, scheduler_class, scheduler_options, scheduler_vm_types, scheduler_cpu, scheduler_memory, asynchronous, cloud, account, shutdown_on_close, use_scheduler_public_ip, credentials, timeout, environ, tags, backend_options, show_widget, configure_logging, wait_for_workers, use_magic)
    345 error = None
    346 try:
--> 347     self.sync(self._start)
    348 except ClusterCreationError as e:
    349     error = e

File /opt/conda/lib/python3.9/site-packages/coiled/cluster.py:527, in Cluster.sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
    519 def sync(
    520     self,
    521     func: Callable[..., Awaitable[_T]],
   (...)
    525     **kwargs,
    526 ) -> Union[_T, Awaitable[_T]]:
--> 527     return super().sync(
    528         func,
    529         *args,
    530         asynchronous=asynchronous,
    531         callback_timeout=callback_timeout,
    532         **kwargs,
    533     )

File /opt/conda/lib/python3.9/site-packages/distributed/utils.py:338, in SyncMethodMixin.sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
    336     return future
    337 else:
--> 338     return sync(
    339         self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
    340     )

File /opt/conda/lib/python3.9/site-packages/distributed/utils.py:405, in sync(loop, func, callback_timeout, *args, **kwargs)
    403 if error:
    404     typ, exc, tb = error
--> 405     raise exc.with_traceback(tb)
    406 else:
    407     return result

File /opt/conda/lib/python3.9/site-packages/distributed/utils.py:378, in sync.<locals>.f()
    376         future = asyncio.wait_for(future, callback_timeout)
    377     future = asyncio.ensure_future(future)
--> 378     result = yield future
    379 except Exception:
    380     error = sys.exc_info()

File /opt/conda/lib/python3.9/site-packages/tornado/gen.py:762, in Runner.run(self)
    759 exc_info = None
    761 try:
--> 762     value = future.result()
    763 except Exception:
    764     exc_info = sys.exc_info()

File /opt/conda/lib/python3.9/site-packages/coiled/context.py:77, in track_context.<locals>.wrapper(*args, **kwargs)
     75 else:
     76     with operation_context(name=f"{func.__module__}.{func.__qualname__}"):
---> 77         return await func(*args, **kwargs)

File /opt/conda/lib/python3.9/site-packages/coiled/_beta/cluster.py:580, in ClusterBeta._start(self)
    573     if not self.auto_env:
    574         parse_identifier(
    575             self.software_environment,
    576             property_name="software_environment",
    577             can_have_revision=False,
    578         )
--> 580     self.cluster_id = await cloud._create_cluster(
    581         account=self.account,
    582         name=self.name,
    583         workers=self._start_n_workers,
    584         software_environment=self.software_environment,
    585         worker_class=self.worker_class,
    586         worker_options=self.worker_options,
    587         worker_cpu=self.worker_cpu,
    588         worker_memory=self.worker_memory,
    589         worker_disk_size=self.worker_disk_size,
    590         gcp_worker_gpu_type=self.worker_gpu_type,
    591         gcp_worker_gpu_count=self.worker_gpu_count,
    592         scheduler_class=self.scheduler_class,
    593         scheduler_options=self.scheduler_options,
    594         scheduler_cpu=self.scheduler_cpu,
    595         scheduler_memory=self.scheduler_memory,
    596         environ=self.environ,
    597         tags=self.tags,
    598         scheduler_vm_types=scheduler_vm_types_to_use
    599         or default_instance_types,
    600         worker_vm_types=worker_vm_types_to_use or default_instance_types,
    601         backend_options=self.backend_options,
    602         use_scheduler_public_ip=self.use_scheduler_public_ip,
    603         auto_env=self.auto_env,
    604     )
    605 if not self.cluster_id:
    606     raise RuntimeError(f"Failed to find/create cluster {self.name}")

File /opt/conda/lib/python3.9/site-packages/coiled/context.py:77, in track_context.<locals>.wrapper(*args, **kwargs)
     75 else:
     76     with operation_context(name=f"{func.__module__}.{func.__qualname__}"):
---> 77         return await func(*args, **kwargs)

File /opt/conda/lib/python3.9/site-packages/coiled/_beta/core.py:556, in CloudBeta._create_cluster(self, name, software_environment, worker_class, worker_options, worker_cpu, worker_memory, scheduler_class, scheduler_options, scheduler_cpu, scheduler_memory, account, workers, environ, tags, scheduler_vm_types, gcp_worker_gpu_type, gcp_worker_gpu_count, worker_vm_types, worker_disk_size, backend_options, use_scheduler_public_ip, auto_env)
    553 if auto_env:
    554     # TODO: more sensible logging
    555     logger.info("Magic environment resolving...")
--> 556     pip_env, conda_env = await magic.create_environment_approximation()
    557     logger.info("Environment magic complete")
    558     data["auto_env"] = {"pip": pip_env, "conda": conda_env}

File /opt/conda/lib/python3.9/site-packages/coiled/magic.py:235, in create_environment_approximation()
    233 conda_env_future = asyncio.create_task(create_conda_env_approximation())
    234 pip_env_future = loop.run_in_executor(None, create_pip_env_approximation)
--> 235 conda_env = await conda_env_future
    236 pip_env = await pip_env_future
    237 for required_dep in ["dask", "distributed", "bokeh"]:

File /opt/conda/lib/python3.9/site-packages/coiled/magic.py:174, in create_conda_env_approximation()
    172     logger.info(f"Conda environment detected: {conda_default_env}")
    173     conda_env: typing.Dict[str, CondaPackageInfo] = {}
--> 174     return await iterate_conda_packages(prefix=Path(conda_prefix))
    175 else:
    176     # User is not using conda, we should just grab their python version
    177     # so we know what to install
    178     conda_env: typing.Dict[str, CondaPackageInfo] = {
    179         "python": {
    180             "name": "python",
   (...)
    184         }
    185     }

File /opt/conda/lib/python3.9/site-packages/coiled/magic.py:156, in iterate_conda_packages(prefix)
    153 cache = RepoCache()
    155 if conda_meta.exists() and conda_meta.is_dir():
--> 156     packages = await asyncio.gather(
    157         *[
    158             handle_conda_package(metafile, cache)
    159             for metafile in conda_meta.iterdir()
    160             if metafile.suffix == ".json"
    161         ]
    162     )
    163     return {pkg["name"]: pkg for pkg in packages}
    164 else:

File /opt/conda/lib/python3.9/site-packages/coiled/magic.py:111, in handle_conda_package(pkg_fp, cache)
    110 async def handle_conda_package(pkg_fp: Path, cache: RepoCache):
--> 111     pkg = CondaPackage(json.load(pkg_fp.open("r")))
    112     try:
    113         parsed_version = version.parse(pkg.version)

File /opt/conda/lib/python3.9/site-packages/coiled/magic.py:46, in CondaPackage.__init__(self, meta_json)
     44 channel_regex = f"(.*)/(.*)/{self.subdir}"
     45 result = re.match(channel_regex, meta_json["channel"])
---> 46 assert result
     47 self.channel_url = result.group(1) + "/" + result.group(2)
     48 self.channel = result.group(2)

AssertionError: 

cc @shughes-uk

shughes-uk commented 2 years ago

Interesting, are any of your conda-installed packages from a custom channel? This is exploding while trying to parse the channel URL.

The output of `conda list` would be helpful for debugging.

hayesgb commented 2 years ago

@costrouc -- Can you share the output of `conda list`?

shughes-uk commented 1 year ago

stale