dask / dask-yarn

Deploy dask on YARN clusters
http://yarn.dask.org
BSD 3-Clause "New" or "Revised" License
69 stars 41 forks source link

FileNotFoundError: [Errno 2] No such file or directory: 'yarn' #153

Closed lbonini94 closed 2 years ago

lbonini94 commented 2 years ago

I'm getting an error when I try to create a `YarnCluster`:

cluster = YarnCluster(n_workers=2,
                      worker_vcores=1,
                      worker_memory='2 GiB',
                      deploy_mode='local',
                      environment='conda:///home/user/anaconda3/envs/dask-yarn')

Traceback:

FileNotFoundError                         Traceback (most recent call last)
/tmp/ipykernel_240963/2590475338.py in <module>
----> 1 cluster = YarnCluster(n_workers=5,
      2                       worker_vcores=2,
      3                       worker_memory='3 GiB',
      4                       deploy_mode='local',
      5                       environment='conda:///home/lbonini/anaconda3/envs/dask-yarn')

~/anaconda3/envs/dask-yarn/lib/python3.9/site-packages/dask_yarn/core.py in __init__(self, environment, n_workers, worker_vcores, worker_memory, worker_restarts, worker_env, worker_class, worker_options, worker_gpus, scheduler_vcores, scheduler_gpus, scheduler_memory, deploy_mode, name, queue, tags, user, host, port, dashboard_address, skein_client, asynchronous, loop)
    414             user=user,
    415         )
--> 416         self._init_common(
    417             spec=spec,
    418             host=host,

~/anaconda3/envs/dask-yarn/lib/python3.9/site-packages/dask_yarn/core.py in _init_common(self, spec, application_client, host, port, dashboard_address, asynchronous, loop, skein_client)
    566 
    567         if not self.asynchronous:
--> 568             self._sync(self._start_internal())
    569 
    570     def _start_cluster(self):

~/anaconda3/envs/dask-yarn/lib/python3.9/site-packages/dask_yarn/core.py in _sync(self, task)
    737         future = asyncio.run_coroutine_threadsafe(task, self.loop.asyncio_loop)
    738         try:
--> 739             return future.result()
    740         except BaseException:
    741             future.cancel()

~/anaconda3/envs/dask-yarn/lib/python3.9/concurrent/futures/_base.py in result(self, timeout)
    443                     raise CancelledError()
    444                 elif self._state == FINISHED:
--> 445                     return self.__get_result()
    446                 else:
    447                     raise TimeoutError()

~/anaconda3/envs/dask-yarn/lib/python3.9/concurrent/futures/_base.py in __get_result(self)
    388         if self._exception:
    389             try:
--> 390                 raise self._exception
    391             finally:
    392                 # Break a reference cycle with the exception in self._exception

~/anaconda3/envs/dask-yarn/lib/python3.9/site-packages/dask_yarn/core.py in _start_internal(self)
    625             self._start_task = asyncio.ensure_future(self._start_async())
    626         try:
--> 627             await self._start_task
    628         except BaseException:
    629             # On exception, cleanup

~/anaconda3/envs/dask-yarn/lib/python3.9/site-packages/dask_yarn/core.py in _start_async(self)
    643             else:
    644                 self._scheduler = None
--> 645             await self.loop.run_in_executor(None, self._start_cluster)
    646         else:
    647             # Connect to an existing cluster

~/anaconda3/envs/dask-yarn/lib/python3.9/concurrent/futures/thread.py in run(self)
     50 
     51         try:
---> 52             result = self.fn(*self.args, **self.kwargs)
     53         except BaseException as exc:
     54             self.future.set_exception(exc)

~/anaconda3/envs/dask-yarn/lib/python3.9/site-packages/dask_yarn/core.py in _start_cluster(self)
    571         """Start the cluster and initialize state"""
    572 
--> 573         skein_client = _get_skein_client(self._skein_client)
    574 
    575         if "dask.scheduler" not in self.spec.services:

 ~/anaconda3/envs/dask-yarn/lib/python3.9/site-packages/dask_yarn/core.py in _get_skein_client(skein_client, security)
    114         with warnings.catch_warnings():
    115             warnings.simplefilter("ignore")
--> 116             return skein.Client(security=security)
    117     return skein_client
    118 

~/anaconda3/envs/dask-yarn/lib/python3.9/site-packages/skein/core.py in __init__(self, address, security, keytab, principal, log, log_level, java_options)
    336 
    337         if address is None:
--> 338             address, proc = _start_driver(security=security,
    339                                           keytab=keytab,
    340                                           principal=principal,

~/anaconda3/envs/dask-yarn/lib/python3.9/site-packages/skein/core.py in _start_driver(security, set_global, keytab, principal, log, log_level, java_options)
    215     env['SKEIN_KEY'] = security._get_bytes('key')
    216     # Update the classpath in the environment
--> 217     classpath = (subprocess.check_output(['yarn', 'classpath', '--glob'])
    218                            .decode('utf-8'))
    219     env['CLASSPATH'] = '%s:%s' % (_SKEIN_JAR, classpath)

~/anaconda3/envs/dask-yarn/lib/python3.9/subprocess.py in check_output(timeout, *popenargs, **kwargs)
    422         kwargs['input'] = empty
    423 
--> 424     return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
    425                **kwargs).stdout
    426 

~/anaconda3/envs/dask-yarn/lib/python3.9/subprocess.py in run(input, capture_output, timeout, check, *popenargs, **kwargs)
    503         kwargs['stderr'] = PIPE
    504 
--> 505     with Popen(*popenargs, **kwargs) as process:
    506         try:
    507             stdout, stderr = process.communicate(input, timeout=timeout)

~/anaconda3/envs/dask-yarn/lib/python3.9/subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask)
    949                             encoding=encoding, errors=errors)
    950 
--> 951             self._execute_child(args, executable, preexec_fn, close_fds,
    952                                 pass_fds, cwd, env,
    953                                 startupinfo, creationflags, shell,

~/anaconda3/envs/dask-yarn/lib/python3.9/subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, gid, gids, uid, umask, start_new_session)
   1819                     if errno_num != 0:
   1820                         err_msg = os.strerror(errno_num)
-> 1821                     raise child_exception_type(errno_num, err_msg, err_filename)
   1822                 raise child_exception_type(err_msg)
   1823 

FileNotFoundError: [Errno 2] No such file or directory: 'yarn'

Environment:

jacobtomlinson commented 2 years ago

What happens if you run `which yarn` from the terminal?

lbonini94 commented 2 years ago

@jacobtomlinson got it! YARN must be installed (with the `yarn` command available on the `PATH`) for dask-yarn to work properly.

jacobtomlinson commented 2 years ago

That's it!