fsspec / sshfs

sshfs - SSH/SFTP implementation for fsspec
Apache License 2.0
58 stars 14 forks source link

_cat_file implementation #30

Open ivirshup opened 1 year ago

ivirshup commented 1 year ago

Hi, I have a feature request: could `sshfs.SSHFileSystem` get an implementation of `_cat_file`?

I'm trying to use sshfs with zarr, but hit a NotImplementedError when I try to construct a group.

Roughly what I've run:

import sshfs, zarr

fs = sshfs.SSHFileSystem(host)
store = zarr.storage.FSStore("/path/to/data.zarr", fs=fs, mode="r")

g = zarr.open(store)
File /usr/local/Cellar/python@3.9/3.9.12/Frameworks/Python.framework/Versions/3.9/lib/python3.9/asyncio/tasks.py:442, in wait_for(fut, timeout, loop)
    437     warnings.warn("The loop argument is deprecated since Python 3.8, "
    438                   "and scheduled for removal in Python 3.10.",
    439                   DeprecationWarning, stacklevel=2)
    441 if timeout is None:
--> 442     return await fut
    444 if timeout <= 0:
    445     fut = ensure_future(fut, loop=loop)

File /usr/local/lib/python3.9/site-packages/fsspec/asyn.py:395, in AsyncFileSystem._cat_file(self, path, start, end, **kwargs)
    394 async def _cat_file(self, path, start=None, end=None, **kwargs):
--> 395     raise NotImplementedError
Full Traceback

```pytb
---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
Cell In[69], line 1
----> 1 g = zarr.open(store, mode="r")

File /usr/local/lib/python3.9/site-packages/zarr/convenience.py:120, in open(store, mode, zarr_version, path, **kwargs)
    118     return open_array(_store, mode=mode, **kwargs)
    119 elif contains_group(_store, path):
--> 120     return open_group(_store, mode=mode, **kwargs)
    121 else:
    122     raise PathNotFoundError(path)

File /usr/local/lib/python3.9/site-packages/zarr/hierarchy.py:1465, in open_group(store, mode, cache_attrs, synchronizer, path, chunk_store, storage_options, zarr_version, meta_array)
   1462 # determine read only status
   1463 read_only = mode == 'r'
-> 1465 return Group(store, read_only=read_only, cache_attrs=cache_attrs,
   1466              synchronizer=synchronizer, path=path, chunk_store=chunk_store,
   1467              zarr_version=zarr_version, meta_array=meta_array)

File /usr/local/lib/python3.9/site-packages/zarr/hierarchy.py:164, in Group.__init__(self, store, path, read_only, chunk_store, cache_attrs, synchronizer, zarr_version, meta_array)
    162     mkey = _prefix_to_group_key(self._store, self._key_prefix)
    163     assert not mkey.endswith("root/.group")
--> 164     meta_bytes = store[mkey]
    165 except KeyError:
    166     if self._version == 2:

File /usr/local/lib/python3.9/site-packages/zarr/storage.py:1393, in FSStore.__getitem__(self, key)
   1391 key = self._normalize_key(key)
   1392 try:
-> 1393     return self.map[key]
   1394 except self.exceptions as e:
   1395     raise KeyError(key) from e

File /usr/local/lib/python3.9/site-packages/fsspec/mapping.py:143, in FSMap.__getitem__(self, key, default)
    141 k = self._key_to_str(key)
    142 try:
--> 143     result = self.fs.cat(k)
    144 except self.missing_exceptions:
    145     if default is not None:

File /usr/local/lib/python3.9/site-packages/fsspec/asyn.py:114, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
    111 @functools.wraps(func)
    112 def wrapper(*args, **kwargs):
    113     self = obj or args[0]
--> 114     return sync(self.loop, func, *args, **kwargs)

File /usr/local/lib/python3.9/site-packages/fsspec/asyn.py:99, in sync(loop, func, timeout, *args, **kwargs)
     97     raise FSTimeoutError from return_result
     98 elif isinstance(return_result, BaseException):
---> 99     raise return_result
    100 else:
    101     return return_result

File /usr/local/lib/python3.9/site-packages/fsspec/asyn.py:54, in _runner(event, coro, result, timeout)
     52     coro = asyncio.wait_for(coro, timeout=timeout)
     53 try:
---> 54     result[0] = await coro
     55 except Exception as ex:
     56     result[0] = ex

File /usr/local/lib/python3.9/site-packages/fsspec/asyn.py:409, in AsyncFileSystem._cat(self, path, recursive, on_error, batch_size, **kwargs)
    407 ex = next(filter(is_exception, out), False)
    408 if ex:
--> 409     raise ex
    410 if (
    411     len(paths) > 1
    412     or isinstance(path, list)
    413     or paths[0] != self._strip_protocol(path)
    414 ):
    415     return {
    416         k: v
    417         for k, v in zip(paths, out)
    418         if on_error != "omit" or not is_exception(v)
    419     }

File /usr/local/Cellar/python@3.9/3.9.12/Frameworks/Python.framework/Versions/3.9/lib/python3.9/asyncio/tasks.py:442, in wait_for(fut, timeout, loop)
    437     warnings.warn("The loop argument is deprecated since Python 3.8, "
    438                   "and scheduled for removal in Python 3.10.",
    439                   DeprecationWarning, stacklevel=2)
    441 if timeout is None:
--> 442     return await fut
    444 if timeout <= 0:
    445     fut = ensure_future(fut, loop=loop)

File /usr/local/lib/python3.9/site-packages/fsspec/asyn.py:395, in AsyncFileSystem._cat_file(self, path, start, end, **kwargs)
    394 async def _cat_file(self, path, start=None, end=None, **kwargs):
--> 395     raise NotImplementedError

NotImplementedError:
```