theislab / cellrank

CellRank: dynamics from multi-view single-cell data
https://cellrank.org
BSD 3-Clause "New" or "Revised" License
347 stars 46 forks source link

OSError when downloading example data: Unable to synchronously open file #1188

Closed LiuCanidk closed 4 months ago

LiuCanidk commented 7 months ago

description of the bug

I encountered this bug when trying to download the bone_marrow example data in a Jupyter notebook, following the tutorial section "Get Started with CellRank". The error occurred after roughly 20-30% of the download had completed, seemingly at random. I'm not sure whether it is due to network instability.

reproducible example

import cellrank as cr
######################download the data
adata = cr.datasets.bone_marrow()
#take time: 21:11---
#often error, seems like the problem of network

error output

 35%|███████████████████████████▌                                                   | 129M/370M [23:55<44:41, 94.3kB/s]
---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
Cell In[27], line 2
      1 ######################load the data
----> 2 adata = cr.datasets.bone_marrow()
      3 #take time: 21:11---

File E:\python\Lib\site-packages\cellrank\datasets.py:292, in bone_marrow(path, **kwargs)
    271 @d.dedent
    272 def bone_marrow(
    273     path: Union[str, pathlib.Path] = "datasets/bone_marrow.h5ad",
    274     **kwargs: Any,
    275 ) -> AnnData:  # pragma: no cover
    276     """sc-RNA-seq dataset early human hematopoiesis (CD34+ bone marrow cells) assayed using 10X Chromium.
    277 
    278     This dataset contains raw spliced and unspliced counts estimated using *velocyto* :cite:`manno:18`.
   (...)
    290     Annotated data object.
    291     """
--> 292     return _load_dataset_from_url(path, *_datasets["bone_marrow"], **kwargs)

File E:\python\Lib\site-packages\cellrank\datasets.py:67, in _load_dataset_from_url(fpath, url, expected_shape, **kwargs)
     64 kwargs.setdefault("sparse", True)
     65 kwargs.setdefault("cache", True)
---> 67 adata = read(fpath, backup_url=url, **kwargs)
     69 if adata.shape != expected_shape:
     70     raise ValueError(f"Expected `anndata.AnnData` object to have shape `{expected_shape}`, found `{adata.shape}`.")

File E:\python\Lib\site-packages\legacy_api_wrap\__init__.py:80, in legacy_api.<locals>.wrapper.<locals>.fn_compatible(*args_all, **kw)
     77 @wraps(fn)
     78 def fn_compatible(*args_all: P.args, **kw: P.kwargs) -> R:
     79     if len(args_all) <= n_positional:
---> 80         return fn(*args_all, **kw)
     82     args_pos: P.args
     83     args_pos, args_rest = args_all[:n_positional], args_all[n_positional:]

File E:\python\Lib\site-packages\scanpy\readwrite.py:124, in read(filename, backed, sheet, ext, delimiter, first_column_names, backup_url, cache, cache_compression, **kwargs)
    122 filename = Path(filename)  # allow passing strings
    123 if is_valid_filename(filename):
--> 124     return _read(
    125         filename,
    126         backed=backed,
    127         sheet=sheet,
    128         ext=ext,
    129         delimiter=delimiter,
    130         first_column_names=first_column_names,
    131         backup_url=backup_url,
    132         cache=cache,
    133         cache_compression=cache_compression,
    134         **kwargs,
    135     )
    136 # generate filename and read to dict
    137 filekey = str(filename)

File E:\python\Lib\site-packages\scanpy\readwrite.py:759, in _read(filename, backed, sheet, ext, delimiter, first_column_names, backup_url, cache, cache_compression, suppress_cache_warning, **kwargs)
    757 if ext in {"h5", "h5ad"}:
    758     if sheet is None:
--> 759         return read_h5ad(filename, backed=backed)
    760     else:
    761         logg.debug(f"reading sheet {sheet} from file {filename}")

File E:\python\Lib\site-packages\anndata\_io\h5ad.py:237, in read_h5ad(filename, backed, as_sparse, as_sparse_fmt, chunk_size)
    229         raise NotImplementedError(
    230             "Currently only `X` and `raw/X` can be read as sparse."
    231         )
    233 rdasp = partial(
    234     read_dense_as_sparse, sparse_format=as_sparse_fmt, axis_chunk=chunk_size
    235 )
--> 237 with h5py.File(filename, "r") as f:
    239     def callback(func, elem_name: str, elem, iospec):
    240         if iospec.encoding_type == "anndata" or elem_name.endswith("/"):

File E:\python\Lib\site-packages\h5py\_hl\files.py:562, in File.__init__(self, name, mode, driver, libver, userblock_size, swmr, rdcc_nslots, rdcc_nbytes, rdcc_w0, track_order, fs_strategy, fs_persist, fs_threshold, fs_page_size, page_buf_size, min_meta_keep, min_raw_keep, locking, alignment_threshold, alignment_interval, meta_block_size, **kwds)
    553     fapl = make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0,
    554                      locking, page_buf_size, min_meta_keep, min_raw_keep,
    555                      alignment_threshold=alignment_threshold,
    556                      alignment_interval=alignment_interval,
    557                      meta_block_size=meta_block_size,
    558                      **kwds)
    559     fcpl = make_fcpl(track_order=track_order, fs_strategy=fs_strategy,
    560                      fs_persist=fs_persist, fs_threshold=fs_threshold,
    561                      fs_page_size=fs_page_size)
--> 562     fid = make_fid(name, mode, userblock_size, fapl, fcpl, swmr=swmr)
    564 if isinstance(libver, tuple):
    565     self._libver = libver

File E:\python\Lib\site-packages\h5py\_hl\files.py:235, in make_fid(name, mode, userblock_size, fapl, fcpl, swmr)
    233     if swmr and swmr_support:
    234         flags |= h5f.ACC_SWMR_READ
--> 235     fid = h5f.open(name, flags, fapl=fapl)
    236 elif mode == 'r+':
    237     fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)

File h5py\_objects.pyx:54, in h5py._objects.with_phil.wrapper()

File h5py\_objects.pyx:55, in h5py._objects.with_phil.wrapper()

File h5py\h5f.pyx:102, in h5py.h5f.open()

OSError: Unable to synchronously open file (truncated file: eof = 135389460, sblock->base_addr = 0, stored_eof = 388391976)

Versions:

cellrank==2.0.4 scanpy==1.10.1 anndata==0.10.6 numpy==1.26.4 numba==0.59.1 scipy==1.11.4 pandas==2.2.1 pygpcca==1.0.4 scikit-learn==1.4.1.post1 statsmodels==0.14.1 scvelo==0.3.2 pygam==0.9.1 matplotlib==3.8.4 seaborn==0.13.2

Marius1311 commented 6 months ago

Hi @LiuCanidk, did you figure out whether that's just a connection issue, or something on our side?

WeilerP commented 4 months ago

I cannot reproduce the issue and there hasn't been any response in a while, so I'm closing the issue. Feel free to reach out if you have any follow-up questions, @LiuCanidk.