Status: Open. Opened by mkitti 3 months ago.
Zarr version: v2.18.2
Numcodecs version: v0.12.1
Python version: 3.12.4
Operating system: Linux
Installation: using conda
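I get the following error when using zarr-python to read an array that was written by TensorStore with the zstd compressor. Opening the array with `zarr.open_array` succeeds; the error is raised when the data is actually read (`arr[:,:]`).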
RuntimeError: Zstd decompression error: invalid input data
In [8]: ds = ts.open({ ...: 'driver': 'zarr', ...: 'kvstore': { ...: 'driver': 'file', ...: 'path': 'tmp/zarr_zstd_dataset', ...: }, ...: 'metadata': { ...: 'compressor': { ...: 'id': 'zstd', ...: 'level': 3, ...: }, ...: 'shape': [1024, 1024], ...: 'chunks': [64, 64], ...: 'dtype': '|u1', ...: 'dimension_separator': '/', ...: }, ...: 'create': True, ...: 'delete_existing': True, ...: }).result() In [9]: ds[:,:] = 5 In [10]: import zarr In [11]: arr = zarr.open_array("tmp/zarr_zstd_dataset") In [12]: arr[:,:] --------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) Cell In[12], line 1 ----> 1 arr[:,:] File ~/review_temp/conda/3/x86_64/envs/zarr_python/lib/python3.12/site-packages/zarr/core.py:798, in Array.__getitem__(self, selection) 796 result = self.vindex[selection] 797 elif is_pure_orthogonal_indexing(pure_selection, self.ndim): --> 798 result = self.get_orthogonal_selection(pure_selection, fields=fields) 799 else: 800 result = self.get_basic_selection(pure_selection, fields=fields) File ~/review_temp/conda/3/x86_64/envs/zarr_python/lib/python3.12/site-packages/zarr/core.py:1080, in Array.get_orthogonal_selection(self, selection, out, fields) 1077 # setup indexer 1078 indexer = OrthogonalIndexer(selection, self) -> 1080 return self._get_selection(indexer=indexer, out=out, fields=fields) File ~/review_temp/conda/3/x86_64/envs/zarr_python/lib/python3.12/site-packages/zarr/core.py:1343, in Array._get_selection(self, indexer, out, fields) 1340 if math.prod(out_shape) > 0: 1341 # allow storage to get multiple items at once 1342 lchunk_coords, lchunk_selection, lout_selection = zip(*indexer) -> 1343 self._chunk_getitems( 1344 lchunk_coords, 1345 lchunk_selection, 1346 out, 1347 lout_selection, 1348 drop_axes=indexer.drop_axes, 1349 fields=fields, 1350 ) 1351 if out.shape: 1352 return out File ~/review_temp/conda/3/x86_64/envs/zarr_python/lib/python3.12/site-packages/zarr/core.py:2183, in 
Array._chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, drop_axes, fields) 2181 for ckey, chunk_select, out_select in zip(ckeys, lchunk_selection, lout_selection): 2182 if ckey in cdatas: -> 2183 self._process_chunk( 2184 out, 2185 cdatas[ckey], 2186 chunk_select, 2187 drop_axes, 2188 out_is_ndarray, 2189 fields, 2190 out_select, 2191 partial_read_decode=partial_read_decode, 2192 ) 2193 else: 2194 # check exception type 2195 if self._fill_value is not None: File ~/review_temp/conda/3/x86_64/envs/zarr_python/lib/python3.12/site-packages/zarr/core.py:2096, in Array._process_chunk(self, out, cdata, chunk_selection, drop_axes, out_is_ndarray, fields, out_selection, partial_read_decode) 2094 except ArrayIndexError: 2095 cdata = cdata.read_full() -> 2096 chunk = self._decode_chunk(cdata) 2098 # select data from chunk 2099 if fields: File ~/review_temp/conda/3/x86_64/envs/zarr_python/lib/python3.12/site-packages/zarr/core.py:2352, in Array._decode_chunk(self, cdata, start, nitems, expected_shape) 2350 chunk = self._compressor.decode_partial(cdata, start, nitems) 2351 else: -> 2352 chunk = self._compressor.decode(cdata) 2353 else: 2354 chunk = cdata File numcodecs/zstd.pyx:219, in numcodecs.zstd.Zstd.decode() File numcodecs/zstd.pyx:153, in numcodecs.zstd.decompress() RuntimeError: Zstd decompression error: invalid input data
$ conda env export name: zarr_python channels: - conda-forge - defaults dependencies: - _libgcc_mutex=0.1=conda_forge - _openmp_mutex=4.5=2_gnu - aiohttp=3.9.5=py312h98912ed_0 - aiosignal=1.3.1=pyhd8ed1ab_0 - aom=3.9.1=hac33072_0 - asciitree=0.3.3=py_2 - asttokens=2.4.1=pyhd8ed1ab_0 - attrs=23.2.0=pyh71513ae_0 - blosc=1.21.6=hef167b5_0 - brotli-python=1.1.0=py312h30efb56_1 - bzip2=1.0.8=h4bc722e_7 - c-ares=1.32.3=h4bc722e_0 - ca-certificates=2024.7.4=hbcca054_0 - certifi=2024.7.4=pyhd8ed1ab_0 - cffi=1.16.0=py312hf06ca03_0 - charset-normalizer=3.3.2=pyhd8ed1ab_0 - dav1d=1.2.1=hd590300_0 - decorator=5.1.1=pyhd8ed1ab_0 - exceptiongroup=1.2.2=pyhd8ed1ab_0 - executing=2.0.1=pyhd8ed1ab_0 - fasteners=0.17.3=pyhd8ed1ab_0 - frozenlist=1.4.1=py312h98912ed_0 - fsspec=2024.6.1=pyhff2d567_0 - h2=4.1.0=pyhd8ed1ab_0 - hpack=4.0.0=pyh9f0ad1d_0 - hyperframe=6.0.1=pyhd8ed1ab_0 - idna=3.7=pyhd8ed1ab_0 - ipython=8.26.0=pyh707e725_0 - jedi=0.19.1=pyhd8ed1ab_0 - keyutils=1.6.1=h166bdaf_0 - krb5=1.21.3=h659f571_0 - ld_impl_linux-64=2.40=hf3520f5_7 - libabseil=20240116.2=cxx17_he02047a_1 - libavif16=1.1.0=h9b56c87_0 - libblas=3.9.0=23_linux64_openblas - libcblas=3.9.0=23_linux64_openblas - libcurl=8.9.0=hdb1bdb2_0 - libedit=3.1.20191231=he28a2e2_2 - libev=4.33=hd590300_2 - libexpat=2.6.2=h59595ed_0 - libffi=3.4.2=h7f98852_5 - libgcc-ng=14.1.0=h77fa898_0 - libgfortran-ng=14.1.0=h69a702a_0 - libgfortran5=14.1.0=hc5f4f2c_0 - libgomp=14.1.0=h77fa898_0 - libjpeg-turbo=3.0.0=hd590300_1 - liblapack=3.9.0=23_linux64_openblas - libnghttp2=1.58.0=h47da74e_1 - libnsl=2.0.1=hd590300_0 - libopenblas=0.3.27=pthreads_hac2b453_1 - libpng=1.6.43=h2797004_0 - libprotobuf=4.25.3=h08a7969_0 - libsqlite=3.46.0=hde9e2c9_0 - libssh2=1.11.0=h0841786_0 - libstdcxx-ng=14.1.0=hc0a3c3a_0 - libuuid=2.38.1=h0b41bf4_0 - libwebp-base=1.4.0=hd590300_0 - libxcrypt=4.4.36=hd590300_1 - libzlib=1.3.1=h4ab18f5_1 - lz4-c=1.9.4=hcb278e6_0 - matplotlib-inline=0.1.7=pyhd8ed1ab_0 - ml_dtypes=0.4.0=py312h1d6d2e6_1 - 
msgpack-python=1.0.8=py312h2492b07_0 - multidict=6.0.5=py312h98912ed_0 - ncurses=6.5=h59595ed_0 - numcodecs=0.12.1=py312h7070661_1 - numpy=1.26.4=py312heda63a1_0 - openssl=3.3.1=h4bc722e_2 - parso=0.8.4=pyhd8ed1ab_0 - pexpect=4.9.0=pyhd8ed1ab_0 - pickleshare=0.7.5=py_1003 - pip=24.0=pyhd8ed1ab_0 - prompt-toolkit=3.0.47=pyha770c72_0 - ptyprocess=0.7.0=pyhd3deb0d_0 - pure_eval=0.2.3=pyhd8ed1ab_0 - pybind11-abi=4=hd8ed1ab_3 - pycparser=2.22=pyhd8ed1ab_0 - pygments=2.18.0=pyhd8ed1ab_0 - pysocks=1.7.1=pyha2e5f31_6 - python=3.12.4=h194c7f8_0_cpython - python_abi=3.12=4_cp312 - rav1e=0.6.6=he8a937b_2 - readline=8.2=h8228510_1 - requests=2.32.3=pyhd8ed1ab_0 - setuptools=71.0.4=pyhd8ed1ab_0 - six=1.16.0=pyh6c4a22f_0 - snappy=1.2.1=ha2e4443_0 - stack_data=0.6.2=pyhd8ed1ab_0 - svt-av1=2.1.2=hac33072_0 - tensorstore=0.1.62=py312h7e2185d_0 - tk=8.6.13=noxft_h4845f30_101 - traitlets=5.14.3=pyhd8ed1ab_0 - typing_extensions=4.12.2=pyha770c72_0 - tzdata=2024a=h0c530f3_0 - urllib3=2.2.2=pyhd8ed1ab_1 - wcwidth=0.2.13=pyhd8ed1ab_0 - wheel=0.43.0=pyhd8ed1ab_1 - xz=5.2.6=h166bdaf_0 - yarl=1.9.4=py312h98912ed_0 - zarr=2.18.2=pyhd8ed1ab_0 - zstandard=0.23.0=py312h3483029_0 - zstd=1.5.6=ha6fb4c9_0 prefix: /home/mkitti/review_temp/conda/3/x86_64/envs/zarr_python
xref: https://github.com/google/tensorstore/issues/182
I previously discussed the root cause of this here: https://github.com/zarr-developers/numcodecs/pull/519#issuecomment-2186666974
Zarr version
v2.18.2
Numcodecs version
v0.12.1
Python Version
3.12.4
Operating System
Linux
Installation
using conda
Description
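I get the following error when using zarr-python to read an array that was written by TensorStore with the zstd compressor. Opening the array with `zarr.open_array` succeeds; the error is raised when the data is actually read (`arr[:,:]`).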
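Steps to reproduce: see the IPython session earlier in this report (create the array with TensorStore, write to it, then open and read it with zarr-python).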
Additional output
xref: https://github.com/google/tensorstore/issues/182