Closed vsbuffalo closed 3 years ago
Hello!
I ran into an error in filtering a VariantChunkedTable (sort of following this tutorial) and in trying to create an MRE, have found the same error occurs when I filter the example VariantChunkedTable in the documentation. Here's an MRE:
VariantChunkedTable
import h5py import allel as al chrom = [b'chr1', b'chr1', b'chr2', b'chr2', b'chr3'] pos = [2, 7, 3, 9, 6] dp = [35, 12, 78, 22, 99] qd = [4.5, 6.7, 1.2, 4.4, 2.8] ac = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)] with h5py.File('callset.h5', mode='w') as h5f: h5g = h5f.create_group('/3L/variants') h5g.create_dataset('CHROM', data=chrom, chunks=True) h5g.create_dataset('POS', data=pos, chunks=True) h5g.create_dataset('DP', data=dp, chunks=True) h5g.create_dataset('QD', data=qd, chunks=True) h5g.create_dataset('AC', data=ac, chunks=True) callset = h5py.File('callset.h5', mode='r') vt = al.VariantChunkedTable(callset['/3L/variants'], names=['CHROM', 'POS', 'AC', 'QD', 'DP']) vs = vt.eval('DP > 15')[:] v = vt.compress(vs, axis=0)
This leads to:
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-2-54cda9cd9a1b> in <module> 2 vt = al.VariantChunkedTable(callset['/3L/variants'], names=['CHROM', 'POS', 'AC', 'QD', 'DP']) 3 vs = vt.eval('DP > 15')[:] ----> 4 v = vt.compress(vs, axis=0) ~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/allel/chunked/core.py in compress(self, condition, axis, out, blen, storage, create, **kwargs) 950 def compress(self, condition, axis=None, out=None, blen=None, storage=None, create='table', 951 **kwargs): --> 952 out = compress_table(condition, self, axis=axis, out=out, blen=blen, storage=storage, 953 create=create, **kwargs) 954 return type(self)(out) ~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/allel/chunked/core.py in compress_table(condition, tbl, axis, out, blen, storage, create, **kwargs) 344 res = [np.compress(bcond, c, axis=0) for c in bcolumns] 345 if out is None: --> 346 out = getattr(storage, create)(res, names=names, 347 expectedlen=nnz, **kwargs) 348 else: ~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/allel/chunked/storage_zarr.py in table(self, data, names, expectedlen, **kwargs) 71 if chunks is None: 72 chunks = default_chunks(c, expectedlen) ---> 73 g.array(name=n, data=c, chunks=chunks) 74 75 # create table ~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/hierarchy.py in array(self, name, data, **kwargs) 946 """Create an array. Keyword arguments as per 947 :func:`zarr.creation.array`.""" --> 948 return self._write_op(self._array_nosync, name, data, **kwargs) 949 950 def _array_nosync(self, name, data, **kwargs): ~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/hierarchy.py in _write_op(self, f, *args, **kwargs) 659 660 with lock: --> 661 return f(*args, **kwargs) 662 663 def create_group(self, name, overwrite=False): ~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/hierarchy.py in _array_nosync(self, name, data, **kwargs) 952 kwargs.setdefault('synchronizer', self._synchronizer) 953 kwargs.setdefault('cache_attrs', self.attrs.cache) --> 954 return array(data, store=self._store, path=path, chunk_store=self._chunk_store, 955 **kwargs) 956 ~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/creation.py in array(data, **kwargs) 352 353 # instantiate array --> 354 z = create(**kwargs) 355 356 # fill with data ~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/creation.py in create(shape, chunks, dtype, compressor, fill_value, order, store, synchronizer, overwrite, path, chunk_store, filters, cache_metadata, cache_attrs, read_only, object_codec, **kwargs) 119 120 # initialize array metadata --> 121 init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, 122 fill_value=fill_value, order=order, overwrite=overwrite, path=path, 123 chunk_store=chunk_store, filters=filters, object_codec=object_codec) ~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/storage.py in init_array(store, shape, chunks, dtype, compressor, fill_value, order, overwrite, path, chunk_store, filters, object_codec) 342 _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) 343 --> 344 _init_array_metadata(store, shape=shape, chunks=chunks, dtype=dtype, 345 compressor=compressor, fill_value=fill_value, 346 order=order, overwrite=overwrite, path=path, ~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/storage.py in _init_array_metadata(store, shape, chunks, dtype, compressor, fill_value, order, overwrite, path, chunk_store, filters, object_codec) 412 if not filters: 413 # there are no filters so we can be sure there is no object codec --> 414 raise ValueError('missing object_codec for object array') 415 else: 416 # one of the filters may be an object codec, issue a warning rather ValueError: missing object_codec for object array
And my scikit-allel version is:
>>> al.__version__ '1.3.2'
Thanks @vsbuffalo, I've merged a fix, available from pypi as v1.3.3.
Thanks so much @alimanfoo for the incredibly speedy bug fix!
No problem :-)
Hello!
I ran into an error in filtering a
VariantChunkedTable
(sort of following this tutorial) and in trying to create an MRE, have found the same error occurs when I filter the example VariantChunkedTable in the documentation. Here's an MRE:This leads to:
And my scikit-allel version is: