cggh / scikit-allel

A Python package for exploring and analysing genetic variation data
MIT License

Variant filtering and compress() on example from VariantChunkedTable documentation leads to error #353

Closed: vsbuffalo closed this issue 3 years ago

vsbuffalo commented 3 years ago

Hello!

I ran into an error while filtering a VariantChunkedTable (roughly following this tutorial). While trying to create an MRE, I found that the same error occurs when I filter the example VariantChunkedTable from the documentation. Here's the MRE:

import h5py
import allel as al

# Build a small HDF5 callset with a handful of variants across three chromosomes.
chrom = [b'chr1', b'chr1', b'chr2', b'chr2', b'chr3']
pos = [2, 7, 3, 9, 6]
dp = [35, 12, 78, 22, 99]
qd = [4.5, 6.7, 1.2, 4.4, 2.8]
ac = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]
with h5py.File('callset.h5', mode='w') as h5f:
    h5g = h5f.create_group('/3L/variants')
    h5g.create_dataset('CHROM', data=chrom, chunks=True)
    h5g.create_dataset('POS', data=pos, chunks=True)
    h5g.create_dataset('DP', data=dp, chunks=True)
    h5g.create_dataset('QD', data=qd, chunks=True)
    h5g.create_dataset('AC', data=ac, chunks=True)

# Reopen the callset, wrap the variants group, and filter on depth.
callset = h5py.File('callset.h5', mode='r')
vt = al.VariantChunkedTable(callset['/3L/variants'], names=['CHROM', 'POS', 'AC', 'QD', 'DP'])
vs = vt.eval('DP > 15')[:]
v = vt.compress(vs, axis=0)

This leads to:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-2-54cda9cd9a1b> in <module>
      2 vt = al.VariantChunkedTable(callset['/3L/variants'], names=['CHROM', 'POS', 'AC', 'QD', 'DP'])
      3 vs = vt.eval('DP > 15')[:]
----> 4 v = vt.compress(vs, axis=0)

~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/allel/chunked/core.py in compress(self, condition, axis, out, blen, storage, create, **kwargs)
    950     def compress(self, condition, axis=None, out=None, blen=None, storage=None, create='table',
    951                  **kwargs):
--> 952         out = compress_table(condition, self, axis=axis, out=out, blen=blen, storage=storage,
    953                              create=create, **kwargs)
    954         return type(self)(out)

~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/allel/chunked/core.py in compress_table(condition, tbl, axis, out, blen, storage, create, **kwargs)
    344             res = [np.compress(bcond, c, axis=0) for c in bcolumns]
    345             if out is None:
--> 346                 out = getattr(storage, create)(res, names=names,
    347                                                expectedlen=nnz, **kwargs)
    348             else:

~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/allel/chunked/storage_zarr.py in table(self, data, names, expectedlen, **kwargs)
     71             if chunks is None:
     72                 chunks = default_chunks(c, expectedlen)
---> 73             g.array(name=n, data=c, chunks=chunks)
     74 
     75         # create table

~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/hierarchy.py in array(self, name, data, **kwargs)
    946         """Create an array. Keyword arguments as per
    947         :func:`zarr.creation.array`."""
--> 948         return self._write_op(self._array_nosync, name, data, **kwargs)
    949 
    950     def _array_nosync(self, name, data, **kwargs):

~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/hierarchy.py in _write_op(self, f, *args, **kwargs)
    659 
    660         with lock:
--> 661             return f(*args, **kwargs)
    662 
    663     def create_group(self, name, overwrite=False):

~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/hierarchy.py in _array_nosync(self, name, data, **kwargs)
    952         kwargs.setdefault('synchronizer', self._synchronizer)
    953         kwargs.setdefault('cache_attrs', self.attrs.cache)
--> 954         return array(data, store=self._store, path=path, chunk_store=self._chunk_store,
    955                      **kwargs)
    956 

~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/creation.py in array(data, **kwargs)
    352 
    353     # instantiate array
--> 354     z = create(**kwargs)
    355 
    356     # fill with data

~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/creation.py in create(shape, chunks, dtype, compressor, fill_value, order, store, synchronizer, overwrite, path, chunk_store, filters, cache_metadata, cache_attrs, read_only, object_codec, **kwargs)
    119 
    120     # initialize array metadata
--> 121     init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor,
    122                fill_value=fill_value, order=order, overwrite=overwrite, path=path,
    123                chunk_store=chunk_store, filters=filters, object_codec=object_codec)

~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/storage.py in init_array(store, shape, chunks, dtype, compressor, fill_value, order, overwrite, path, chunk_store, filters, object_codec)
    342     _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite)
    343 
--> 344     _init_array_metadata(store, shape=shape, chunks=chunks, dtype=dtype,
    345                          compressor=compressor, fill_value=fill_value,
    346                          order=order, overwrite=overwrite, path=path,

~/miniconda3/envs/bioinfo/lib/python3.8/site-packages/zarr/storage.py in _init_array_metadata(store, shape, chunks, dtype, compressor, fill_value, order, overwrite, path, chunk_store, filters, object_codec)
    412             if not filters:
    413                 # there are no filters so we can be sure there is no object codec
--> 414                 raise ValueError('missing object_codec for object array')
    415             else:
    416                 # one of the filters may be an object codec, issue a warning rather

ValueError: missing object_codec for object array

And my scikit-allel version is:

>>> al.__version__
'1.3.2'
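
For context, the ValueError is raised by zarr, which (in the 2.x line) refuses to create an array from an object-dtype NumPy array unless an object_codec is supplied; presumably one of the columns (most likely the bytes-valued CHROM column) reaches the zarr-backed storage as an object array. The behaviour can be reproduced with zarr alone; this is a minimal sketch, independent of scikit-allel, assuming zarr and numcodecs are installed:

import numpy as np
import zarr
import numcodecs

# A fixed-width bytes array is stored without any extra configuration.
zarr.array(np.array([b'chr1', b'chr2'], dtype='S4'))

# An object-dtype array of bytes triggers the same error seen above,
# because zarr does not know how to encode arbitrary Python objects.
chrom_obj = np.array([b'chr1', b'chr2'], dtype=object)
try:
    zarr.array(chrom_obj)
except ValueError as exc:
    print(exc)  # missing object_codec for object array

# Supplying a codec for variable-length bytes makes the call succeed.
zarr.array(chrom_obj, object_codec=numcodecs.VLenBytes())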
alimanfoo commented 3 years ago

Thanks @vsbuffalo, I've merged a fix, available from PyPI as v1.3.3.
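
For anyone landing here later, a quick way to confirm the fix is to upgrade (e.g. pip install -U scikit-allel) and re-run the MRE above. This is just a sketch reusing the reporter's data, assuming callset.h5 has already been written as in the MRE:

import h5py
import allel as al

# Re-run the filtering step from the MRE on the upgraded package.
callset = h5py.File('callset.h5', mode='r')
vt = al.VariantChunkedTable(callset['/3L/variants'],
                            names=['CHROM', 'POS', 'AC', 'QD', 'DP'])
vs = vt.eval('DP > 15')[:]
v = vt.compress(vs, axis=0)
print(al.__version__, len(v))  # expect 4 rows: DP values 35, 78, 22, 99 pass the filter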

vsbuffalo commented 3 years ago

Thanks so much @alimanfoo for the incredibly speedy bug fix!

alimanfoo commented 3 years ago

No problem :-)