Closed mortonjt closed 5 years ago
@mortonjt, could you provide an example of the problem please? It seems to me that the import of floating point data is handled correctly:
In [24]: import biom
In [25]: from qiime2 import Artifact
In [26]: import qiime2
In [27]: qiime2.__version__
Out[27]: '2018.8.0'
In [28]: test_1 = Artifact.import_data('FeatureTable[Frequency]', biom.Table(np.array([[0, 1], [2, 3]]), ['a', 'b'], ['x', 'y']))
In [29]: test_2 = Artifact.import_data('FeatureTable[Frequency]', biom.Table(np.array([[0, 1.23], [2, 3.45]]), ['a', 'b'], ['x', 'y']))
In [30]: print(test_1.view(biom.Table))
# Constructed from biom file
#OTU ID x y
a 0.0 1.0
b 2.0 3.0
In [31]: print(test_2.view(biom.Table))
# Constructed from biom file
#OTU ID x y
a 0.0 1.23
b 2.0 3.45
@wasade thanks for the follow up. Should have provided more context.
The problem occurs when the column / row names are floating-point values:
In [6]: test_1 = Artifact.import_data('FeatureTable[Frequency]', biom.Table(np.array([[0, 1], [2, 3]]), ['a', 'b'], [10.1, 10.2]))
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-6-f7256ce6b056> in <module>()
----> 1 test_1 = Artifact.import_data('FeatureTable[Frequency]', biom.Table(np.array([[0, 1], [2, 3]]), ['a', 'b'], [10.1, 10.2]))
~/miniconda3/envs/qiime2-2018.8/lib/python3.5/site-packages/qiime2/sdk/result.py in import_data(cls, type, view, view_type)
217
218 provenance_capture = archive.ImportProvenanceCapture(format_, md5sums)
--> 219 return cls._from_view(type_, view, view_type, provenance_capture)
220
221 @classmethod
~/miniconda3/envs/qiime2-2018.8/lib/python3.5/site-packages/qiime2/sdk/result.py in _from_view(cls, type, view, view_type, provenance_capture)
242 transformation = from_type.make_transformation(to_type,
243 recorder=recorder)
--> 244 result = transformation(view)
245
246 artifact = cls.__new__(cls)
~/miniconda3/envs/qiime2-2018.8/lib/python3.5/site-packages/qiime2/core/transform.py in transformation(view)
68 self.validate(view)
69
---> 70 new_view = transformer(view)
71
72 new_view = other.coerce_view(new_view)
~/miniconda3/envs/qiime2-2018.8/lib/python3.5/site-packages/qiime2/core/transform.py in wrapped(view)
218 def wrapped(view):
219 new_view = self._view_type()
--> 220 file_view = transformer(view)
221 if transformer is not identity_transformer:
222 self.set_user_owned(file_view, False)
~/miniconda3/envs/qiime2-2018.8/lib/python3.5/site-packages/q2_types/feature_table/_transformer.py in _6(data)
123 def _6(data: biom.Table) -> BIOMV210Format:
124 data = _drop_axis_metadata(data)
--> 125 return _table_to_v210(data)
126
127
~/miniconda3/envs/qiime2-2018.8/lib/python3.5/site-packages/q2_types/feature_table/_transformer.py in _table_to_v210(data)
64 ff = BIOMV210Format()
65 with ff.open() as fh:
---> 66 data.to_hdf5(fh, generated_by=_get_generated_by())
67 return ff
68
~/miniconda3/envs/qiime2-2018.8/lib/python3.5/site-packages/biom_format-2.1.6-py3.5-macosx-10.7-x86_64.egg/biom/table.py in to_hdf5(self, h5grp, generated_by, compress, format_fs)
4123 self.group_metadata(axis='observation'), 'csr', compression)
4124 axis_dump(h5grp.create_group('sample'), self.ids(),
-> 4125 self.metadata(), self.group_metadata(), 'csc', compression)
4126
4127 @classmethod
~/miniconda3/envs/qiime2-2018.8/lib/python3.5/site-packages/biom_format-2.1.6-py3.5-macosx-10.7-x86_64.egg/biom/table.py in axis_dump(grp, ids, md, group_md, order, compression)
4073 grp.create_dataset('ids', shape=(len_ids,),
4074 dtype=H5PY_VLEN_STR,
-> 4075 data=[i.encode('utf8') for i in ids],
4076 compression=compression)
4077 else:
~/miniconda3/envs/qiime2-2018.8/lib/python3.5/site-packages/biom_format-2.1.6-py3.5-macosx-10.7-x86_64.egg/biom/table.py in <listcomp>(.0)
4073 grp.create_dataset('ids', shape=(len_ids,),
4074 dtype=H5PY_VLEN_STR,
-> 4075 data=[i.encode('utf8') for i in ids],
4076 compression=compression)
4077 else:
AttributeError: 'float' object has no attribute 'encode'
Not sure if this is a problem that biom should explicitly handle, but it is definitely a problem that continues to reappear in the pipeline due to floating-point conversion of barcode labels.
According to the BIOM Format 2.1 specification, identifiers are strings.
👍 closing
As far as I'm aware, the default biom table functionality exposed in qiime2 only handles count values.
As qiime2 expands to handle other multi-omics technologies (e.g. mass spec), it would be extremely advantageous to explicitly allow for floating-point values (without rounding to integers). Some users are already attempting to run qiime2 on LC-MS data and are currently running into these issues.
CC @rsilvabioinfo