The ExperimentDataPipe appears to rely on a private API in somacore (`somacore.query._fast_csr.read_scipy_csr`), which changes in the upcoming tiledbsoma 1.7 release. This will need to be addressed before our package is updated.
E AttributeError: module 'somacore.query._fast_csr' has no attribute 'read_scipy_csr'
Details:
________________ test_non_batched[6-3-pytorch_x_value_gen-True] ________________
soma_experiment = <Experiment '/tmp/pytest-of-runner/pytest-0/test_non_batched_6_3_pytorch_x0/exp' (open for 'r') (2 items)
'obs': '...xp/obs' (unopened)
'ms': 'file:///tmp/pytest-of-runner/pytest-0/test_non_batched_6_3_pytorch_x0/exp/ms' (unopened)>
use_eager_fetch = True
@pytest.mark.experimental
# noinspection PyTestParametrized
@pytest.mark.parametrize(
"obs_range,var_range,X_value_gen,use_eager_fetch",
[(6, 3, pytorch_x_value_gen, use_eager_fetch) for use_eager_fetch in (True, False)],
)
def test_non_batched(soma_experiment: Experiment, use_eager_fetch: bool) -> None:
exp_data_pipe = ExperimentDataPipe(
soma_experiment,
measurement_name="RNA",
X_name="raw",
obs_column_names=["label"],
use_eager_fetch=use_eager_fetch,
)
row_iter = iter(exp_data_pipe)
> row = next(row_iter)
api/python/cellxgene_census/tests/experimental/ml/test_pytorch.py:145:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../_tool/Python/3.11.7/x64/lib/python3.11/site-packages/torch/utils/data/datapipes/_hook_iterator.py:173: in wrap_generator
response = gen.send(None)
../../_tool/Python/3.11.7/x64/lib/python3.11/site-packages/cellxgene_census/experimental/ml/pytorch.py:584: in __iter__
for datum_ in obs_and_x_iter:
../../_tool/Python/3.11.7/x64/lib/python3.11/site-packages/cellxgene_census/experimental/ml/pytorch.py:252: in __next__
obs_partial, X_partial = self._read_partial_torch_batch(self.batch_size - len(obs))
../../_tool/Python/3.11.7/x64/lib/python3.11/site-packages/cellxgene_census/experimental/ml/pytorch.py:300: in _read_partial_torch_batch
self.soma_chunk: _SOMAChunk = next(self.soma_chunk_iter)
../../_tool/Python/3.11.7/x64/lib/python3.11/site-packages/cellxgene_census/experimental/util/_eager_iter.py:33: in __next__
res = self._future.result()
../../_tool/Python/3.11.7/x64/lib/python3.11/concurrent/futures/_base.py:449: in result
return self.__get_result()
../../_tool/Python/3.11.7/x64/lib/python3.11/concurrent/futures/_base.py:401: in __get_result
raise self._exception
../../_tool/Python/3.11.7/x64/lib/python3.11/concurrent/futures/thread.py:58: in run
result = self.fn(*self.args, **self.kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <cellxgene_census.experimental.ml.pytorch._ObsAndXSOMAIterator object at 0x7f6be7a99850>
def __next__(self) -> _SOMAChunk:
pytorch_logger.debug("Retrieving next SOMA chunk...")
start_time = time()
# If no more batches to iterate through, raise StopIteration, as all iterators do when at end
obs_joinids_chunk = next(self.obs_joinids_chunks_iter)
obs_batch = (
self.obs.read(
coords=(obs_joinids_chunk,),
column_names=self.obs_column_names,
)
.concat()
.to_pandas()
.set_index("soma_joinid")
)
assert obs_batch.shape[0] == obs_joinids_chunk.shape[0]
# handle case of empty result (first batch has 0 rows)
if len(obs_batch) == 0:
raise StopIteration
# reorder obs rows to match obs_joinids_chunk ordering, which may be shuffled
obs_batch = obs_batch.reindex(obs_joinids_chunk, copy=False)
# note: order of rows in returned CSR matches the order of the requested obs_joinids, so no need to reindex
> X_batch = _fast_csr.read_scipy_csr(self.X, pa.array(obs_joinids_chunk), pa.array(self.var_joinids))
E AttributeError: module 'somacore.query._fast_csr' has no attribute 'read_scipy_csr'
E This exception is thrown by __iter__ of ExperimentDataPipe(batch_size=1, measurement_name='RNA', obs_column_names=['soma_joinid', 'label'], obs_query=None, return_sparse_X=False, shuffle=functools.partial(<function IterDataPipe.register_datapipe_as_function.<locals>.class_function at 0x7f6ca37fff60>, <class 'torch.utils.data.datapipes.iter.combinatorics.ShufflerIterDataPipe'>, False, ExperimentDataPipe), soma_chunk_size=149130808, use_eager_fetch=True, var_query=None)
../../_tool/Python/3.11.7/x64/lib/python3.11/site-packages/cellxgene_census/experimental/ml/pytorch.py:170: AttributeError
The ExperimentDataPipe appears to rely on a private API in somacore (`somacore.query._fast_csr.read_scipy_csr`), which changes in the upcoming tiledbsoma 1.7 release. This will need to be addressed before our package is updated.
Details: