Closed mfbalin closed 2 months ago
Traceback (most recent call last):
File "/home/mfbalin/dgl-1/examples/sampling/graphbolt/lightning/../rgcn/hetero_rgcn.py", line 667, in <module>
main(args)
File "/home/mfbalin/dgl-1/examples/sampling/graphbolt/lightning/../rgcn/hetero_rgcn.py", line 624, in main
train(
File "/home/mfbalin/dgl-1/examples/sampling/graphbolt/lightning/../rgcn/hetero_rgcn.py", line 511, in train
for data in tqdm(data_loader, desc=f"Training~Epoch {epoch + 1:02d}"):
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/tqdm/std.py", line 1181, in __iter__
for obj in iterable:
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 629, in __next__
data = self._next_data()
^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 672, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 41, in fetch
data = next(self.dataset_iter)
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 150, in __next__
return self._get_next()
^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 138, in _get_next
result = next(self.iterator)
^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 222, in wrap_next
result = next_func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/datapipe.py", line 383, in __next__
return next(self._datapipe_iter)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 179, in wrap_generator
response = gen.send(None)
^^^^^^^^^^^^^^
File "/home/mfbalin/dgl-1/python/dgl/graphbolt/base.py", line 269, in __iter__
yield from self.datapipe
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 179, in wrap_generator
response = gen.send(None)
^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/iter/callable.py", line 124, in __iter__
for data in self.datapipe:
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 179, in wrap_generator
response = gen.send(None)
^^^^^^^^^^^^^^
File "/home/mfbalin/dgl-1/python/dgl/graphbolt/dataloader.py", line 68, in __iter__
yield from self.dataloader
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 629, in __next__
data = self._next_data()
^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 672, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 41, in fetch
data = next(self.dataset_iter)
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 150, in __next__
return self._get_next()
^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 138, in _get_next
result = next(self.iterator)
^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 222, in wrap_next
result = next_func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/datapipe.py", line 383, in __next__
return next(self._datapipe_iter)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 179, in wrap_generator
response = gen.send(None)
^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/iter/callable.py", line 124, in __iter__
for data in self.datapipe:
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 179, in wrap_generator
response = gen.send(None)
^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/iter/callable.py", line 124, in __iter__
for data in self.datapipe:
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 179, in wrap_generator
response = gen.send(None)
^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/iter/callable.py", line 124, in __iter__
for data in self.datapipe:
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 179, in wrap_generator
response = gen.send(None)
^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/iter/callable.py", line 124, in __iter__
for data in self.datapipe:
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 179, in wrap_generator
response = gen.send(None)
^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/iter/callable.py", line 124, in __iter__
for data in self.datapipe:
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 179, in wrap_generator
response = gen.send(None)
^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/iter/callable.py", line 124, in __iter__
for data in self.datapipe:
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/_hook_iterator.py", line 179, in wrap_generator
response = gen.send(None)
^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/iter/callable.py", line 125, in __iter__
yield self._apply_fn(data)
^^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/.venvs/venv/lib/python3.12/site-packages/torch/utils/data/datapipes/iter/callable.py", line 90, in _apply_fn
return self.fn(data)
^^^^^^^^^^^^^
File "/home/mfbalin/dgl-1/python/dgl/graphbolt/minibatch_transformer.py", line 38, in _transformer
minibatch = self.transformer(minibatch)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/dgl-1/python/dgl/graphbolt/impl/neighbor_sampler.py", line 172, in _sample_per_layer
subgraph = self.sampler(
^^^^^^^^^^^^^
File "/home/mfbalin/dgl-1/python/dgl/graphbolt/impl/fused_csc_sampling_graph.py", line 705, in sample_neighbors
return self._convert_to_sampled_subgraph(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/mfbalin/dgl-1/python/dgl/graphbolt/impl/fused_csc_sampling_graph.py", line 600, in _convert_to_sampled_subgraph
etype: CSCFormatBase(
^^^^^^^^^^^^^^
File "/home/mfbalin/dgl-1/python/dgl/graphbolt/base.py", line 352, in __init__
assert self.indptr[-1] == len(
^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: The last element of indptr should be the same as the length of indices.
This exception is thrown by __iter__ of SamplePerLayer(datapipe=MiniBatchTransformer, fanout=tensor([10, 10, 10, 10, 10, 10, 10]), prob_name=None, replace=False, sampler=<bound method FusedCSCSamplingGraph.sample_neighbors of FusedCSCSamplingGraph(csc_indptr=tensor([ 0, 5, 9, ..., 42221926, 42221939, 42222014],
dtype=torch.int32),
indices=tensor([1195459, 1223057, 1492639, ..., 1625065, 1630693, 1643218],
dtype=torch.int32),
total_num_nodes=1939743, num_edges={'author:affiliated_with:institution': 1043998, 'author:writes:paper': 7145660, 'field_of_study:rev_has_topic:paper': 7505078, 'institution:rev_affiliated_with:author': 1043998, 'paper:cites:paper': 10832542, 'paper:has_topic:field_of_study': 7505078, 'paper:rev_writes:author': 7145660},
node_type_offset=tensor([ 0, 1134649, 1194614, 1203354, 1939743], dtype=torch.int32),
type_per_edge=tensor([3, 6, 6, ..., 4, 4, 4], dtype=torch.uint8),
node_type_to_id={'author': 0, 'field_of_study': 1, 'institution': 2, 'paper': 3},
edge_type_to_id={'author:affiliated_with:institution': 0, 'author:writes:paper': 1, 'field_of_study:rev_has_topic:paper': 2, 'institution:rev_affiliated_with:author': 3, 'paper:cites:paper': 4, 'paper:has_topic:field_of_study': 5, 'paper:rev_writes:author': 6},)>)
Printing `indptr[-1]` and `len(indices)`:
tensor(0, dtype=torch.int32) 0
tensor(4492, dtype=torch.int32) 4492
tensor(9900, dtype=torch.int32) 9876
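I think it is an off-by-one (+1/-1) bug: in the last pair printed above, `indptr[-1]` is 9900 while `len(indices)` is 9876, which is the mismatch that triggers the assertion.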
🔨Work Item
IMPORTANT:
Project tracker: https://github.com/orgs/dmlc/projects/2
Description
Issue with the CPU hetero example in the regression test: