Describe the bug
Training fails in the link prediction module with the following error:
`
link_prediction.train: Traceback (most recent call last):
File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1134, in _try_get_data
data = self._data_queue.get(timeout=timeout)
File "/usr/lib/python3.9/multiprocessing/queues.py", line 122, in get
return _ForkingPickler.loads(res)
File "/usr/local/lib/python3.9/dist-packages/torch/multiprocessing/reductions.py", line 297, in rebuild_storage_fd
fd = df.detach()
File "/usr/lib/python3.9/multiprocessing/resource_sharer.py", line 57, in detach
with _resource_sharer.get_connection(self._id) as conn:
File "/usr/lib/python3.9/multiprocessing/resource_sharer.py", line 86, in get_connection
c = Client(address, authkey=process.current_process().authkey)
File "/usr/lib/python3.9/multiprocessing/connection.py", line 507, in Client
c = SocketClient(address)
File "/usr/lib/python3.9/multiprocessing/connection.py", line 635, in SocketClient
s.connect(address)
ConnectionRefusedError: [Errno 111] Connection refused
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/lib/memgraph/query_modules/link_prediction.py", line 322, in train
training_results, validation_results = inner_train(
File "/usr/lib/memgraph/query_modules/mage/link_prediction/link_prediction_util.py", line 417, in inner_train
for _, pos_graph, neg_graph, blocks in train_dataloader:
File "/usr/local/lib/python3.9/dist-packages/dgl-0.9.1.post1-py3.9-linux-x86_64.egg/dgl/dataloading/dataloader.py", line 512, in __next__
self._next_non_threaded() if not self.use_thread else self._next_threaded()
File "/usr/local/lib/python3.9/dist-packages/dgl-0.9.1.post1-py3.9-linux-x86_64.egg/dgl/dataloading/dataloader.py", line 492, in _next_non_threaded
batch = next(self.dataloader_it)
File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 652, in __next__
data = self._next_data()
File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1330, in _next_data
idx, data = self._get_data()
File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1296, in _get_data
success, data = self._try_get_data()
File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1147, in _try_get_data
raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
RuntimeError: DataLoader worker (pid(s) 373) exited unexpectedly
`
Describe the bug Training fails in the link prediction with the following message: ` link_prediction.train: Traceback (most recent call last): File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1134, in _try_get_data data = self._data_queue.get(timeout=timeout) File "/usr/lib/python3.9/multiprocessing/queues.py", line 122, in get return _ForkingPickler.loads(res) File "/usr/local/lib/python3.9/dist-packages/torch/multiprocessing/reductions.py", line 297, in rebuild_storage_fd fd = df.detach() File "/usr/lib/python3.9/multiprocessing/resource_sharer.py", line 57, in detach with _resource_sharer.get_connection(self._id) as conn: File "/usr/lib/python3.9/multiprocessing/resource_sharer.py", line 86, in get_connection c = Client(address, authkey=process.current_process().authkey) File "/usr/lib/python3.9/multiprocessing/connection.py", line 507, in Client c = SocketClient(address) File "/usr/lib/python3.9/multiprocessing/connection.py", line 635, in SocketClient s.connect(address) ConnectionRefusedError: [Errno 111] Connection refused
The above exception was the direct cause of the following exception:
Traceback (most recent call last): File "/usr/lib/memgraph/query_modules/link_prediction.py", line 322, in train training_results, validation_results = inner_train( File "/usr/lib/memgraph/query_modules/mage/link_prediction/link_prediction_util.py", line 417, in inner_train for _, pos_graph, neg_graph, blocks in train_dataloader: File "/usr/local/lib/python3.9/dist-packages/dgl-0.9.1.post1-py3.9-linux-x86_64.egg/dgl/dataloading/dataloader.py", line 512, in __next__ self._next_non_threaded() if not self.use_thread else self._next_threaded() File "/usr/local/lib/python3.9/dist-packages/dgl-0.9.1.post1-py3.9-linux-x86_64.egg/dgl/dataloading/dataloader.py", line 492, in _next_non_threaded batch = next(self.dataloader_it) File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 652, in __next__ data = self._next_data() File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1330, in _next_data idx, data = self._get_data() File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1296, in _get_data success, data = self._try_get_data() File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/dataloader.py", line 1147, in _try_get_data raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e RuntimeError: DataLoader worker (pid(s) 373) exited unexpectedly `
To Reproduce: The error occurs when trying to load the example dataset with the queries from the blog post https://memgraph.com/blog/building-a-recommendation-system-for-telecommunication-packages-using-graph-neural-networks
Expected behavior: Either a concise error message with steps on how to enable it, or the train method completing successfully.