snap-stanford / ogb

Benchmark datasets, data loaders, and evaluators for graph machine learning
https://ogb.stanford.edu
MIT License
1.89k stars 397 forks source link

RuntimeError: PytorchStreamReader failed locating file data.pkl: file not found #320

Closed pending1face closed 2 years ago

pending1face commented 2 years ago

I am getting a runtime error when I try to download the dataset. This is the code I am running:

import torch
import numpy as np
import torch_geometric.datasets
from ogb.lsc import PCQM4Mv2Dataset
from ogb.utils import smiles2graph
import pyximport

dataset = PCQM4Mv2Dataset(root = 'ogblsc/', smiles2graph = smiles2graph)

And the error is:

RuntimeError                              Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_3776\3893196584.py in <module>
----> 1 dataset = PCQM4Mv2Dataset(root = 'ogblsc/', smiles2graph = smiles2graph)

C:\ProgramData\Anaconda3\envs\graphormer\lib\site-packages\ogb\lsc\pcqm4mv2.py in __init__(self, root, smiles2graph, only_smiles)
     50             self.prepare_smiles()
     51         else:
---> 52             self.prepare_graph()
     53 
     54     def download(self):

C:\ProgramData\Anaconda3\envs\graphormer\lib\site-packages\ogb\lsc\pcqm4mv2.py in prepare_graph(self)
     80         if osp.exists(pre_processed_file_path):
     81             # if pre-processed file already exists
---> 82             loaded_dict = torch.load(pre_processed_file_path, 'rb')
     83             self.graphs, self.labels = loaded_dict['graphs'], loaded_dict['labels']
     84 

C:\ProgramData\Anaconda3\envs\graphormer\lib\site-packages\torch\serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
    605                     opened_file.seek(orig_position)
    606                     return torch.jit.load(opened_file)
--> 607                 return _load(opened_zipfile, map_location, pickle_module, **pickle_load_args)
    608         return _legacy_load(opened_file, map_location, pickle_module, **pickle_load_args)
    609 

C:\ProgramData\Anaconda3\envs\graphormer\lib\site-packages\torch\serialization.py in _load(zip_file, map_location, pickle_module, pickle_file, **pickle_load_args)
    876 
    877     # Load the data (which may in turn use `persistent_load` to load tensors)
--> 878     data_file = io.BytesIO(zip_file.get_record(pickle_file))
    879 
    880     unpickler = UnpicklerWrapper(data_file, **pickle_load_args)

RuntimeError: PytorchStreamReader failed locating file data.pkl: file not found

weihua916 commented 2 years ago

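Hi! I suspect your pre-processed .pt file is corrupted (see here). Can you delete the dataset folder and try everything again, so that the dataset is re-downloaded and re-processed from scratch instead of being loaded from the existing file?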