HDF5 Data loader - Githubissues

When working with HDF5 files, one has to pay special attention to closing the file after every access, because otherwise the iterator stalls.

The right way to do it is as follows:

import h5py
import torch
import torch.utils.data as data

class H52Dataset(data.Dataset):
    """Dataset that serves samples from several datasets of one HDF5 file.

    The HDF5 file is opened and closed again inside every ``__getitem__`` and
    ``__len__`` call; no file handle is stored on the instance between calls.

    Each item is a tuple holding one float tensor per entry of ``self.keys``,
    in the same order as ``self.keys``.
    """

    def __init__(self, file_path):
        """Store the file location; the file itself is not opened here.

        Args:
            file_path: Location of the HDF5 file, handed to ``h5py.File``.
        """
        super().__init__()
        self.file_path = file_path
        # Names of the HDF5 datasets read for every sample, in output order.
        self.keys = ["transfers", "c_transfers", "reconstructions", "c_reconstructions"]

    def __getitem__(self, index):
        """Read sample ``index`` from every dataset named in ``self.keys``.

        Args:
            index: Selection along the first axis of each dataset.

        Returns:
            A tuple of float tensors, one per key, each rescaled with
            ``x / 127.5 - 1.0``.

        Raises:
            KeyError: If one of ``self.keys`` is missing from the file.
        """
        with h5py.File(self.file_path, 'r', swmr=True) as h5_file:
            # ``[index, ...]`` selects along the first axis and keeps all
            # remaining axes, whatever the rank of the dataset is.
            # The tuple is materialised inside the ``with`` block, so every
            # read finishes before the file is closed.
            # NOTE(review): the rescale maps 0..255 onto -1..1, so the stored
            # data is presumably uint8 -- confirm against the file's producer.
            out = tuple(
                torch.from_numpy(h5_file[k][index, ...]).float() / 127.5 - 1.0
                for k in self.keys
            )
        return out

    def __len__(self):
        """Return the size of the first axis of the first dataset in ``self.keys``.

        The other datasets are not inspected.
        NOTE(review): presumably all of them share this length -- confirm.
        """
        with h5py.File(self.file_path, 'r', swmr=True) as h5_file:
            _len = h5_file[self.keys[0]].shape[0]
        return _len

pesser / edflow

HDF5 Data loader #272