Closed lucasfbn closed 2 years ago
import os

import h5py
import numpy as np
import pandas as pd
from tqdm import tqdm


def save_dict_to_hdf5(dic, filename):
    """Write a (possibly nested) dict of pandas objects to an HDF5 store.

    Nested dict keys become the components of the store key, so
    ``{"a": {"b": frame}}`` is written under ``/a/b``.

    Args:
        dic: Dict whose keys are strings and whose leaves are objects that
            ``pd.HDFStore`` accepts on item assignment.
        filename: Path of the HDF5 file; it is opened in ``'w'`` mode.
    """
    with pd.HDFStore(filename, 'w') as h5file:
        recursively_save_dict_contents_to_group(h5file, '/', dic)


def recursively_save_dict_contents_to_group(h5file, path, dic):
    """Store every leaf of ``dic`` in ``h5file`` under ``path``.

    Args:
        h5file: Open store supporting ``h5file[key] = value``.
        path: Key prefix for this level; must end with ``'/'``.
        dic: Dict to store. Keys are concatenated onto ``path``, so they
            must be strings; dict values are descended into recursively.
    """
    for key, item in dic.items():
        if isinstance(item, dict):
            recursively_save_dict_contents_to_group(h5file, path + key + '/', item)
        else:
            h5file[path + key] = item


def load_dict_from_hdf5(filename):
    """Read an HDF5 store back into a nested dict.

    Args:
        filename: Path of an existing HDF5 file.

    Returns:
        Nested dict mirroring the key hierarchy found in the store.
    """
    # Loading never writes, so open read-only instead of 'r+'.
    with pd.HDFStore(filename, 'r') as h5file:
        return recursively_load_dict_contents_from_group(h5file, '/')


def recursively_load_dict_contents_from_group(h5file, path):
    """Rebuild a nested dict from the flat, slash-separated keys of a store.

    Args:
        h5file: Object exposing ``keys()`` (keys such as ``'/a/b/c'``) and
            ``get(key)``.
        path: Unused; every key of the store is loaded regardless of its
            value. Kept so the signature mirrors the save counterpart.

    Returns:
        Dict in which each key component is one nesting level and each leaf
        is the object returned by ``h5file.get``.
    """
    tree = {}
    for key in h5file.keys():
        # Drop the empty component in front of the leading '/'.
        parts = key.split("/")[1:]
        if not parts:
            continue
        node = tree
        # Descend from the *current* node; indexing from the root here would
        # misplace every leaf nested more than two levels deep.
        for part in parts[:-1]:
            node = node.setdefault(part, {})
        node[parts[-1]] = h5file.get(key)
    return tree


def dummy_dict(n_groups=1000, n_series=25, n_frames=200):
    """Build a nested dict of random pandas objects for trying out the I/O.

    Args:
        n_groups: Number of top-level entries (keys ``"0"``, ``"1"``, ...).
        n_series: Number of one-element Series stored under ``"1"`` in each
            top-level entry.
        n_frames: Number of 14x20 random DataFrames stored under ``"2"`` in
            each top-level entry.

    Returns:
        ``{group: {"1": {name: Series}, "2": {name: DataFrame}}}``.
    """
    data = {}
    for group in range(n_groups):
        series = {str(i): pd.Series(i) for i in range(n_series)}
        frames = {
            str(j): pd.DataFrame(np.random.randn(14, 20))
            for j in range(n_frames)
        }
        data[str(group)] = {"1": series, "2": frames}
    return data


if __name__ == '__main__':
    data = dummy_dict()
    print(len(data))

    # data = {'x': 10,
    #         'y': np.arange(10),
    #         'd': {'x': np.ones((2, 3)),
    #               'b': 10.5}}
    #
    # data = {"x": pd.DataFrame({"hallo": [10, 12, 14], "jo": [423, 5436, 2]}),
    #         "y": {"a": pd.Series(), "b": pd.Series()}}
    # print("writing")
    filename = 'test.h5'
    # NOTE: saving is disabled, so 'test.h5' must already exist for the load
    # below to succeed.
    # save_dict_to_hdf5(data, filename)
    dd = load_dict_from_hdf5(filename)
    # print(dd)
H5 attempt: