Hello, I have .npy data with shape (n, 128, 128). I need to select rainfall events using the sliding-window method from your paper and save them so that they meet the loading requirements of the h5 files. Is the shape of the h5 data I save at the end, (n1, 25, 128, 128), correct?
import h5py
import numpy as np
import torch
from torch.utils.data import Dataset
from torchvision import transforms
# Read the radar array from disk; the code below walks its first axis frame by frame.
radar_data = np.load(file='./merged_data_train_radar_256_0-1.npy')
# Define the sliding window function
def select_precipitation_events(radar_data, pixel_threshold, *, len_in=5,
                                len_out=20, start=10):
    """Select fixed-length precipitation events with a sliding window.

    The frame index ``i`` walks along the first axis of ``radar_data``.
    Whenever the mean of frame ``i`` exceeds ``pixel_threshold``, the window
    ``radar_data[i - len_in:i + len_out]`` becomes a candidate event. An
    accepted candidate is stored as one whole event and the index jumps
    ahead by ``len_out``; otherwise the index advances by one frame.

    Args:
        radar_data: Array-like whose first axis is the frame index, e.g.
            shape ``(n, H, W)``.
        pixel_threshold: Threshold applied to the mean pixel value.
        len_in: Number of frames taken before the trigger frame.
        len_out: Number of frames taken from the trigger frame onwards.
        start: First frame index examined; values below ``len_in`` are
            raised to ``len_in``.

    Returns:
        Array of shape ``(n_events, len_in + len_out, *frame_shape)``.
        When no event qualifies, the first axis has length 0 and the
        remaining axes keep that same layout.
    """
    frames = np.asarray(radar_data)
    window = len_in + len_out
    # A start below len_in would make the slice start negative, which wraps
    # around to the end of the array instead of failing.
    i = max(start, len_in)
    events = []
    while i + len_out < len(frames):
        if np.mean(frames[i]) > pixel_threshold:
            event = frames[i - len_in:i + len_out]
            # NOTE(review): this compares the mean over the whole window with
            # window * pixel_threshold / 2, exactly as the original did.
            # Confirm against the paper whether a sum of per-frame means was
            # intended instead.
            if np.mean(event) >= window * pixel_threshold / 2:
                # Keep the window as one unit (append, not extend) so the
                # result stays grouped per event.
                events.append(event)
                i += len_out
                continue
        i += 1
    if not events:
        return np.empty((0, window) + frames.shape[1:], dtype=frames.dtype)
    return np.stack(events)
# Set your pixel threshold
pixel_threshold = ... # replace with your pixel threshold
# Select precipitation events
precipitation_events = select_precipitation_events(radar_data, pixel_threshold)
# Split the data along the first axis, in order: first 80% train, next 10% val, last 10% test
train_end = int(0.8 * len(precipitation_events))
val_end = int(0.9 * len(precipitation_events))
train_data = precipitation_events[:train_end]
val_data = precipitation_events[train_end:val_end]
test_data = precipitation_events[val_end:]
# Save to H5 file: one group per split, each holding 'data' and its length 'all_len'
with h5py.File('shanghai_radar.h5', 'w') as f:
    # Use 'astype' to ensure compatible data type with the dataset class
    # NOTE(review): astype(np.uint8) truncates fractional values. If the input
    # is scaled to [0, 1] (as the input file name suggests), nearly every pixel
    # would be stored as 0 -- confirm the value range the dataset class expects.
    f.create_dataset('train/data', data=train_data.astype(np.uint8))
    f.create_dataset('train/all_len', data=len(train_data))
    f.create_dataset('val/data', data=val_data.astype(np.uint8))
    f.create_dataset('val/all_len', data=len(val_data))
    f.create_dataset('test/data', data=test_data.astype(np.uint8))
    f.create_dataset('test/all_len', data=len(test_data))
# Verify loading with the 'Shanghai' dataset class
class Shanghai(Dataset):
# ... the class as you've provided ...
# Assuming the transformations are correctly set for the data
# Smoke check: open the file written above through the Shanghai dataset class
# and pull one sample from the 'train' split.
# NOTE(review): the class body is elided in this snippet, so the meaning of the
# positional 128 (presumably the image size) and the existence of a sample()
# method cannot be confirmed here -- verify against the real class.
dataset = Shanghai('shanghai_radar.h5', 128, type='train')
sample = dataset.sample()
Hi, the .h5 file format is not required.
You can simply keep every radar sequence as [num_frame, num_channel, height, width], where num_channel is typically 1 for radar data.
Hello, I have .npy data with shape (n, 128, 128). I need to select rainfall events using the sliding-window method from your paper and save them so that they meet the loading requirements of the h5 files. Is the shape of the h5 data I save at the end, (n1, 25, 128, 128), correct?