Open hhhhhumengshun opened 2 years ago
For 8x frame interpolation, we follow the FLAVR project. It seems that FLAVR does not provide the Adobe240 evaluation dataset, so we select a subset of all Adobe240 video sequences and center-crop the frames to 512x512. Below is our Adobe240 dataset Python script:
import glob
import os
import random

import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from utils import read
def random_crop(img_0, img_1, img_2, img_3, img_4, img_5, img_6, img_7, img_8, crop_size):
    """Cut the same randomly placed window out of nine frames.

    The window offset is drawn once (row offset first, then column offset)
    with ``np.random.randint`` and applied to every frame, so the crops stay
    spatially aligned.

    Args:
        img_0 ... img_8: arrays indexed as ``[row, col, channel]``. Only the
            shape of ``img_0`` is inspected; the other frames are assumed to
            have the same spatial size.
        crop_size: ``(height, width)`` of the window.

    Returns:
        Tuple of the nine cropped frames, in input order.
    """
    crop_h, crop_w = crop_size[0], crop_size[1]
    full_h, full_w, _ = img_0.shape

    # Draw order (rows, then columns) matters for seeded reproducibility.
    top = np.random.randint(0, full_h - crop_h + 1)
    left = np.random.randint(0, full_w - crop_w + 1)

    rows = slice(top, top + crop_h)
    cols = slice(left, left + crop_w)
    frames = (img_0, img_1, img_2, img_3, img_4, img_5, img_6, img_7, img_8)
    return tuple(frame[rows, cols, :] for frame in frames)
def center_crop(img_0, img_1, img_2, img_3, img_4, img_5, img_6, img_7, img_8, crop_size):
    """Cut the same centred window out of nine frames.

    Args:
        img_0 ... img_8: arrays indexed as ``[row, col, channel]``. Only the
            shape of ``img_0`` is inspected; the other frames are assumed to
            have the same spatial size.
        crop_size: ``(height, width)`` of the window.

    Returns:
        Tuple of the nine cropped frames, in input order, each exactly
        ``crop_size`` in its first two dimensions.

    Raises:
        ValueError: if the requested window is larger than ``img_0``.
    """
    h, w = crop_size[0], crop_size[1]
    ih, iw, _ = img_0.shape
    if h > ih or w > iw:
        # Negative start indices would otherwise wrap around and silently
        # return an off-centre or wrongly sized window.
        raise ValueError(
            "crop size ({}, {}) exceeds image size ({}, {})".format(h, w, ih, iw)
        )

    top = ih // 2 - h // 2
    left = iw // 2 - w // 2
    # End at start + size (not centre + size // 2) so odd crop sizes keep
    # their full extent; for even sizes the window is unchanged.
    rows = slice(top, top + h)
    cols = slice(left, left + w)
    frames = (img_0, img_1, img_2, img_3, img_4, img_5, img_6, img_7, img_8)
    return tuple(frame[rows, cols, :] for frame in frames)
def augment(img_0, img_1, img_2, img_3, img_4, img_5, img_6, img_7, img_8):
    """Apply the same random flips to nine frames.

    Two independent coin tosses (``random.uniform(0, 1) < 0.5``) decide, in
    this order:

    1. whether to reverse the third axis of every frame
       (presumably the channel order, assuming H x W x C frames);
    2. whether to reverse the second axis of every frame
       (presumably a horizontal flip under the same assumption).

    Returns:
        Tuple of the nine (possibly flipped) frames, in input order. Flipped
        frames are negative-stride views of the inputs, not copies.
    """
    frames = [img_0, img_1, img_2, img_3, img_4, img_5, img_6, img_7, img_8]

    reverse_last_axis = random.uniform(0, 1) < 0.5
    if reverse_last_axis:
        frames = [frame[:, :, ::-1] for frame in frames]

    reverse_second_axis = random.uniform(0, 1) < 0.5
    if reverse_second_axis:
        frames = [frame[:, ::-1] for frame in frames]

    return tuple(frames)
class Adobe240(Dataset):
    """Adobe240 frame groups for 8x video frame interpolation.

    ``data_root`` is expected to contain one sub-directory per video, each
    holding that video's frames as individual image files whose sorted
    file names are in temporal order.

    Every tenth video (index 9, 19, ...) of the *sorted* directory listing is
    used. Sorting matters: ``os.listdir`` returns entries in arbitrary order,
    so without it the selected subset would differ between machines.

    Each sample is a run of ``setLength`` consecutive frames; successive runs
    of one video start ``interFrames + 1`` frames apart, so they share their
    boundary frame.

    NOTE: ``__getitem__`` is written for the defaults ``interFrames=7`` and
    ``n_inputs=2`` (two input frames, seven intermediate frames, time steps
    k/8); other values are stored but not supported by the sample layout.

    Args:
        data_root: directory with one sub-directory of frames per video.
        mode: ``'train'`` (random 256x256 crop plus augmentation), ``'test'``
            (512x512 centre crop); any other value leaves frames uncropped.
        interFrames: number of intermediate frames between two input frames.
        n_inputs: number of input frames per sample.
        ext: accepted for interface compatibility; currently unused.
    """

    def __init__(self, data_root='/home/ltkong/Datasets/adobe240_frames', mode="train", interFrames=7, n_inputs=2, ext="png"):
        self.mode = mode
        self.interFrames = interFrames
        self.n_inputs = n_inputs
        # We require this many frames in total for interpolating `interFrames`
        # intermediate frames with `n_inputs` input frames.
        self.setLength = (n_inputs - 1) * (interFrames + 1) + 1
        self.data_root = os.path.join(data_root)

        # Sort before sub-sampling so the chosen videos are reproducible.
        video_list = sorted(os.listdir(self.data_root))[9::10]

        self.frames_list = []
        self.file_list = []
        stride = interFrames + 1
        for video in video_list:
            frames = sorted(os.listdir(os.path.join(self.data_root, video)))
            # Non-positive when the video is shorter than one full group, in
            # which case range() below is empty and the video is skipped.
            n_sets = (len(frames) - self.setLength) // stride + 1
            for i in range(n_sets):
                group = frames[stride * i:stride * i + self.setLength]
                # Paths are stored relative to data_root; without storing
                # them here the dataset would always report length 0.
                self.file_list.append([os.path.join(video, f) for f in group])

    def __getitem__(self, idx):
        """Load one frame group.

        Returns:
            A 16-tuple ``(img_0, ..., img_8, emb_1, ..., emb_7)``:
            ``img_0`` / ``img_8`` are the two input frames and
            ``img_1`` ... ``img_7`` the intermediate ground-truth frames,
            each transposed with ``(2, 0, 1)`` (presumably H x W x C to
            C x H x W) and scaled to float32 by 1/255; ``emb_k`` is the
            float32 time step ``k / 8`` with shape ``(1, 1, 1)``.
        """
        imgpaths = [os.path.join(self.data_root, fp) for fp in self.file_list[idx]]

        pick_idxs = list(range(0, self.setLength, self.interFrames + 1))
        rem = self.interFrames % 2
        mid = self.setLength // 2
        half = self.interFrames // 2
        gt_idx = list(range(mid - half, mid + half + rem))

        input_paths = [imgpaths[i] for i in pick_idxs]
        gt_paths = [imgpaths[i] for i in gt_idx]

        # Fixed layout: first input, seven ground-truth frames, second input.
        ordered_paths = [input_paths[0]] + [gt_paths[k] for k in range(7)] + [input_paths[1]]
        frames = [np.array(read(path)) for path in ordered_paths]

        if self.mode == 'train':
            frames = random_crop(*frames, (256, 256))
            frames = augment(*frames)
        elif self.mode == 'test':
            frames = center_crop(*frames, (512, 512))

        frames = [frame.transpose((2, 0, 1)).astype(np.float32) / 255.0 for frame in frames]
        embs = [np.array(k / 8).reshape(1, 1, 1).astype(np.float32) for k in range(1, 8)]
        return tuple(frames) + tuple(embs)

    def __len__(self):
        """Return the number of frame groups found under ``data_root``."""
        return len(self.file_list)
If you have found a different Adobe240 test-set selection, you can report IFRNet's results using your own evaluation script. Thanks.
Hi, may I get the list of your test videos for Adobe240fps datasets?
I would like to know which subset of the Adobe240 dataset is used. I tested on all of the videos and the values are very low. Are the frames also cropped to 512x512? Thanks.