Closed: ritvikagrawal1 closed this issue 2 years ago.
```python
# coding=utf-8
import pdb
import os, sys, random
import operator

import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.data as data
import cv2
from PIL import Image
from mtcnn import MTCNN

detector = MTCNN()


def crop_face(img_array):
    """Crop a face region from an RGB numpy array using MTCNN eye/nose keypoints.

    The most confident detection is used; the crop box is derived from the eye
    positions and the eye-to-nose distance. This is the same cropping logic that
    was repeated for every image below, factored into one helper. Note that it
    raises IndexError if no face is detected in the frame.
    """
    faces = detector.detect_faces(img_array)
    faces.sort(key=operator.itemgetter('confidence'), reverse=True)
    left_eye = faces[0]['keypoints']['left_eye']
    right_eye = faces[0]['keypoints']['right_eye']
    nose = faces[0]['keypoints']['nose']
    y_start = max(0, min(left_eye[1], right_eye[1]) - (nose[1] - left_eye[1]))
    y_end = min(max(left_eye[1], right_eye[1]) + (nose[1] - left_eye[1]) // 2, img_array.shape[0])
    x_start = max(0, left_eye[0] - (right_eye[0] - left_eye[0]) // 2)
    x_end = min(right_eye[0] + (right_eye[0] - left_eye[0]) // 2, img_array.shape[1])
    return Image.fromarray(img_array[y_start:y_end, x_start:x_end, :]).convert("RGB")


## data generator for afew
class VideoDataset(data.Dataset):
    def __init__(self, video_root, video_list, rectify_label=None, transform=None, csv=False):
        self.imgs_first, self.index = load_imgs_total_frame(video_root, video_list, rectify_label)
        self.transform = transform

    def __getitem__(self, index):
        path_first, target_first = self.imgs_first[index]
        img_cv2 = np.array(Image.open(path_first).convert("RGB"))
        img_first = crop_face(img_cv2)
        if self.transform is not None:
            img_first = self.transform(img_first)
        return img_first, target_first, self.index[index]

    def __len__(self):
        return len(self.imgs_first)


class TripleImageDataset(data.Dataset):
    def __init__(self, video_root, video_list, rectify_label=None, transform=None):
        self.imgs_first, self.imgs_second, self.imgs_third, self.index = \
            load_imgs_tsn(video_root, video_list, rectify_label)
        self.transform = transform

    def __getitem__(self, index):
        path_first, target_first = self.imgs_first[index]
        img_first = crop_face(np.array(Image.open(path_first).convert("RGB")))
        if self.transform is not None:
            img_first = self.transform(img_first)

        path_second, target_second = self.imgs_second[index]
        img_second = crop_face(np.array(Image.open(path_second).convert("RGB")))
        if self.transform is not None:
            img_second = self.transform(img_second)

        path_third, target_third = self.imgs_third[index]
        img_third = crop_face(np.array(Image.open(path_third).convert("RGB")))
        if self.transform is not None:
            img_third = self.transform(img_third)

        return img_first, img_second, img_third, target_first, self.index[index]

    def __len__(self):
        return len(self.imgs_first)


def load_imgs_tsn(video_root, video_list, rectify_label):
    imgs_first, imgs_second, imgs_third = [], [], []
    with open(video_list, 'r') as imf:
        index = []
        for id, line in enumerate(imf):
            video_label = line.strip().split()
            video_name = video_label[0]            # name of video
            label = rectify_label[video_label[1]]  # label of video
            video_path = os.path.join(video_root, video_name)  # path of each video

            ### sample triple imgs from the single video_path ###
            img_lists = os.listdir(video_path)
            img_lists.sort()  # sort frame files in ascending order
            img_count = len(img_lists)  # number of frames in video
            num_per_part = int(img_count) // 3

            if int(img_count) > 3:
                for i in range(img_count):
                    random_select_first = random.randint(0, num_per_part)
                    random_select_second = random.randint(num_per_part, num_per_part * 2)
                    random_select_third = random.randint(2 * num_per_part, len(img_lists) - 1)

                    img_path_first = os.path.join(video_path, img_lists[random_select_first])
                    img_path_second = os.path.join(video_path, img_lists[random_select_second])
                    img_path_third = os.path.join(video_path, img_lists[random_select_third])

                    imgs_first.append((img_path_first, label))
                    imgs_second.append((img_path_second, label))
                    imgs_third.append((img_path_third, label))
            else:
                for j in range(len(img_lists)):
                    img_path_first = os.path.join(video_path, img_lists[j])
                    img_path_second = os.path.join(video_path, random.choice(img_lists))
                    img_path_third = os.path.join(video_path, random.choice(img_lists))

                    imgs_first.append((img_path_first, label))
                    imgs_second.append((img_path_second, label))
                    imgs_third.append((img_path_third, label))

            ### record the video index of every frame ###
            index.append(np.ones(img_count) * id)  # id: 0 : 379
        index = np.concatenate(index, axis=0)
        # index = index.astype(int)
    return imgs_first, imgs_second, imgs_third, index


def load_imgs_total_frame(video_root, video_list, rectify_label):
    imgs_first = []
    with open(video_list, 'r') as imf:
        index = []
        video_names = []
        for id, line in enumerate(imf):
            video_label = line.strip().split()
            video_name = video_label[0]            # name of video
            label = rectify_label[video_label[1]]  # label of video
            video_path = os.path.join(video_root, video_name)  # path of each video

            img_lists = os.listdir(video_path)
            img_lists.sort()  # sort frame files in ascending order
            img_count = len(img_lists)  # number of frames in video

            for frame in img_lists:
                # pdb.set_trace()
                imgs_first.append((os.path.join(video_path, frame), label))

            ### record the video index of every frame ###
            video_names.append(video_name)
            index.append(np.ones(img_count) * id)
        index = np.concatenate(index, axis=0)
        # index = index.astype(int)
    return imgs_first, index


## data generator for ck_plus
class TenFold_VideoDataset(data.Dataset):
    def __init__(self, video_root='', video_list='', rectify_label=None, transform=None,
                 fold=1, run_type='train'):
        self.imgs_first, self.index = load_imgs_tenfold_totalframe(
            video_root, video_list, rectify_label, fold, run_type)
        self.transform = transform
        self.video_root = video_root

    def __getitem__(self, index):
        path_first, target_first = self.imgs_first[index]
        img_cv2 = np.array(Image.open(path_first).convert("RGB"))
        img_first = crop_face(img_cv2)
        if self.transform is not None:
            img_first = self.transform(img_first)
        return img_first, target_first, self.index[index]

    def __len__(self):
        return len(self.imgs_first)


class TenFold_TripleImageDataset(data.Dataset):
    def __init__(self, video_root='', video_list='', rectify_label=None, transform=None,
                 fold=1, run_type='train'):
        self.imgs_first, self.imgs_second, self.imgs_third, self.index = \
            load_imgs_tsn_tenfold(video_root, video_list, rectify_label, fold, run_type)
        self.transform = transform
        self.video_root = video_root

    def __getitem__(self, index):
        path_first, target_first = self.imgs_first[index]
        img_first = crop_face(np.array(Image.open(path_first).convert("RGB")))
        if self.transform is not None:
            img_first = self.transform(img_first)

        path_second, target_second = self.imgs_second[index]
        img_second = crop_face(np.array(Image.open(path_second).convert("RGB")))
        if self.transform is not None:
            img_second = self.transform(img_second)

        path_third, target_third = self.imgs_third[index]
        img_third = crop_face(np.array(Image.open(path_third).convert("RGB")))
        if self.transform is not None:
            img_third = self.transform(img_third)

        return img_first, img_second, img_third, target_first, self.index[index]

    def __len__(self):
        return len(self.imgs_first)


def load_imgs_tenfold_totalframe(video_root, video_list, rectify_label, fold, run_type):
    imgs_first = []
    new_imf = []

    ''' Make ten-fold list '''
    with open(video_list, 'r') as imf:
        imf = imf.readlines()
    if run_type == 'train':
        fold_ = list(range(1, 11))
        fold_.remove(fold)  # e.g. fold=1: [1,2,...,10] -> [2,3,...,10]
        for i in fold_:
            fold_str = str(i) + '-fold'  # e.g. '1-fold'
            for index, item in enumerate(imf):  # items look like '1-fold\t31\n' or 'S037/006 Happy\n'
                if fold_str in item:
                    # the fold header line says how many video lines follow it
                    for j in range(index + 1, index + int(item.split()[1]) + 1):
                        new_imf.append(imf[j])  # e.g. 'S042/006 Happy\n'
    if run_type == 'test':
        fold_ = fold
        fold_str = str(fold_) + '-fold'
        for index, item in enumerate(imf):
            if fold_str in item:
                for j in range(index + 1, index + int(item.split()[1]) + 1):
                    new_imf.append(imf[j])

    index = []
    for id, line in enumerate(new_imf):
        video_label = line.strip().split()
        video_name = video_label[0]  # name of video
        try:
            label = rectify_label[video_label[1]]  # label of video
        except:
            pdb.set_trace()
        video_path = os.path.join(video_root, video_name)  # path of each video

        img_lists = os.listdir(video_path)
        img_lists.sort()  # sort frame files in ascending order
        img_lists = img_lists[-int(round(len(img_lists))):]  # as written, this keeps every frame
        img_count = len(img_lists)  # number of frames in video

        for frame in img_lists:
            imgs_first.append((os.path.join(video_path, frame), label))

        ### record the video index of every frame ###
        index.append(np.ones(img_count) * id)
    index = np.concatenate(index, axis=0)
    return imgs_first, index


def load_imgs_tsn_tenfold(video_root, video_list, rectify_label, fold, run_type):
    imgs_first, imgs_second, imgs_third = [], [], []
    new_imf = []

    ''' Make ten-fold list '''
    with open(video_list, 'r') as imf:
        imf = imf.readlines()
    if run_type == 'train':
        fold_ = list(range(1, 11))
        fold_.remove(fold)  # e.g. fold=1: [1,2,...,10] -> [2,3,...,10]
        for i in fold_:
            fold_str = str(i) + '-fold'
            for index, item in enumerate(imf):
                if fold_str in item:
                    for j in range(index + 1, index + int(item.split()[1]) + 1):
                        new_imf.append(imf[j])
    if run_type == 'test':
        fold_ = fold
        fold_str = str(fold_) + '-fold'
        for index, item in enumerate(imf):
            if fold_str in item:
                for j in range(index + 1, index + int(item.split()[1]) + 1):
                    new_imf.append(imf[j])

    ''' Make triple-image list '''
    index = []
    for id, line in enumerate(new_imf):
        video_label = line.strip().split()
        video_name = video_label[0]            # name of video
        label = rectify_label[video_label[1]]  # label of video
        video_path = os.path.join(video_root, video_name)  # path of each video

        ### sample triple imgs from the single video_path ###
        img_lists = os.listdir(video_path)
        img_lists.sort()  # sort frame files in ascending order
        img_lists = img_lists[-int(round(len(img_lists))):]  # as written, this keeps every frame
        img_count = len(img_lists)  # number of frames in video
        num_per_part = int(img_count) // 5

        if int(img_count) > 5:
            for i in range(img_count):
                # pdb.set_trace()
                random_select_first = random.randint(0, num_per_part)
                random_select_second = random.randint(num_per_part, 2 * num_per_part)
                random_select_third = random.randint(2 * num_per_part, 3 * num_per_part)

                img_path_first = os.path.join(video_path, img_lists[random_select_first])
                img_path_second = os.path.join(video_path, img_lists[random_select_second])
                img_path_third = os.path.join(video_path, img_lists[random_select_third])

                imgs_first.append((img_path_first, label))
                imgs_second.append((img_path_second, label))
                imgs_third.append((img_path_third, label))
        else:
            for j in range(len(img_lists)):
                img_path_first = os.path.join(video_path, img_lists[j])
                img_path_second = os.path.join(video_path, random.choice(img_lists))
                img_path_third = os.path.join(video_path, random.choice(img_lists))

                imgs_first.append((img_path_first, label))
                imgs_second.append((img_path_second, label))
                imgs_third.append((img_path_third, label))

        ### record the video index of every frame ###
        index.append(np.ones(img_count) * id)  # id: 0 : 379
    index = np.concatenate(index, axis=0)
    # index = index.astype(int)
    # pdb.set_trace()
    return imgs_first, imgs_second, imgs_third, index
```
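For context, here is a minimal sketch of how one of these datasets might be driven through a `DataLoader` to reproduce the problem. The label map, transform, and paths below are hypothetical placeholders (they are not taken from this issue); substitute your own AFEW frame directory and list file:

```python
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Hypothetical label map and transform; adjust to your own setup.
rectify_label = {'Happy': 0, 'Angry': 1, 'Sad': 2, 'Neutral': 3,
                 'Fear': 4, 'Disgust': 5, 'Surprise': 6}
transform = transforms.Compose([transforms.Resize((224, 224)),
                                transforms.ToTensor()])

dataset = VideoDataset(video_root='./data/frames',          # hypothetical path
                       video_list='./data/train_list.txt',  # hypothetical path
                       rectify_label=rectify_label,
                       transform=transform)
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=0)

for imgs, targets, video_ids in loader:
    print(imgs.shape, targets[:4], video_ids[:4])
    break
```

Keeping `num_workers=0` in a quick test like this avoids sharing the module-level MTCNN detector across worker processes, which makes failures easier to attribute to the dataset code itself.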
Can you please point out the mistake?
No error is reported, so I'm closing this. In the future, please be mindful of others and don't @-mention everyone like this.