I just found the solution. Just use some transformation matrix in the function normalizeData_face in the file normalization_example.py for the landmark annotations.
Could you please elaborate on that? What do you mean by "some transformation matrix"? As far as I know, the 224 and 448 datasets don't include any transformation matrices. I would also like to get the eye crops; the option I have now is to run landmark detection on the normalized face patches and then crop them.
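Roughly, the fallback I have in mind is something like this; a minimal sketch, assuming dlib's 68-point shape predictor is available (the model path is a placeholder and eye_crops is a hypothetical helper):

import cv2
import dlib
import numpy as np

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")  # placeholder path

def eye_crops(face_patch):
    """Detect 68 landmarks on a normalized face patch and cut boxes around the eyes."""
    gray = cv2.cvtColor(face_patch, cv2.COLOR_BGR2GRAY)
    rects = detector(gray, 1)
    if not rects:
        return None, None
    pts = np.array([(p.x, p.y) for p in predictor(gray, rects[0]).parts()])

    def crop(i0, i1):  # i0, i1: eye-corner indices in the 68-point convention
        (x1, y1), (x2, y2) = pts[i0], pts[i1]
        cx, cy = int((x1 + x2) / 2), int((y1 + y2) / 2)
        r = int(0.75 * abs(x2 - x1))  # box half-size from the corner distance
        return face_patch[max(cy - r, 0):cy + r, max(cx - r, 0):cx + r]

    return crop(36, 39), crop(42, 45)  # left and right eye corners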
Hi @AbdouMechraoui. I've just written some code that leverages parts of normalization_example.py to recover the landmarks in the normalized face patch, and then crops the eyes from it.
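The key step is reusing the same homography W that normalizeData_face builds for warping the face image, and applying it to the raw-frame landmarks as well. A minimal sketch of that step alone (warp_landmarks is a hypothetical helper; W is computed inside the normalization code as cam_norm @ S @ R @ inv(cam)):

import cv2
import numpy as np

def warp_landmarks(landmarks, W):
    """Map (N, 2) raw-frame landmarks into the normalized face patch."""
    pts = landmarks.reshape(-1, 1, 2).astype(np.float64)
    return cv2.perspectiveTransform(pts, W).reshape(-1, 2)

Full code: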
import numpy as np
import h5py
import torch
from torch.utils.data import Dataset, DataLoader
import os
import json
import random
from typing import List
import cv2
import pandas as pd
import albumentations as A
import imgaug.augmenters as iaa
import math

# Photometric and mild geometric augmentation applied to each eye patch.
transformer = A.Compose(
    [
        A.ColorJitter(brightness=0.35, contrast=0.5, saturation=0.5, hue=0.2, always_apply=False, p=0.7),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=(-0.1, 0.1), rotate_limit=5, interpolation=1, border_mode=1, always_apply=False, p=1),
    ]
)

# Stronger corruption-style augmentation.
strongseq = iaa.Sequential([
    iaa.CoarseDropout((0.1, 0.15), size_percent=(0.02, 0.03)),
    iaa.JpegCompression(compression=(20, 97)),
    iaa.CoarsePepper(0.05, size_percent=(0.01, 0.1)),
])
def get_train_loader(data_dir,
                     batch_size,
                     num_workers=4,
                     is_shuffle=True):
    # Load the dataset split file.
    refer_list_file = f'{data_dir}/raw/data/train_test_split.json'
    print('load the train file list from: ', refer_list_file)
    with open(refer_list_file, 'r') as f:
        datastore = json.load(f)

    # There are three subsets in the ETH-XGaze dataset: train, test and test_person_specific.
    # train: the training set, with data from 80 participants
    # test: the test set for cross-dataset and within-dataset evaluations
    # test_person_specific: the evaluation subset for the person-specific setting
    sub_folder_use = 'train'
    train_set = GazeDataset(dataset_path=data_dir, keys_to_use=datastore[sub_folder_use], sub_folder=sub_folder_use,
                            transform=None, is_shuffle=is_shuffle, is_load_label=True)
    train_loader = DataLoader(train_set, batch_size=batch_size, num_workers=num_workers)
    return train_loader
def get_test_loader(data_dir,
                    batch_size,
                    num_workers=4,
                    is_shuffle=True):
    # Load the dataset split file (see get_train_loader for the subset description).
    refer_list_file = f'{data_dir}/raw/data/train_test_split.json'
    print('load the test file list from: ', refer_list_file)
    with open(refer_list_file, 'r') as f:
        datastore = json.load(f)
    sub_folder_use = 'test'
    test_set = GazeDataset(dataset_path=data_dir, keys_to_use=datastore[sub_folder_use], sub_folder=sub_folder_use,
                           transform=None, is_shuffle=is_shuffle, is_load_label=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, num_workers=num_workers)
    return test_loader
def get_val_loader(data_dir,
                   batch_size,
                   num_workers=4,
                   is_shuffle=True):
    # Load the dataset split file (see get_train_loader for the subset description).
    refer_list_file = f'{data_dir}/raw/data/train_test_split.json'
    print('load the val file list from: ', refer_list_file)
    with open(refer_list_file, 'r') as f:
        datastore = json.load(f)
    sub_folder_use = 'val'
    val_set = GazeDataset(dataset_path=data_dir, keys_to_use=datastore[sub_folder_use], sub_folder=sub_folder_use,
                          transform=None, is_shuffle=is_shuffle, is_load_label=True, augmentation=False)
    val_loader = DataLoader(val_set, batch_size=batch_size, num_workers=num_workers)
    return val_loader
class GazeDataset(Dataset):
    EYE_PATCH_SIZE = (60, 60)

    def __init__(self, dataset_path: str, keys_to_use: List[str] = None, sub_folder='', transform=None, is_shuffle=True,
                 index_file=None, is_load_label=True, augmentation=True):
        self.path = dataset_path
        self.hdfs = {}
        self.sub_folder = sub_folder
        self.is_load_label = is_load_label
        self.augmentation = augmentation

        # Select keys
        # TODO: select only people with sufficient entries?
        self.selected_keys = [k for k in keys_to_use]
        assert len(self.selected_keys) > 0

        for num_i in range(0, len(self.selected_keys)):
            file_path = os.path.join(self.path, self.sub_folder, self.selected_keys[num_i])
            self.hdfs[num_i] = h5py.File(file_path, 'r', swmr=True)
            assert self.hdfs[num_i].swmr_mode

        # Construct a mapping from the full-data index to (key, person-specific index).
        if index_file is None:
            self.idx_to_kv = []
            for num_i in range(0, len(self.selected_keys)):
                n = self.hdfs[num_i]["face_patch"].shape[0]
                self.idx_to_kv += [(num_i, i) for i in range(n)]
        else:
            print('load the file: ', index_file)
            self.idx_to_kv = np.loadtxt(index_file, dtype=int)  # np.int is deprecated; use the builtin

        for num_i in range(0, len(self.hdfs)):
            if self.hdfs[num_i]:
                self.hdfs[num_i].close()
                self.hdfs[num_i] = None

        if is_shuffle:
            random.shuffle(self.idx_to_kv)  # shuffle the order to stabilize training

        self.hdf = None
        self.transform = transform

        # Added: load the raw-data annotations (landmarks, head pose, gaze targets) and calibration.
        self.__load_annotation(annotation_dir=f'{self.path}/raw')
        self.hashtable = {}

    def __load_annotation(self, annotation_dir):
        # Note: distortion correction is disabled here; it costs too much time
        # and the face is always near the center of the image.
        # Load the per-camera calibration.
        self.camera_matrix = []
        self.camera_distortion = []
        self.cam_translation = []
        self.cam_rotation = []
        self.annotation_dir = annotation_dir
        print('Load the camera parameters')
        for cam_id in range(0, 18):
            file_name = f'{self.annotation_dir}/calibration/cam_calibration/cam{cam_id:02d}.xml'
            fs = cv2.FileStorage(file_name, cv2.FILE_STORAGE_READ)
            self.camera_matrix.append(fs.getNode('Camera_Matrix').mat())
            self.camera_distortion.append(fs.getNode('Distortion_Coefficients').mat())  # loaded but not applied
            self.cam_translation.append(fs.getNode('cam_translation').mat())
            self.cam_rotation.append(fs.getNode('cam_rotation').mat())
            fs.release()

        # Load the generic 3D face model and keep the six landmarks used for
        # normalization (four eye corners and two mouth corners).
        face_model_load = np.loadtxt(f'{self.annotation_dir}/calibration/face_model.txt')
        landmark_use = [20, 23, 26, 29, 15, 19]
        self.face_model = face_model_load[landmark_use, :]

    def __normalizeData_face(self, face_model, landmarks, hr, ht, gc, cam):
        # Normalized camera parameters.
        focal_norm = 960      # focal length of the normalized camera
        distance_norm = 300   # normalized distance between face and camera
        roiSize = (448, 448)  # size of the cropped face image

        # Compute the estimated 3D positions of the landmarks.
        ht = ht.reshape((3, 1))
        gc = gc.reshape((3, 1))
        hR = cv2.Rodrigues(hr)[0]  # rotation matrix
        Fc = np.dot(hR, face_model.T) + ht
        two_eye_center = np.mean(Fc[:, 0:4], axis=1).reshape((3, 1))
        mouth_center = np.mean(Fc[:, 4:6], axis=1).reshape((3, 1))
        face_center = np.mean(np.concatenate((two_eye_center, mouth_center), axis=1), axis=1).reshape((3, 1))

        # ---------- normalize the image ----------
        distance = np.linalg.norm(face_center)  # actual distance between face and original camera
        z_scale = distance_norm / distance
        cam_norm = np.array([
            [focal_norm, 0, roiSize[0] / 2],
            [0, focal_norm, roiSize[1] / 2],
            [0, 0, 1.0],
        ])
        S = np.array([  # scaling matrix
            [1.0, 0.0, 0.0],
            [0.0, 1.0, 0.0],
            [0.0, 0.0, z_scale],
        ])
        hRx = hR[:, 0]
        forward = (face_center / distance).reshape(3)
        down = np.cross(forward, hRx)
        down /= np.linalg.norm(down)
        right = np.cross(down, forward)
        right /= np.linalg.norm(right)
        R = np.c_[right, down, forward].T  # rotation matrix R
        W = np.dot(np.dot(cam_norm, S), np.dot(R, np.linalg.inv(cam)))  # transformation matrix

        # ---------- normalize the rotation ----------
        hR_norm = np.dot(R, hR)  # rotation matrix in the normalized space
        hr_norm = cv2.Rodrigues(hR_norm)[0]  # convert the rotation matrix to a rotation vector

        # ---------- normalize the gaze vector ----------
        gc_normalized = gc - face_center  # gaze vector
        gc_normalized = np.dot(R, gc_normalized)
        gc_normalized = gc_normalized / np.linalg.norm(gc_normalized)

        # Warp the facial landmarks with the same transformation matrix W.
        num_point, num_axis = landmarks.shape
        det_point = landmarks.reshape([num_point, 1, num_axis])
        det_point_warped = cv2.perspectiveTransform(det_point, W)
        det_point_warped = det_point_warped.reshape(num_point, num_axis)

        head = hr_norm.reshape(1, 3)
        M = cv2.Rodrigues(head)[0]
        Zv = M[:, 2]
        head_2d = np.array([math.asin(Zv[1]), math.atan2(Zv[0], Zv[2]), 0.0])  # pitch, yaw, with roll fixed to 0
        return head_2d, gc_normalized, det_point_warped, R
    def __len__(self):
        return len(self.idx_to_kv)

    def __del__(self):
        for num_i in range(0, len(self.hdfs)):
            if self.hdfs[num_i]:
                self.hdfs[num_i].close()
                self.hdfs[num_i] = None

    def __segment_eye(self, image, lmks, eye='left', ow=64, oh=64):
        if eye == 'left':
            # Left eye corners (68-point convention).
            x1, y1 = lmks[36]
            x2, y2 = lmks[39]
        else:
            # Right eye corners.
            x1, y1 = lmks[42]
            x2, y2 = lmks[45]
        eye_width = 1.5 * np.linalg.norm([x1 - x2, y1 - y2])  # distance between the corners, with margin
        cx, cy = 0.5 * (x1 + x2), 0.5 * (y1 + y2)

        # Center the image on the middle of the eye.
        translate_mat = np.eye(3)
        translate_mat[:2, 2] = [-cx, -cy]

        # Scale so that the eye fills the output width.
        scale = ow / (eye_width + 1e-6)
        scale_mat = np.eye(3)
        scale_mat[0, 0] = scale_mat[1, 1] = scale

        # Re-center on the output patch.
        center_mat = np.eye(3)
        center_mat[:2, 2] = [0.5 * ow, 0.5 * oh]

        # Compose the transforms and crop the eye patch.
        transform_mat = center_mat @ scale_mat @ translate_mat
        eye_image = cv2.warpAffine(image, transform_mat[:2, :], (ow, oh))  # dsize is (width, height)
        return eye_image

    def __crop_eye(self, face, fid, key, idx):
        cam_id = fid["cam_index"][idx][0] - 1
        key_tmp = key.replace(".h5", "")
        df = pd.read_csv(f"{self.annotation_dir}/data/annotation_{self.sub_folder}/{key_tmp}.csv", header=None)
        line = df.loc[idx, :].to_numpy()
        lmks = line[13:149].reshape(-1, 2).astype(float)  # 68 landmarks in the original frame
        gaze_label_3d = np.array([float(line[4]), float(line[5]), float(line[6])]).reshape(3, 1)  # gaze point in the screen coordinate system
        hr = np.array([float(line[7]), float(line[8]), float(line[9])]).reshape(3, 1)
        ht = np.array([float(line[10]), float(line[11]), float(line[12])]).reshape(3, 1)
        head_2D, gaze_norm, landmark_norm, mat_norm_face = \
            self.__normalizeData_face(self.face_model, lmks, hr, ht, gaze_label_3d, self.camera_matrix[cam_id])
        landmark_norm = landmark_norm / 2  # 448-size face --> 224-size face
        left_eye = self.__segment_eye(face, landmark_norm, eye='left', ow=self.EYE_PATCH_SIZE[1], oh=self.EYE_PATCH_SIZE[0])
        right_eye = self.__segment_eye(face, landmark_norm, eye='right', ow=self.EYE_PATCH_SIZE[1], oh=self.EYE_PATCH_SIZE[0])
        return left_eye, right_eye, head_2D

    def __preprocess(self, eye):
        # Resize, scale pixel values to [-1, 1], and move channels first.
        eye = cv2.resize(eye, self.EYE_PATCH_SIZE)
        eye = eye.astype(np.float32)
        eye *= 2.0 / 255.0
        eye -= 1.0
        eye = np.transpose(eye, (2, 0, 1))
        return eye

    def __getitem__(self, idx):
        key, idx = self.idx_to_kv[idx]

        # Cache the opened HDF5 file per subject.
        if self.selected_keys[key] not in self.hashtable:
            self.hdf = h5py.File(os.path.join(self.path, self.sub_folder, self.selected_keys[key]), 'r', swmr=True)
            self.hashtable[self.selected_keys[key]] = self.hdf
        else:
            self.hdf = self.hashtable[self.selected_keys[key]]
        assert self.hdf.swmr_mode

        # Get the face image and crop the eyes.
        image = self.hdf['face_patch'][idx, :]
        left_eye, right_eye, head_2D = self.__crop_eye(image, self.hdf, self.selected_keys[key], idx)

        # Augmentation.
        if self.augmentation:
            left_eye = transformer(image=left_eye)["image"]
            left_eye = strongseq(image=left_eye)
            right_eye = transformer(image=right_eye)["image"]
            right_eye = strongseq(image=right_eye)

        left_eye = self.__preprocess(left_eye)
        right_eye = self.__preprocess(right_eye)

        # Get labels.
        if self.is_load_label:
            gaze_label = self.hdf['face_gaze'][idx, :]
            gaze_label = gaze_label.astype('float')
            gaze_label[0] = -gaze_label[0]
            headpose_label = self.hdf['face_head_pose'][idx, :]
            headpose_label = -headpose_label
            headpose_label = np.append(headpose_label, 0.0)
            headpose_label = headpose_label[[1, 0, 2]]  # yaw, pitch, roll
            return torch.FloatTensor(left_eye), torch.FloatTensor(right_eye), torch.FloatTensor(headpose_label), torch.FloatTensor(gaze_label)
        else:
            return left_eye, right_eye
def draw_gaze(image_in, eye_pos, pitchyaw, length=15.0, thickness=2, color=(0, 0, 255)):
    """Draw the gaze angle on the given image at the given eye position."""
    image_out = image_in
    if len(image_out.shape) == 2 or image_out.shape[2] == 1:
        image_out = cv2.cvtColor(image_out, cv2.COLOR_GRAY2BGR)
    dx = -length * np.sin(pitchyaw[1])
    dy = length * np.sin(pitchyaw[0])
    cv2.arrowedLine(image_out, tuple(np.round(eye_pos).astype(np.int32)),
                    tuple(np.round([eye_pos[0] + dx, eye_pos[1] + dy]).astype(int)), color,
                    thickness, cv2.LINE_AA, tipLength=0.2)
    return image_out
def draw_headpose(img, pitch, yaw, roll, size=100):
    """Draw the three projected head-pose axes (angles given in degrees)."""
    from math import cos, sin
    pitch = pitch * np.pi / 180
    yaw = -(yaw * np.pi / 180)
    roll = roll * np.pi / 180
    tdx = 50
    tdy = 50
    x1 = size * (cos(yaw) * cos(roll)) + tdx
    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy
    x2 = size * (-cos(yaw) * sin(roll)) + tdx
    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy
    # Z axis (out of the screen).
    x3 = size * (sin(yaw)) + tdx
    y3 = size * (-cos(yaw) * sin(pitch)) + tdy
    cv2.line(img, (int(tdx), int(tdy)), (int(x1), int(y1)), (255, 0, 0), 3)
    cv2.line(img, (int(tdx), int(tdy)), (int(x2), int(y2)), (0, 255, 0), 3)
    cv2.line(img, (int(tdx), int(tdy)), (int(x3), int(y3)), (0, 0, 255), 2)
    return img
if __name__ == "__main__":
test_loader = get_train_loader(data_dir="/media/vuthede/vuthede_hdd/vuthede/data/xgaze_224", batch_size=1, is_shuffle=True)
print(f'Len dataset :{len(test_loader)}')
for databatch in test_loader:
left, right, headpose, gaze = databatch
# print(left.shape, right.shape, headpose.shape, gaze.shape)
img = left.numpy()[0]
img1 = right.numpy()[0]
gaze = gaze.numpy()[0]
headpose = headpose.numpy()[0] * 180.0/3.14
print(gaze, headpose)
img = ((np.transpose(img, (1,2,0)) + 1)*128).astype(np.uint8)
img1 = ((np.transpose(img1, (1,2,0)) + 1)*128).astype(np.uint8)
img = np.ascontiguousarray(img)
img1 = np.ascontiguousarray(img1)
arrowLength = 75
img = draw_gaze(img, (30, 30), gaze[:2], length=15)
img1 = draw_gaze(img1, (30, 30), gaze[:2], length=15)
concat = np.hstack([img, img1])
concat = cv2.resize(concat, (320*2, 320))
concat = draw_headpose(concat, -headpose[1], -headpose[0], 0, 100)
cv2.imshow("Image", concat)
k =cv2.waitKey(0)
if k==27:
break
cv2.destroyAllWindows()
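Note that the code above assumes a particular layout under data_dir: the HDF5 face patches in train/ (and test/, val/) next to a raw/ folder holding the separately downloaded annotations and calibration files. A quick sanity check of those inferred paths (the root path is a placeholder):

import os

data_dir = "/path/to/xgaze_224"  # placeholder root
expected = [
    "raw/data/train_test_split.json",             # split file read by the loaders
    "raw/data/annotation_train",                  # per-subject CSV annotations
    "raw/calibration/face_model.txt",             # generic 3D face model
    "raw/calibration/cam_calibration/cam00.xml",  # per-camera calibration
    "train",                                      # HDF5 face patches
]
for rel in expected:
    path = os.path.join(data_dir, rel)
    print("OK " if os.path.exists(path) else "MISSING", path)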
@vuthede thanks a lot for sharing your code :) I see what you did, but it looks like you're using the raw data: the transformation matrices, the 3D gaze target points, and the landmark annotations. Is there a way to get just these annotations out of the 7TB file, or did you have to download the full dataset?
@AbdouMechraoui I did not use the raw data. I used the xgaze_224 version together with the annotations of the raw data (i.e., I just downloaded the annotations; I did not download the 7TB). :D
My bad! I thought the raw data link was a direct download link for the full raw dataset, hence the confusion.
Hi everyone, I wonder if there is any way to get the eye patches given that we have xgaze_224 (the cropped-face version) and annotations (landmarks in the original frames), because the full-frame version is 7TB, which would take a long time to download. Thanks, and I'd appreciate any help.