erkil1452 / gaze360

Code for the Gaze360: Physically Unconstrained Gaze Estimation in the Wild Dataset
http://gaze360.csail.mit.edu
Other
225 stars 42 forks source link

Cross-dataset evaluation #39

Closed AgentKu closed 2 years ago

AgentKu commented 2 years ago

Hi, I am very interested in your gaze360. Recently, I have been trying to reproduce your results. When I test the cross-dataset evaluation (train on Gaze360, test on Columbia Gaze), I find my result (10.8°) is much lower than that shown in your paper (9.0°). Here, my method is to use the LSTM model with 7 identical images.

After that, I also try to train a static model(just resnet18 and no LSTM), The result(13.3°) is also lower than 9.0°

I use all the images in Columbia Gaze for testing. My annotation comes from the V and H values in the image name: gaze[0] = -math.cos(V) * math.sin(H); gaze[1] = math.sin(V); gaze[2] = -math.cos(V) * math.cos(H)

I hope you can answer my doubts or point out my mistakes in my operation. Thanks

erkil1452 commented 2 years ago

Hi, you say your result 10.8 is lower than 9.0? That seems like a higher number (error) to me.

It will be almost impossible, after so long, to reconstruct exactly what the difference is.

However, if it helps, I am attaching our Columbia preprocessor. However, I do not guarantee this is the final correct version of the code.

It seems we do not have a minus sign in the gaze[0] equation (so our yaw goes the other way around).

# NOTE(review): the issue's markdown collapsed several import statements onto
# single lines (a syntax error as pasted); restored one statement per line,
# grouped stdlib / third-party per convention. No imports were removed.
import sys
sys.path.insert(0, '.')

import os
import re
import time
import json
import glob
import math
import argparse
import pickle

import numpy as np
import cv2
import scipy.io as sio
import scipy.interpolate as sci

import matplotlib
matplotlib.use('Qt5Agg')  # backend must be selected before importing pyplot
import matplotlib.pyplot as plt

def angles2Dir(angles): gaze3d = np.zeros([angles.shape[0], 3], angles.dtype) gaze3d[:,0] = np.sin(angles[:,0]) np.cos(angles[:,1]) gaze3d[:,2] = -np.cos(angles[:,0]) np.cos(angles[:,1]) gaze3d[:,1] = np.sin(angles[:,1]) return gaze3d

class ColumbiaCompiler(object):
    """Preprocessor that compiles the Columbia Gaze dataset into metadata + face crops."""

    def __init__(self):
        # No state to initialize; defer to the base class.
        super().__init__()

def run(self, exportImages = False):
    """Compile the raw Columbia Gaze images into a metadata.mat file.

    Walks `<DATASETS_PATH>/Columbia/Columbia Gaze Data Set/<subject>/`,
    parses the distance/pose/gaze labels encoded in each .jpg filename,
    and saves a MATLAB metadata file under `.../Columbia/compiled/`.

    Args:
        exportImages: when True, also writes 800x800 JPEG face crops
            under `compiled/face/<subject_id>/` (slow; disk-heavy).

    Raises:
        AttributeError (via `m.group`) if a filename does not match the
        expected `<id>_<dist>m_<P>P_<V>V_<H>H.jpg` pattern.
    """
    baseDir = os.path.join(myglobals.DATASETS_PATH, 'Columbia')
    outputPath = dataset_tools.preparePath(os.path.join(baseDir, 'compiled'))

    print('[ColumbiaCompiler] Processing %s...' % baseDir)

    subjectsPath = os.path.join(baseDir, 'Columbia Gaze Data Set')
    subjectDirs = os.listdir(subjectsPath)
    # FIX: `np.object` was removed in NumPy 1.24 — use the builtin `object` dtype.
    subjectDirs = np.array(subjectDirs, object)
    subjectDirs.sort()

    meta = {
        'subject': [],
        'filename': [],
        'distance_m': [],
        'hp_deg': [],
        'gaze_cam_deg': [],
        'gaze_dir': [],
        'face_bbox': [],
    }

    # Fixed face crop (x, y, w, h), normalized by the 5184x3456 image resolution.
    faceBbox = np.array([(5184 - 2800) // 2, 330, 2800, 2800], np.float32) / [5184, 3456, 5184, 3456]

    for i, subjectName in enumerate(subjectDirs):
        subjectPath = os.path.join(subjectsPath, subjectName)
        if not os.path.isdir(subjectPath):
            continue
        print('\tAdding %s [%d/%d]...' % (subjectName, i, len(subjectDirs)))
        # FIX: raw strings for all regexes — '\d' in a plain string literal is an
        # invalid escape sequence (SyntaxWarning in modern Python).
        subjectId = int(re.match(r'(\d+)$', subjectName).group(1))

        # Enumerate frames for this subject.
        files = os.listdir(subjectPath)
        files = np.array(files, object)
        files.sort()

        for j, file in enumerate(files):
            imPath = os.path.join(subjectPath, file)
            if not re.match(r'.*\.jpg$', file) or not os.path.isfile(imPath):
                continue

            print('\t\t[%d/%d] [%d/%d] %s...' % (i, len(subjectDirs), j, len(files), file))

            # Gaze data encoded in the filename: <id>_<dist>m_<P>P_<V>V_<H>H.jpg
            m = re.match(r'(\d+)\_(\d+)m\_([\-\d]+)P\_([\-\d]+)V\_([\-\d]+)H\.jpg$', file)
            distance = float(m.group(2))
            pose = np.array([-float(m.group(3)), 0], np.float32)
            # Order is (-H, V): horizontal component negated — presumably the
            # camera yaw convention; see angles2Dir. TODO confirm against paper.
            gazeCamDeg = np.array([-float(m.group(5)), float(m.group(4))], np.float32)

            gazeCam = dataset_tools.angles2Dir(gazeCamDeg.reshape(1, 2) / 180 * math.pi)[0, :]

            meta['subject'] += [subjectId]
            meta['filename'] += [file]
            meta['distance_m'] += [distance]
            meta['hp_deg'] += [pose]
            meta['gaze_cam_deg'] += [gazeCamDeg]
            meta['gaze_dir'] += [gazeCam]
            meta['face_bbox'] += [faceBbox]

            # Crop and export the face image.
            if exportImages:
                im = cv2.imread(imPath)
                imFace = image_tools.cropImage(im, faceBbox)

                imFaceLow = cv2.resize(imFace, (800, 800), interpolation=cv2.INTER_AREA)

                imFaceDir = dataset_tools.preparePath(os.path.join(outputPath, 'face', '%04d' % subjectId))
                cv2.imwrite(os.path.join(imFaceDir, file), imFaceLow, [cv2.IMWRITE_JPEG_QUALITY, 95])

    meta['filename'] = np.array(meta['filename'], object)

    outputMetaFile = os.path.join(outputPath, 'metadata.mat')
    sio.savemat(outputMetaFile, meta)
    print('DONE')

def readFaceImage(self, imFilename):
    """Load an image and crop it to the bounding box of its non-dark pixels.

    Pixels whose brightest channel is <= 20 are treated as background.

    Returns:
        (crop, aaBB) — the cropped image and the normalized (x, y, w, h)
        bounding box as float32 fractions of the image size.
    """
    im = cv2.imread(imFilename)

    # Foreground mask: any channel above the darkness threshold.
    brightest = np.max(im, axis=2)
    fgCoords = np.argwhere(brightest > 20)
    topLeft = np.min(fgCoords, axis=0)
    bottomRight = np.max(fgCoords, axis=0) + 1

    # argwhere yields (row, col); flip to (x, y) and normalize to [0, 1].
    norm = [im.shape[1], im.shape[0], im.shape[1], im.shape[0]]
    box = np.concatenate([topLeft[::-1], bottomRight[::-1] - topLeft[::-1]])
    aaBB = box.astype(np.float32) / norm

    crop = image_tools.cropImage(im, aaBB)
    return crop, aaBB

def vizualize(self, subjectId = 53):
    """Interactively browse compiled face crops with the gaze vector overlaid.

    GENERALIZED: the subject id was hard-coded to 53; it is now a default
    parameter, so existing `vizualize()` calls behave identically.

    Args:
        subjectId: Columbia subject id to browse.

    Any key advances to the next image; Esc (keycode 27) quits.
    """
    dsDir = os.path.join(myglobals.DATASETS_PATH, 'Columbia', 'compiled')
    meta = dataset_tools.loadMetadata(os.path.join(dsDir, 'metadata.mat'))

    mask = meta['subject'] == subjectId
    inds = np.argwhere(mask).flatten()

    for i, ind in enumerate(inds):
        filename = meta['filename'][ind]
        imFile = os.path.join(dsDir, 'face', '%04d' % subjectId, filename)
        gaze = meta['gaze_dir'][ind, :]
        print('[%d/%d] %s | Gaze = %s' % (i, len(inds), imFile, np.array2string(gaze)))

        # Render the gaze arrow onto the face crop.
        im = cv2.imread(imFile)

        # x component is mirrored before drawing — presumably to match the
        # on-screen image orientation; TODO confirm against VizTools.
        gaze = gaze * [-1, 1, 1]
        VizTools.drawGazeDirs(im, gaze.reshape(1, -1), [(0, 249, 255)], np.array([[0.5, 0.5]]), [[2.0, 2.0]])

        # Present; Esc aborts the loop.
        cv2.imshow('Face', im)
        if cv2.waitKey(0) & 0xff == 27:
            break

# FIX: markdown swallowed the dunder underscores ("if name == 'main'") and the
# body's indentation; restored so the script entry point actually executes.
if __name__ == "__main__":
    ex = ColumbiaCompiler()
    #ex.run()
    ex.vizualize()

`

AgentKu commented 2 years ago

Your suggestion helped me greatly. With reference to your data processor, I found that the problem lay in the images used. In my previous experiments, I used the raw images from Columbia. After cropping the images, the cross-dataset evaluation result returned to the normal value. Thanks a lot.