JARVIS-MoCap / JARVIS-AnnotationTool

AnnotationTool to create multi-view annotations for the JARVIS 3D Markerless Pose Estimation Toolbox
https://jarvis-mocap.github.io/jarvis-docs/
GNU Lesser General Public License v2.1

Reprojection problem #22

Open nishbo opened 1 week ago

nishbo commented 1 week ago

We have noticed that the reprojection of some points is off when viewed from the other cameras, e.g., when labeling the tip of the index finger (I am showing 3 of 4 cameras: the first two were labeled, the third one shows the reprojection): [three screenshots attached] The calibration reported errors well below 1 px, and the debug images look very good.

For testing, I have written my own script based on the reprojection methods in Jarvis, and I get the following: [screenshot attached] The red cross is the manual label on the image, green circles are reprojections of triangulations that involve the camera itself, blue circles are reprojections of triangulations from pairs of other cameras, and the purple star is the centroid of all of them. As you can see, the point can be off by a significant distance, more than half a centimeter; in reprojection terms, the green circles (which use the same camera) deviate from the labeled point by ~5 px, which is already a lot for self-reprojection, while the blue ones deviate by ~20 px.

I will keep looking into this issue using a different reprojection method, but please advise if there is something I might have missed. I have attached the script I wrote in case you want to try it on your own data.

#!/usr/bin/env python3
import os
import glob
import csv

import numpy as np
import PIL.Image
import cv2
import matplotlib.pyplot as plt

def load_calibration(filepath):
    fs = cv2.FileStorage(filepath, cv2.FILE_STORAGE_READ)
    fields = ['T', 'R', 'intrinsicMatrix', 'distortionCoefficients']
    calib = {}
    for field in fields:
        calib[field] = fs.getNode(field).mat()
    fs.release()

    # Standard OpenCV convention would be P = K [R | t] (with T as a
    # 3x1 column vector):
    # calib['projection_matrix'] = np.dot(
    #     calib['intrinsicMatrix'], np.hstack((calib['R'], calib['T'])))

    # JARVIS instead builds the projection matrix as ([R; T] K)^T, i.e.
    # K^T [R^T | T^T], which suggests that R and the intrinsic matrix are
    # stored transposed relative to the OpenCV convention.  (In the JARVIS
    # source the extra np.transpose(..., axes=(0, 1)) is a no-op.)
    calib['projection_matrix'] = np.matmul(
        np.concatenate((calib['R'], calib['T'].reshape(1, 3)), axis=0),
        calib['intrinsicMatrix']).T
    return calib
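
# NOTE: the script below calls undistort_image() but did not define it; this
# is a minimal sketch.  cv2.undistort() expects an OpenCV-convention
# intrinsic matrix, so if JARVIS stores it transposed (as the
# projection-matrix construction above suggests), pass
# calib['intrinsicMatrix'].T instead.
def undistort_image(calib, img):
    return cv2.undistort(img, calib['intrinsicMatrix'],
                         calib['distortionCoefficients'])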

def load_point(filename, frame_basename, bodypart):
    # Read the (x, y) label of `bodypart` for one frame from the
    # annotations.csv file (DeepLabCut-style header rows).
    with open(filename, 'r') as f:
        rdr = csv.reader(f)
        next(rdr)  # Scorer
        next(rdr)  # entities

        bodyparts = next(rdr)
        bp_i = bodyparts.index(bodypart)

        next(rdr)  # coords

        for li in rdr:
            if li[0] != frame_basename:
                continue
            return float(li[bp_i]), float(li[bp_i + 1])
    raise ValueError(f'{frame_basename} not found in {filename}')

def triangulate(p1, c1, p2, c2):
    # cv2.triangulatePoints expects the 2D points as 2xN float arrays
    p1 = np.asarray(p1, dtype=float).reshape(2, 1)
    p2 = np.asarray(p2, dtype=float).reshape(2, 1)
    point4D_hom = cv2.triangulatePoints(
        c1['projection_matrix'], c2['projection_matrix'], p1, p2)
    point3D = point4D_hom / point4D_hom[3]  # de-homogenize
    return point3D[:3].squeeze()

def project(c, p):
    # Project a 3D point to 2D using the pinhole projection matrix.
    # Note: no lens distortion is applied here, while the manual labels
    # live in the distorted image.
    X_world = np.array([p[0], p[1], p[2], 1.0])
    X_image_homogeneous = np.dot(c['projection_matrix'], X_world)
    # Convert from homogeneous to pixel coordinates
    return X_image_homogeneous[:2] / X_image_homogeneous[2]
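
# A sketch of a distortion-aware alternative to project(), using
# cv2.Rodrigues and cv2.projectPoints.  It assumes OpenCV-convention R, T, K
# and distortion coefficients; with the transposed JARVIS storage discussed
# above, R.T and the transposed intrinsic matrix would be needed.
def project_distorted(c, p):
    rvec, _ = cv2.Rodrigues(c['R'])
    image_points, _ = cv2.projectPoints(
        np.asarray(p, dtype=float).reshape(1, 3), rvec, c['T'].reshape(3, 1),
        c['intrinsicMatrix'], c['distortionCoefficients'])
    return image_points.squeeze()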

def prp(point):
    return '(' + ', '.join([f'{p:.2f}' for p in point]) + ')'

def pdist(point1, point2):
    return sum((p1-p2)**2 for p1, p2 in zip(point1, point2))**0.5

def triangulate_reproject_points(calibs, points, frames, undistort_images=False):
    # For every ordered pair of cameras, triangulate the labeled point and
    # reproject it back into the first camera of the pair.
    triangulated_points = {}
    reprojected_points = {}
    for cam1, calib1 in calibs.items():
        point1 = points[cam1]
        triangulated_points[cam1] = {}
        reprojected_points[cam1] = {}
        for cam2, calib2 in calibs.items():
            if cam1 == cam2:
                continue
            point2 = points[cam2]
            triangulated_points[cam1][cam2] = triangulate(point1, calib1, point2, calib2)
            reprojected_points[cam1][cam2] = project(calib1, triangulated_points[cam1][cam2])

    # reprojections of points that were triangulated entirely from other cameras
    ou_reprojected_points = {}
    for cam1, point1 in points.items():
        ou_reprojected_points[cam1] = []
        for cam2, tps in triangulated_points.items():
            if cam2 == cam1:
                continue
            for cam3, tp in tps.items():
                if cam3 == cam1:  # not with self
                    continue
                ou_reprojected_points[cam1].append(project(calibs[cam1], tp))

    # robust 3D centroid: per-axis median over all pairwise triangulations
    all_triangulations = np.array([tp for tps in triangulated_points.values()
                                   for tp in tps.values()])
    centroid_tp = np.median(all_triangulations, axis=0)
    centroid_rps = {}
    for camera, calib in calibs.items():
        centroid_rps[camera] = project(calib, centroid_tp)

    # print
    print('Reprojection results:')
    for cam1, point1 in points.items():
        print(f'\tCamera {cam1}:')
        print(f'\t\tOriginal point: {prp(point1)}')
        print('\t\tTriangulated points and their reprojections:')
        for tp, rp in zip(triangulated_points[cam1].values(), reprojected_points[cam1].values()):
            print(f'\t\t\t{prp(tp)}: {prp(rp)}, diff: {pdist(rp, point1):.2f}')

        print('\t\tReprojections of points triangulated from other views:')
        for rp in ou_reprojected_points[cam1]:
            print(f'\t\t\t{prp(rp)}, diff: {pdist(rp, point1):.2f}')

        print('\t\tCentroid:')
        rp = centroid_rps[cam1]
        print(f'\t\t\t{prp(centroid_tp)}: {prp(rp)}, diff: {pdist(rp, point1):.2f}')

    # display: the original label and the reprojections on each camera image
    fig = plt.figure()
    # near-square subplot grid (replaces the private helper
    # prehension.tools.plotting.xy_numsubplots from the original script)
    yn_subplots = int(np.ceil(np.sqrt(len(points))))
    xn_subplots = int(np.ceil(len(points) / yn_subplots))
    for icam, (cam1, point1) in enumerate(points.items()):
        ax = fig.add_subplot(xn_subplots, yn_subplots, icam+1)
        img = np.asarray(PIL.Image.open(frames[cam1]))
        if undistort_images:
            img = undistort_image(calibs[cam1], img)
        ax.imshow(img)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.plot(point1[0], point1[1], 'r+')
        for point2 in reprojected_points[cam1].values():
            ax.plot(point2[0], point2[1], 'go', markerfacecolor='none')
        for point2 in ou_reprojected_points[cam1]:
            ax.plot(point2[0], point2[1], 'bo', markerfacecolor='none')
        ax.plot(centroid_rps[cam1][0], centroid_rps[cam1][1], 'm*')
        ax.set_xlim([0, np.size(img, 1)])
        ax.set_ylim([np.size(img, 0), 0])
        ax.set_title(cam1)

    return triangulated_points, reprojected_points

def main():
    # control defines
    # Segment 1 of Tot_20231121
    session = 'Tot_20231121'
    segment = 1
    frame_i = 0
    bodypart = 'Index_T'

    # derived
    parentdir = r'R:\ProjectFolders\Prehension\JARVIS\Datasets\Tot_trainingset_20240212_added'
    # calibdir = os.path.join(parentdir, 'calib_params', session)
    calibdir = os.path.join(parentdir, 'data', session, 'CalibrationParameters')
    datadir = os.path.join(parentdir, 'data', session, 'data')

    # find cameras and load calibrations
    cameras = [os.path.splitext(os.path.basename(f))[0]
               for f in glob.glob(os.path.join(calibdir, '*.yaml'))]
    calibs = {}
    for camera in cameras:
        calibs[camera] = load_calibration(os.path.join(calibdir, camera + '.yaml'))
    print('Found {} cameras: {}'.format(len(cameras), ', '.join(cameras)))

    # find images
    segment_imagesdir = os.path.join(datadir, f'Segment {segment}')

    # find frame numbers
    frames = glob.glob(os.path.join(segment_imagesdir, cameras[0], 'Frame_*.jpg'))
    frames = [os.path.splitext(os.path.basename(f))[0][6:]
              for f in frames]
    frame = frames[frame_i]
    print('Found {} frames: {}. Using: {}'.format(
        len(frames), ', '.join(frames), frame))
    frames = {c: os.path.join(segment_imagesdir, c, f'Frame_{frame}.jpg')
              for c in cameras}

    # load points
    points = {}
    for camera in cameras:
        points[camera] = load_point(
            os.path.join(segment_imagesdir, camera, 'annotations.csv'),
            f'Frame_{frame}.jpg', bodypart)
    print('Found points:')
    for camera, point in points.items():
        print(f'\t{camera}: ({point[0]}, {point[1]})')

    # triangulate, reproject and display results
    print('Without undistortion:')
    triangulate_reproject_points(calibs, points, frames, undistort_images=False)
    plt.suptitle('Without undistortion')

if __name__ == '__main__':
    main()

    plt.show()
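
A variant that may be worth testing: undistort the 2D labels with cv2.undistortPoints and triangulate in normalized camera coordinates, so that lens distortion never enters the triangulation. This is only a sketch and assumes OpenCV-convention R (3x3), T (3x1), intrinsics and distortion coefficients; with the transposed JARVIS storage discussed above, R and the intrinsic matrix would need to be transposed first.

def triangulate_undistorted(p1, c1, p2, c2):
    # Map raw pixel labels to normalized, undistorted camera coordinates
    u1 = cv2.undistortPoints(np.array(p1, dtype=float).reshape(1, 1, 2),
                             c1['intrinsicMatrix'], c1['distortionCoefficients'])
    u2 = cv2.undistortPoints(np.array(p2, dtype=float).reshape(1, 1, 2),
                             c2['intrinsicMatrix'], c2['distortionCoefficients'])
    # With the intrinsics removed, the projection matrices are just [R | t]
    P1 = np.hstack((c1['R'], c1['T'].reshape(3, 1)))
    P2 = np.hstack((c2['R'], c2['T'].reshape(3, 1)))
    point4D = cv2.triangulatePoints(P1, P2,
                                    u1.reshape(2, 1), u2.reshape(2, 1))
    return (point4D[:3] / point4D[3]).squeeze()
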
nishbo commented 1 day ago

I have recreated the same approach on a different dataset, using both the Jarvis triangulation and a different one (https://github.com/CMGreenspon/NCams); both rely on OpenCV, but in slightly different forms: [screenshot attached]

I got the same shift, albeit smaller, possibly due to the larger number of cameras and the slightly better quality of both the cameras and the triangulation. From here I will assume that this is a generic problem of the OpenCV triangulation methods; please tell me if anyone has better approaches or ideas on how to solve it for high-precision tracking of fingers.
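
One direction I plan to try is a single linear least-squares (DLT) triangulation over all cameras at once, instead of averaging pairwise cv2.triangulatePoints results. A minimal sketch (assuming consistent 3x4 projection matrices, ideally applied to already-undistorted points; numpy imported as np as in the script above):

def triangulate_multiview(projection_matrices, points_2d):
    # For each camera i with projection matrix P_i and observed point
    # (x_i, y_i), stack the rows x_i * P_i[2] - P_i[0] and
    # y_i * P_i[2] - P_i[1], then take the right singular vector with the
    # smallest singular value as the homogeneous 3D point.
    A = []
    for P, (x, y) in zip(projection_matrices, points_2d):
        A.append(x * P[2] - P[0])
        A.append(y * P[2] - P[1])
    _, _, vt = np.linalg.svd(np.asarray(A))
    X = vt[-1]
    return X[:3] / X[3]

With the variables from the script above, this could be called as triangulate_multiview([calibs[c]['projection_matrix'] for c in cameras], [points[c] for c in cameras]). A nonlinear refinement of the resulting point (minimizing the reprojection error over all views) should reduce the residual shift further.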