Closed liuxiaoyu1104 closed 1 hour ago
class Camera(object):
    """Camera intrinsics/extrinsics parsed from one TUM-style trajectory entry.

    ``entry`` is an 8-element sequence ``(timestamp, tx, ty, tz, qx, qy, qz, qw)``:
    a translation plus a scalar-last quaternion.  Intrinsics are stored in
    resolution-normalized form; the value 0.688 is a fixed default normalized
    focal length — presumably chosen to match CameraCtrl's convention
    (TODO confirm against the released CameraCtrl code).

    Depending on ``pose_file_name``, the entry's pose is interpreted either as
    a world-to-camera transform ("synthetic" files) or a camera-to-world
    transform ("pexels" files); the complementary matrix is obtained by
    inverting the 4x4 homogeneous transform.
    """

    def __init__(self, entry, pose_file_name, image_scale=(1920, 1080)):
        """Parse one pose entry.

        Args:
            entry: sequence of 8 floats ``(timestamp, tx, ty, tz, qx, qy, qz, qw)``.
            pose_file_name: name of the source pose file; must contain
                ``"synthetic"`` or ``"pexels"`` to select the pose convention.
            image_scale: ``(width, height)`` of the frames the poses belong to.

        Raises:
            ValueError: if ``entry`` does not have length 8, or if
                ``pose_file_name`` matches neither known convention.
        """
        # Validate with a real exception rather than `assert`, which is
        # silently stripped under `python -O`.
        if len(entry) != 8:
            raise ValueError(f"length of entry should be 8, got {len(entry)}")

        # Normalized intrinsics: 0.688 is assigned to the focal length of the
        # shorter image side and the other axis is scaled by the aspect ratio;
        # the principal point sits at the normalized image center (0.5, 0.5).
        width, height = image_scale
        if width > height:  # landscape orientation
            self.fx = 0.688
            self.fy = self.fx * (width / height)
        else:  # portrait (or square) orientation
            self.fy = 0.688
            self.fx = self.fy * (height / width)
        self.cx = 0.5
        self.cy = 0.5

        # Pose: timestamp, translation, scalar-last quaternion.
        self.timestamp = entry[0]
        tx, ty, tz = entry[1:4]
        qx, qy, qz, qw = entry[4:8]

        rotation_matrix = self.quaternion_to_rotation_matrix(qx, qy, qz, qw)
        translation_vector = np.array([tx, ty, tz])

        # Assemble the 4x4 homogeneous transform once; which side (w2c or
        # c2w) it represents depends on the dataset the file came from.
        pose = np.eye(4)
        pose[:3, :3] = rotation_matrix
        pose[:3, 3] = translation_vector

        if "synthetic" in pose_file_name:
            # Entry stores world-to-camera; invert for camera-to-world.
            self.w2c_mat = pose
            self.c2w_mat = np.linalg.inv(pose)
        elif "pexels" in pose_file_name:
            # Entry stores camera-to-world; invert for world-to-camera.
            self.c2w_mat = pose
            self.w2c_mat = np.linalg.inv(pose)
        else:
            raise ValueError(f"Unknown camera pose file name: {pose_file_name}")

    @staticmethod
    def quaternion_to_rotation_matrix(qx, qy, qz, qw):
        """Return the 3x3 rotation matrix for the quaternion (qx, qy, qz, qw).

        Uses the standard conversion from
        https://en.wikipedia.org/wiki/Quaternions_and_spatial_rotation.
        The quaternion is normalized first so slightly non-unit inputs (e.g.
        accumulated rounding in a trajectory file) still yield a proper
        rotation matrix.

        Raises:
            ValueError: if the quaternion has (near-)zero norm.
        """
        norm = float(np.sqrt(qx * qx + qy * qy + qz * qz + qw * qw))
        if norm < 1e-12:
            raise ValueError("zero-norm quaternion cannot encode a rotation")
        qx, qy, qz, qw = qx / norm, qy / norm, qz / norm, qw / norm
        return np.array([
            [1 - 2 * (qy * qy + qz * qz), 2 * (qx * qy - qz * qw), 2 * (qx * qz + qy * qw)],
            [2 * (qx * qy + qz * qw), 1 - 2 * (qx * qx + qz * qz), 2 * (qy * qz - qx * qw)],
            [2 * (qx * qz - qy * qw), 2 * (qy * qz + qx * qw), 1 - 2 * (qx * qx + qy * qy)],
        ])
This is my camera-processing code for quaternions. The rest of the camera handling should be similar to CameraCtrl's.
Sorry — the first author has been busy with coursework recently, so the cleaned-up code will be released later.
How do I convert poses from the TUM camera coordinate convention to the RealEstate10K convention (which should be the COLMAP convention)? In your dataset, a `timestamp tx ty tz qx qy qz qw` entry is converted to the following w2c matrix: tensor([[-1., 0., 0., -0.], [ 0., -1., -0., -0.], [-0., 0., 1., -0.], [ 0., 0., 0., 1.]]). This seems inconsistent with the w2c matrices provided by the RealEstate10K dataset. Is there a coordinate transformation involved?
Below is the code I used to obtain the w2c matrix:

```python
from scipy.spatial.transform import Rotation

qvec = np.array(tuple(map(float, entry[4:8])))
tvec = np.array(tuple(map(float, entry[1:4])))
R = Rotation.from_quat([qvec[0], qvec[1], qvec[2], qvec[3]]).as_matrix()
t = tvec.reshape([3, 1])
bottom = np.array([0.0, 0.0, 0.0, 1.0]).reshape([1, 4])
w2c_mat_4x4 = np.concatenate([np.concatenate([R, t], 1), bottom], 0)
```