Thanks for your reply! Yes, the re-projection is correct now after doing that.
I have another question. I am confused about why the panorama translations do not need the coordinate permutation. Shouldn't the translations of the panorama and the cube-maps follow the same convention?
I hope to render panoramas with perspective methods (such as NeRF and its variants), which requires consistent translations and world coordinates between the cube-maps and the panorama. Right now they are not consistent, because of the problem above.
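To make this concrete, here is a minimal sketch (my own illustration, not code from this repo) of what I would expect a consistent permutation to look like: if the translation is permuted as trans[..., [2, 0, 1]], the rotation should be conjugated by the same permutation matrix.

import numpy as np

P = np.eye(3)[[2, 0, 1]]       # permutation matrix for (x, y, z) -> (z, x, y)
R = np.eye(3)                  # placeholder rotation
t = np.array([1.0, 2.0, 3.0])  # placeholder translation

R_new = P @ R @ P.T            # change of basis applied to the rotation
t_new = P @ t                  # identical to t[[2, 0, 1]]
assert np.allclose(t_new, t[[2, 0, 1]])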
I wrote the following code and found that the world coordinates of the key-points are not consistent.
keypoints_world.npz
source panorama view: src_1.jpg
target panorama view: target_2.jpg
"""
1.extract keypoints from panorama view 1
2.write keypoints to panorama view 2
3.save keypoints in world coordinates!
"""
import numpy as np
import torch
import cv2

keypoints_idx = 1  # source view idx
root_dir = "./"
target_idx = 2  # target view idx
EPS = 1e-5
w = 512
h = 256
def np_cartesian_to_spherical(xyz, eps=None, linearize_angle=np.deg2rad(10)):
    x = xyz[:, 0]
    y = xyz[:, 1]
    z = xyz[:, 2]
    theta = np.arctan2(z, x)
    radius = np.linalg.norm(xyz, axis=-1)
    y_over_r = y / radius
    phi = np.arccos(y_over_r)
    return theta, phi, radius
def np_spherical_to_cartesian(theta, phi, r):
    tmp = r * np.sin(phi)
    x = tmp * np.cos(theta)
    y = r * np.cos(phi)
    z = tmp * np.sin(theta)
    return np.stack((x, y, z), axis=-1)
def _get_data(idxs):
    data = np.load(root_dir + "/test_data.npz")
    rots = data["rots"][:, idxs, ...]
    trans = data["trans"][:, idxs, ...]
    # trans = trans[..., [2, 0, 1]]
    panos = data["panos"][:, idxs, ...]
    depths = data["depths"][:, idxs, ...]
    return rots, trans, panos, depths
def _load_matterport3d():
    idxs = [0, 1, 2]
    rots, trans, panos, depths = _get_data(idxs)

    def get_poses(rots, trans):
        seq_len = rots.shape[1]
        bottom = np.array([[0.0, 0.0, 0.0, 1.0]])
        pose_list = []
        for idx in range(seq_len):
            rot = rots[0, idx, ...]
            tr = trans[0, idx, ...]
            # assemble the 4x4 world-to-camera matrix, then invert to camera-to-world
            pose = torch.from_numpy(np.concatenate([np.concatenate([rot, tr[..., np.newaxis]], axis=1), bottom], axis=0))  # w2c
            pose = torch.inverse(pose)  # c2w
            pose = pose[:3, :]
            pose_list.append(pose)
        return torch.stack(pose_list)

    poses = get_poses(rots, trans)

    def get_colors(panos):
        seq_len = panos.shape[1]
        panos = panos[0, ...]
        s_panos = []
        for idx in range(seq_len):
            # interpolation must be passed as a keyword: the third positional
            # argument of cv2.resize is dst, not the interpolation flag
            s_panos.append(cv2.resize(panos[idx], (w, h), interpolation=cv2.INTER_LINEAR))
        panos = torch.from_numpy(np.stack(s_panos))
        panos = panos[:, :, :, [2, 1, 0]]  # swap channels (RGB <-> BGR for OpenCV)
        return panos

    colors = get_colors(panos)
    s_depths = torch.from_numpy(depths[0])
    return poses, colors, s_depths
# re-projection
def projection():
    poses, color, depth = _load_matterport3d()
    panos_np = color.data.numpy()
    depths_np = depth.data.numpy()
    poses_np = poses.data.numpy()
    H = panos_np.shape[1]
    W = panos_np.shape[2]

    # detect SIFT keypoints in the source panorama
    gray = cv2.cvtColor(np.uint8(panos_np[keypoints_idx, ...] * 255), cv2.COLOR_BGR2GRAY)
    sift = cv2.xfeatures2d.SIFT_create(nfeatures=200, contrastThreshold=0.02, edgeThreshold=20)
    kp = sift.detect(gray, None)
    print("len(kp):", len(kp))
    keypoints = np.array([[int(k.pt[0]), int(k.pt[1])] for k in kp])

    # draw the keypoints on the source view
    pano_src = np.uint8(panos_np[keypoints_idx, ...] * 255)
    for i in range(len(keypoints)):
        coord_x, coord_y = keypoints[i, 0], keypoints[i, 1]
        cv2.circle(pano_src, (int(coord_x), int(coord_y)), color=(0, 0, 255), thickness=1, radius=1)
    cv2.imwrite("src_" + str(keypoints_idx) + ".jpg", pano_src)

    # lift pixels to camera-space points via the spherical parameterization
    r = depths_np[keypoints_idx, keypoints[:, 1], keypoints[:, 0]]  # radial depth at each keypoint
    x = keypoints[:, 0]
    y = keypoints[:, 1]
    theta = x / W * 2 * np.pi
    theta = np.clip(theta, EPS, 2 * np.pi - EPS) - 1.5 * np.pi
    phi = y / H * np.pi
    phi = np.clip(phi, EPS, np.pi - EPS)
    verts = np_spherical_to_cartesian(theta, phi, r)  # (N, 3)
    N = verts.shape[0]

    # camera to world
    verts_homo = np.concatenate([verts.T, np.ones((1, N))], axis=0)  # (4, N)
    pose_c2w_src = poses_np[keypoints_idx]  # (3, 4)
    verts_world = pose_c2w_src @ verts_homo  # (3, N)
    np.savez("keypoints_world.npz", kp=verts_world)

    # world to target camera
    pose_c2w_target = poses_np[target_idx]  # (3, 4)
    bottom = np.array([[0, 0, 0, 1]])
    pose_c2w_target_sqr = np.concatenate([pose_c2w_target, bottom], axis=0)  # (4, 4)
    pose_w2c_target_sqr = np.linalg.inv(pose_c2w_target_sqr)
    verts_world_homo = np.concatenate([verts_world, np.ones((1, N))], axis=0)  # (4, N)
    verts_target = pose_w2c_target_sqr[:3, :] @ verts_world_homo  # (3, N)

    # back to equirectangular pixel coordinates in the target view
    theta, phi, radius = np_cartesian_to_spherical(verts_target.T)
    theta = (theta + 1.5 * np.pi) % (2 * np.pi)
    x = theta / (2 * np.pi) * W
    y = phi / np.pi * H
    pano_target = np.uint8(panos_np[target_idx, ...] * 255)
    for i in range(len(x)):
        coord_x, coord_y = x[i], y[i]
        cv2.circle(pano_target, (int(coord_x), int(coord_y)), color=(0, 0, 255), thickness=1, radius=1)
    cv2.imwrite("target_" + str(target_idx) + ".jpg", pano_target)

projection()
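As a quick sanity test of the two conversion helpers above (my own check, not part of the repo), they should round-trip:

pts = np.random.randn(100, 3)
theta, phi, radius = np_cartesian_to_spherical(pts)
pts_back = np_spherical_to_cartesian(theta, phi, radius)
assert np.allclose(pts, pts_back, atol=1e-6)  # the two conversions invert each other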
keypoints_world.npz
Then I read the keypoints-3D (world coordinates) from the panorama, produced by the previous code, and project them to each perspective view. (I find that the re-projected keypoints are wrong because of the inconsistent world coordinates.) Examples of other perspective views:
"""
1.read keypoints-3D (world coordinates) from panorama produced by `debug/debug.py`.
2.project those keypoints-3D to each perspective view.
(translations has been revised by trans = trans[..., [2, 0, 1]])
"""
import os
import numpy as np
import cv2

# read data
kp_name = "from_pano"  # "from_perspec" or "from_pano"
data = np.load("./test_data.npz", allow_pickle=True)
rgb_pano = data["panos"]
rgb_cubes = data["rgb_cubes"]      # (1, 3, 6, 256, 256, 3)
depth_cubes = data["depth_cubes"]  # (1, 3, 6, 256, 256, 1)
trans_cubes = data["trans_cubes"]  # (1, 3, 6, 3)
trans_cubes = trans_cubes[:, :, :, [2, 0, 1]]
rots_cubes = data["rots_cubes"]
H, W = rgb_cubes.shape[3:5]
# source view
pano_src_idx = 1  # 1-th pano
cube_idx = 4      # 4-th cube of the 1-th pano
current_rgb = np.uint8(rgb_cubes[0, pano_src_idx, cube_idx] * 255)
current_depth = depth_cubes[0, pano_src_idx, cube_idx]
current_trans = trans_cubes[0, pano_src_idx, cube_idx]
current_rots = rots_cubes[0, pano_src_idx, cube_idx]

# intrinsic parameters (90-degree FOV, so f = 0.5 * W / tan(45 deg) = W / 2)
FOV = 90
f = 0.5 * W / np.tan(0.5 * FOV / 180.0 * np.pi)
cx = (W - 1) / 2.0
cy = (H - 1) / 2.0
K = np.array([
    [f, 0, cx],
    [0, f, cy],
    [0, 0, 1],
], np.float32)
print("K:", K)
# get keypoints in the source view
# note: current_rgb is already uint8 in [0, 255]; multiplying by 255 again
# would overflow, so convert it directly
gray = cv2.cvtColor(current_rgb, cv2.COLOR_BGR2GRAY)
sift = cv2.xfeatures2d.SIFT_create(nfeatures=200, contrastThreshold=0.02, edgeThreshold=20)
kp = sift.detect(gray, None)
print("len(kp):", len(kp))
keypoints = np.array([[int(k.pt[0]), int(k.pt[1])] for k in kp])

os.makedirs("vis_bug", exist_ok=True)
cv2.imwrite("vis_bug/src_orig.jpg", current_rgb)
d_min = current_depth.min()
d_max = current_depth.max()
d_gray = np.uint8((current_depth - d_min) / (d_max - d_min) * 255)
d_color = cv2.applyColorMap(d_gray, cv2.COLORMAP_JET)
cv2.imwrite("vis_bug/d_color.jpg", d_color)

# keep only keypoints with positive depth
z_depth = current_depth[keypoints[:, 1], keypoints[:, 0], :]
valid_list = np.where(z_depth > 0)[0]
z_depth = z_depth[valid_list]      # (n, 1)
keypoints = keypoints[valid_list]  # (n, 2)
# write keypoints in the source view
for i in range(len(keypoints)):
    coord_x, coord_y = keypoints[i, 0], keypoints[i, 1]
    cv2.circle(current_rgb, (int(coord_x), int(coord_y)), color=(0, 0, 255), thickness=1, radius=1)
cv2.imwrite("vis_bug/src.jpg", current_rgb)

# get world coordinates of the keypoints
N, _ = keypoints.shape
points_homo = np.concatenate([keypoints, np.ones((N, 1))], axis=1)  # (N, 3)
points_cam_norm = np.linalg.inv(K) @ points_homo.T  # (3, N) normalized rays
points_cam = points_cam_norm * (z_depth.T).repeat(3, axis=0)  # (3, N) scaled by z-depth
current_pose = np.concatenate([current_rots, current_trans.reshape(3, 1)], axis=1)
current_pose = np.concatenate([current_pose, np.array([[0, 0, 0, 1]])], axis=0)  # w2c
points_w = np.linalg.inv(current_pose) @ np.concatenate([points_cam, np.ones((1, points_cam.shape[1]))], axis=0)

if kp_name == "from_pano":
    # overwrite with the keypoints-3D saved by the panorama script
    points_w = np.load("keypoints_world.npz")["kp"]
elif kp_name == "from_perspec":
    pass
# re-project the keypoints-3D into every perspective view
num_pano_views = rgb_cubes.shape[1]  # 3
num_cube_views = rgb_cubes.shape[2]  # 6
for i in range(num_pano_views):
    for j in range(num_cube_views):
        print("i, j:", i, j)
        rgb_other = np.uint8(rgb_cubes[0, i, j] * 255)
        trans_other = trans_cubes[0, i, j].reshape(3, 1)
        rots_other = rots_cubes[0, i, j]
        points_cam_other = rots_other @ points_w[:3, :] + trans_other  # world -> camera (w2c)
        points_coords_h = K @ points_cam_other[:3, :]  # (3, N)
        valid_list = np.where(points_cam_other[2, :] > 0)[0]  # keep points in front of the camera
        points_coords_h = points_coords_h[:, valid_list]
        points_coords = (points_coords_h / points_coords_h[2:, :]).T  # (N, 3); columns 0 and 1 are pixel coords
        cv2.imwrite("vis_bug/other_" + str(i) + "_" + str(j) + "_orig.jpg", rgb_other)
        for coord_i in range(len(points_coords)):
            coord_x, coord_y = points_coords[coord_i, 0], points_coords[coord_i, 1]
            if 0 <= coord_x < W and 0 <= coord_y < H:
                cv2.circle(rgb_other, (int(coord_x), int(coord_y)), color=(0, 0, 255), thickness=1, radius=1)
        cv2.imwrite("vis_bug/other_" + str(i) + "_" + str(j) + ".jpg", rgb_other)
How can I revise my code to make the coordinates consistent between panorama and cube-maps? Thanks!
Question:
Sorry to bother you again!
I want to use the code of
data_readers/create_rgb_dataset.py
to split the panoramas into cube-maps (to run other perspective-image methods on habitat-matterport3d), but I seem to get wrong translations and rotations. After obtaining the cube-map images, cube-map z-depths, rotations, and translations, I try to re-project the key-points, using their z-depth information, from the perspective source view to the other perspective views. But the visualization results are wrong.
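For reference, the re-projection I am attempting is, for a single pixel, the following (a sketch under my assumption that the poses are world-to-camera):

import numpy as np

def reproject(uv, z, K, R_src, t_src, R_tgt, t_tgt):
    # lift the pixel into the source camera using its z-depth
    x_cam_src = z * np.linalg.inv(K) @ np.array([uv[0], uv[1], 1.0])
    x_world = R_src.T @ (x_cam_src - t_src)  # invert the w2c source pose
    x_cam_tgt = R_tgt @ x_world + t_tgt      # apply the w2c target pose
    x_img = K @ x_cam_tgt
    return x_img[:2] / x_img[2]              # pixel coordinates in the target view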
Source View: the source perspective view and the SIFT key-points extracted from it:
Other Views: examples of the re-projected key-points in the other perspective views:
It seems that the poses of the cube-maps are wrong, but I don't know where my code went wrong. I would like to ask for your help.
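One heuristic I am using to check the pose convention (my own probe, relying only on the fact that all six faces of one cube share a single optical center):

import numpy as np

def probe_convention(rots, trans):
    # rots: (6, 3, 3), trans: (6, 3) for the six faces of one cube
    centers_if_w2c = np.stack([-R.T @ t for R, t in zip(rots, trans)])
    centers_if_c2w = trans
    # only the correct interpretation should give six nearly identical centers
    print("spread if w2c:", centers_if_w2c.std(axis=0))
    print("spread if c2w:", centers_if_c2w.std(axis=0))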
Code:
1. generate cube-maps
I get the cube-maps according to the following code, by revising the get_vector_sample function of data_readers/create_rgb_dataset.py:
2. read the cube-map data and save it as test_data.npz
3. get key-points from the source view and write the key-points to the other views (this needs pip install opencv-python opencv-contrib-python first)
I would be grateful if you could help me!