WangJuan6 opened 2 years ago
@alanlou perhaps you can help here.
It will take some (minor) modification, but here is how you can do 3D reconstruction from the 3D sensor:
import open3d as o3d
import open3d.core as o3c
import numpy as np
# Fixing the frustum clearing issue
#
# We don't have this functionality supported, but since you are using the python API I guess you can do this manually:
#
# * you first create a frustum volume in the camera coordinate system by unprojecting the imaging plane to a list of discretized depths and reshaping it to (N, 3) (only once)
# * then you rigidly transform the volume to the world coordinate system (once per iteration)
# * finally you turn it into an Open3D Tensor and feed it to the function in need, as in the sketch below.
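#
# A minimal, hypothetical sketch of the three steps above (not part of the original
# code). The helper names, the pinhole intrinsic K, the image size and the
# camera-to-world matrix are assumed inputs, chosen only for illustration.
def make_frustum_points(K, rows, cols, depth_min=0.1, depth_max=5.0, n_depths=32, stride=8):
    # Unproject a sub-sampled pixel grid at a list of discretized depths (done only once).
    u, v = np.meshgrid(np.arange(0, cols, stride), np.arange(0, rows, stride))
    pix = np.stack([u.ravel(), v.ravel(), np.ones(u.size)], axis=0)  # (3, M) homogeneous pixels
    rays = np.linalg.inv(K) @ pix  # (3, M) rays in camera coordinates
    depths = np.linspace(depth_min, depth_max, n_depths)
    pts = (rays[:, :, None] * depths[None, None, :]).reshape(3, -1)
    return pts.T  # (N, 3) frustum samples in the camera coordinate system

def frustum_to_tensor(frustum_cam, cam_to_world, device):
    # Rigid-transform the cached frustum to world coordinates (once per iteration)
    # and wrap it as an Open3D tensor for the function in need.
    pts_world = frustum_cam @ cam_to_world[:3, :3].T + cam_to_world[:3, 3]
    return o3c.Tensor(pts_world.astype(np.float32), device=device)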
class Slam:
def __init__(
self,
intrinsic,
features = ["color", "seg", "ins_seg"],
feature_sizes = [3, 1, 1],
device="CUDA:0"):
self.device = o3c.Device(device)
voxel_size = 3.0 / 512
self.features = features
self.feature_sizes = feature_sizes
attr_names = ["tsdf", "weight"] + features
attr_sizes = [1, 1] + feature_sizes
attr_dtypes = [o3c.float32] * len(attr_sizes)
self.vbg = o3d.t.geometry.VoxelBlockGrid(
attr_names=attr_names,
attr_dtypes=attr_dtypes,
attr_channels=attr_sizes,
voxel_size=voxel_size,
block_resolution=16,
block_count=50000,
device=self.device,
)
self.trunc = voxel_size * 4
self.depth_max = 5.0
self.depth_scale = 1.0
self.intrinsic = o3c.Tensor(intrinsic, o3c.Dtype.Float64)
self.intrinsic_dev = o3c.Tensor(
intrinsic, device=self.device, dtype=o3c.float32
)
@staticmethod
def convert_opengl_to_world(ex):
return np.array(
[
[ex[0, 0], ex[0, 1], ex[0, 2], ex[0, 3]],
[-ex[1, 0], -ex[1, 1], -ex[1, 2], -ex[1, 3]],
[-ex[2, 0], -ex[2, 1], -ex[2, 2], -ex[2, 3]],
[0, 0, 0, 1],
]
)
def integrate(self, extrinsic, depth, features):
extrinsic = self.convert_opengl_to_world(extrinsic)
extrinsic = o3c.Tensor(extrinsic, dtype=o3c.float64)
depth = o3d.t.geometry.Image(np.ascontiguousarray(depth))
depth = depth.to(self.device)
frustum_block_coords = self.vbg.compute_unique_block_coordinates(
depth, self.intrinsic, extrinsic, self.depth_scale, self.depth_max
)
# Activate them in the underlying hash map (may have been inserted)
self.vbg.hashmap().activate(frustum_block_coords)
# Find buf indices in the underlying engine
buf_indices, _ = self.vbg.hashmap().find(frustum_block_coords)
o3c.cuda.synchronize()
voxel_coords, voxel_indices = self.vbg.voxel_coordinates_and_flattened_indices(
buf_indices
)
self.voxel_coords = voxel_coords
o3c.cuda.synchronize()
# Now project them to the depth and find association
# (3, N) -> (2, N)
extrinsic_dev = extrinsic.to(self.device, o3c.float32)
xyz = extrinsic_dev[:3, :3] @ voxel_coords.T() + extrinsic_dev[:3, 3:]
uvd = self.intrinsic_dev @ xyz
d = uvd[2]
u = (uvd[0] / d).round().to(o3c.int64)
v = (uvd[1] / d).round().to(o3c.int64)
o3c.cuda.synchronize()
mask_proj = (
(d > 0) & (u >= 0) & (v >= 0) & (u < depth.columns) & (v < depth.rows)
)
# For visualization
self.columns = depth.columns
self.rows = depth.rows
self.extrinsic = extrinsic
self.frustum_block_coords = frustum_block_coords
v_proj = v[mask_proj]
u_proj = u[mask_proj]
d_proj = d[mask_proj]
depth_readings = (
depth.as_tensor()[v_proj, u_proj, 0].to(o3c.float32) / self.depth_scale
)
sdf = depth_readings - d_proj
mask_inlier = (
(depth_readings > 0)
& (depth_readings < self.depth_max)
& (sdf >= -self.trunc)
)
sdf[sdf >= self.trunc] = self.trunc
sdf = sdf / self.trunc
o3c.cuda.synchronize()
weight = self.vbg.attribute("weight").reshape((-1, 1))
tsdf = self.vbg.attribute("tsdf").reshape((-1, 1))
valid_voxel_indices = voxel_indices[mask_proj][mask_inlier]
w = weight[valid_voxel_indices]
wp = w + 1
tsdf[valid_voxel_indices] = (
tsdf[valid_voxel_indices] * w + sdf[mask_inlier].reshape(w.shape)
) / (wp)
### Loop over the extra feature channels (color, seg, ins_seg) and integrate each one
for idx, feat in enumerate(self.features):
feat_tensor = o3c.Tensor(features[feat], dtype=o3c.float32, device=self.device)
if feat == "color":
### Open3D normalizes color, so rescale it back to the 0-255 range
feat_tensor *= 255
feat_tensor_readings = feat_tensor[v_proj, u_proj]
# Dictionary keys must be in the same order as feature_sizes
shape = self.feature_sizes[idx]
feat_tensor = self.vbg.attribute(feat).reshape((-1, shape))
feat_tensor[valid_voxel_indices] = feat_tensor_readings[mask_inlier]
weight[valid_voxel_indices] = wp
o3c.cuda.synchronize()
return self.vbg
def visualize(self, extrinsic=None):
if extrinsic is None:
extrinsic = self.extrinsic
# examples/python/t_reconstruction_system/ray_casting.py
result = self.vbg.ray_cast(
block_coords=self.vbg.hashmap().key_tensor(),
intrinsic=self.intrinsic,
extrinsic=extrinsic,
width=self.columns,
height=self.rows,
render_attributes=["depth", "normal", "color", "index", "interp_ratio"],
depth_scale=self.depth_scale,
depth_min=0,
depth_max=10,
weight_threshold=1,
)
import matplotlib.pyplot as plt
fig, axs = plt.subplots(2, 2)
# colorized depth
colorized_depth = o3d.t.geometry.Image(result["depth"]).colorize_depth(
self.depth_scale, 0, 3
)
axs[0, 0].imshow(colorized_depth.as_tensor().cpu().numpy())
axs[0, 0].set_title("depth")
axs[0, 1].imshow(result["normal"].cpu().numpy())
axs[0, 1].set_title("normal")
axs[1, 0].imshow(result["color"].cpu().numpy())
axs[1, 0].set_title("color via kernel")
plt.show()
def visualize_frustum(self):
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(self.voxel_coords.cpu().numpy())
o3d.visualization.draw(pcd)
return
if __name__ == "__main__":
import os
import ssg
from ssg.envs.igibson_env import iGibsonEnv
o3d.visualization.webrtc_server.enable_webrtc()
config_file = os.path.join(ssg.CONFIG_PATH, "config.yaml")
num_cpu = 1
env = iGibsonEnv(
config_file=config_file,
mode="headless",
action_timestep=1 / 10.0,
physics_timestep=1 / 40.0,
)
# Get the intrinsic matrix
intrinsic = env.simulator.renderer.get_intrinsics()
slam = Slam(intrinsic=intrinsic)
env.reset()
# 10 seconds
for i in range(100):
rgb, threed, seg, ins_seg = env.simulator.renderer.render_robot_cameras(
modes=("rgb", "3d", "seg", "ins_seg")
)
extrinsics = env.simulator.renderer.V
vbg = slam.integrate(
extrinsics,
depth = -threed[:, :, 2:3],
features = {
"color": rgb[:, :, :3],
"seg": seg[..., 0:1],
"ins_seg": ins_seg[..., 0:1],
}
)
action = np.array([0, 1])
state, reward, done, info = env.step(action)
# slam.visualize_frustum()
# slam.visualize()
# active_buf_indices = vbg.hashmap().active_buf_indices().to(o3c.int64)
# key_tensor = vbg.hashmap().key_tensor()[active_buf_indices]
# vbg.hashmap().activate(key_tensor)
# buf_indices, _ = vbg.hashmap().find(key_tensor)
pcd = vbg.extract_point_cloud()
pcd_block_coordinates = vbg.compute_unique_block_coordinates(pcd)
vbg.hashmap().activate(pcd_block_coordinates)
buf_indices, _ = vbg.hashmap().find(pcd_block_coordinates)
o3c.cuda.synchronize()
voxel_coords, voxel_indices = vbg.voxel_coordinates_and_flattened_indices(
buf_indices
)
color = vbg.attribute("color").reshape((-1, 3))[voxel_indices]
# Using active buf indices
# color.shape = SizeVector[54960128, 3]
# voxel_coords.shape = SizeVector[54960128, 3]
# Using pcd to get voxel_coords
# color.shape = SizeVector[48689152, 3]
# voxel_coords.shape = SizeVector[48689152, 3]
# Using pcd
# pcd.point["colors"].shape = SizeVector[1510288, 3]
# pcd = o3d.t.geometry.PointCloud()
# pcd.point["positions"] = voxel_coords
# pcd.point["colors"] = color / 255
o3d.visualization.draw(pcd)
# o3d.visualization.draw(vbg.extract_point_cloud())
Dear @mjlbach: Thanks a lot, the code works. I need to project the global semantic point cloud into the current frame, so I have to know how the coordinates are transformed. Can you help me solve this problem? The robot's action is set to forward or rotate, and the extrinsic matrix can be obtained from the iGibson environment or computed from the orientation and position. My code is as follows:
import logging
import os
from sys import platform
import yaml
import matplotlib.pyplot as plt
import numpy as np
import torch
import igibson
from igibson.envs.igibson_env import iGibsonEnv
from igibson.render.profiler import Profiler
import open3d as o3d
np.random.seed(2022)
# Unproject a depth image into an (H, W, 3) point cloud in camera coordinates.
def create_from_depth_frame(depth_frame, camera_matrix):
depth_frame = np.squeeze(depth_frame)
xc = camera_matrix[0, 2] # cx
zc = camera_matrix[1, 2] # cy
fx = camera_matrix[0, 0]
fy = camera_matrix[1, 1]
depth_frame = torch.from_numpy(depth_frame).cuda()
x_points = torch.arange(-xc, xc).float().cuda()
x_vals = (depth_frame * x_points / (fx))
y_points = torch.arange(zc, -zc, -1).float().cuda()
y_vals = (depth_frame.T * y_points / (fy)).T
z_vals = depth_frame
coordinates = torch.stack((x_vals, y_vals, z_vals), axis=2)
coordinates = coordinates.cpu().numpy()
return coordinates
def main():
o3d.visualization.webrtc_server.enable_webrtc()
# config_filename = os.path.join(igibson.example_config_path, "husky_nav.yaml")
config_filename = os.path.join(igibson.example_config_path, "turtlebot_nav.yaml")
config_data = yaml.load(open(config_filename, "r"), Loader=yaml.FullLoader)
config_data["vertical_fov"] = 90
config_data["output"] = ["rgb", "depth"]
# Reduce texture scale for Mac.
if platform == "darwin":
config_data["texture_scale"] = 0.5
env = iGibsonEnv(config_file=config_data, mode="headless")
env.simulator.robots[0].set_position([10,10,0])
camera_matrix = env.simulator.renderer.get_intrinsics()
for j in range(100):
logging.info("Resetting environment")
env.reset()
for i in range(300):
with Profiler("Environment action step"):
# action = env.action_space.sample()
# state, reward, done, info = env.step([0, 3, 1, 6])
state, reward, done, info = env.step([1, 1])
rgb = state["rgb"]
depth = state["depth"]
# depth to pcd
point_cloud = create_from_depth_frame(depth, camera_matrix)
# transformation using extrinsics
extrinsics = env.simulator.renderer.V
R_m = extrinsics[:3, :3]
T_m = extrinsics[:3, 3:]
point_cloud[..., 0] -= T_m[0]
point_cloud[..., 1] -= T_m[1]
point_cloud[..., 2] -= T_m[2]
point_cloud_world_coordinate = R_m.T @ point_cloud.reshape(-1,3).transpose(1,0)
point_cloud_world_coordinate = point_cloud_world_coordinate.transpose(1,0)
# transformation using position and orientation
# current_rotation = env.simulator.robots[0].eyes.get_orientation()
# current_position = env.simulator.robots[0].eyes.get_position()
# from scipy.spatial.transform import Rotation as R
# r = R.from_quat(current_rotation)
# rotation_matrix = r.as_matrix()
# point_cloud[..., 0] -= current_position[0]
# point_cloud[..., 1] -= current_position[1]
# point_cloud[..., 2] -= current_position[2]
# point_cloud_world_coordinate = rotation_matrix.T @ point_cloud.reshape(-1, 3).transpose(1, 0)
# point_cloud_world_coordinate = point_cloud_world_coordinate.transpose(1, 0)
# construct global point cloud
if i==0:
point_cloud_global = point_cloud_world_coordinate
rgb_global = rgb
else:
point_cloud_global = (np.append(point_cloud_global, point_cloud_world_coordinate, axis=0))
rgb_global = np.append(rgb_global, rgb, axis=0)
# visualization(plt or o3d)
# ax = plt.figure(0).add_subplot(projection='3d')
# try:
# ax.plot(np.ndarray.flatten(point_cloud[::, ::, 0]), np.ndarray.flatten(point_cloud[::, ::, 1]),
# np.ndarray.flatten(point_cloud[::, ::, 2]), 'b.', markersize=0.2)
# except:
# ax.plot(np.ndarray.flatten(point_cloud[::, 0]), np.ndarray.flatten(point_cloud[::, 1]),
# np.ndarray.flatten(point_cloud[::, 2]), 'b.', markersize=0.2)
# plt.title('point cloud')
#
# ax = plt.figure(1).add_subplot(projection='3d')
# try:
# ax.plot(np.ndarray.flatten(point_cloud_global[::, ::, 0]),
# np.ndarray.flatten(point_cloud_global[::, ::, 1]),
# np.ndarray.flatten(point_cloud_global[::, ::, 2]), 'b.', markersize=0.2)
# except:
# ax.plot(np.ndarray.flatten(point_cloud_global[::, 0]),
# np.ndarray.flatten(point_cloud_global[::, 1]),
# np.ndarray.flatten(point_cloud_global[::, 2]), 'b.', markersize=0.2)
# plt.title('point_cloud_global')
#
# plt.show()
if i >= 40:  # after 40 steps
pcd = o3d.geometry.PointCloud()
np_points = np.unique(point_cloud.reshape(-1, 3), axis=0)
print(type(np_points))
print(np_points.shape)
pcd.points = o3d.utility.Vector3dVector(point_cloud_global.reshape(-1, 3))
pcd.colors = o3d.utility.Vector3dVector(rgb_global.reshape(-1, 3))
print(np.asarray(pcd.points))
o3d.visualization.draw(pcd)
if __name__ == '__main__':
main()
thanks, Juan
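For reference, projecting a world-coordinate (global) point cloud back into the current frame can use the same intrinsic and renderer extrinsic V that the integrate() code above uses. The helper below is only a minimal sketch under those assumptions; the function name is illustrative, and the y/z flip mirrors convert_opengl_to_world from the earlier snippet rather than being an official API.

import numpy as np

def project_world_to_current_frame(points_world, V, K, height, width):
    # Hypothetical helper: project an (N, 3) world-coordinate point cloud into
    # the current image given the renderer extrinsic V and intrinsic K.
    ones = np.ones((points_world.shape[0], 1))
    pts_cam = (V @ np.concatenate([points_world, ones], axis=1).T)[:3]  # OpenGL camera frame
    # Flip y and z to go from the OpenGL convention (y up, -z forward) to the
    # pinhole convention (y down, +z forward), as convert_opengl_to_world does above.
    pts_cam = np.diag([1.0, -1.0, -1.0]) @ pts_cam
    uvd = K @ pts_cam
    d = uvd[2]
    u, v = uvd[0] / d, uvd[1] / d
    in_view = (d > 0) & (u >= 0) & (u < width) & (v >= 0) & (v < height)
    return u[in_view], v[in_view], d[in_view]

With the snippets above, V would be env.simulator.renderer.V and K would be env.simulator.renderer.get_intrinsics().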
Dear iGibson Team, I encountered a problem when transforming the point cloud from image coordinates to world coordinates. First, I generate the point cloud from the depth image. Second, I compute the rotation matrix R from the orientation returned by env.simulator.robots[0].get_orientation() and get the current position from env.simulator.robots[0].get_position(). Third, I transform the coordinates by:
point_cloud_t_geometric = np.matmul(point_cloud_t.reshape(-1, 3), R.T).reshape(XYZ.shape)
point_cloud_t_geometric[:, :, 0] += current_position[0]
point_cloud_t_geometric[:, :, 1] += current_position[1]
point_cloud_t_geometric[:, :, 2] += current_position[2]
The results are shown below: red is the result at time t and blue is the result at time t+1. Clearly, there is an offset between the point clouds at times t and t+1. How can I solve this problem? Thanks, Juan
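For reference, the three steps described above can be consolidated into one helper. This is only an illustrative sketch of the procedure as written (it does not by itself resolve the offset), and it assumes the quaternion from get_orientation() is in (x, y, z, w) order, which is what SciPy's from_quat expects.

import numpy as np
from scipy.spatial.transform import Rotation as R

def camera_points_to_world(points_cam, orientation_quat, position):
    # Step 2: rotation matrix from the orientation quaternion.
    rot = R.from_quat(orientation_quat).as_matrix()
    # Step 3: rotate, then translate by the current position (p_world = R @ p_cam + t).
    pts = points_cam.reshape(-1, 3)
    pts_world = pts @ rot.T + np.asarray(position)
    return pts_world.reshape(points_cam.shape)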