Open Sarthak-22 opened 2 years ago
Hi, this is rendered with open3D. Here is an example script. Note that it only renders bounding primitives with valid instance IDs for efficiency. You can adapt the code to render all bounding primitives.
import sys
import glob
import os
import subprocess
import copy
import numpy as np
import open3d
import matplotlib.cm
import matplotlib.pyplot as plt
from scipy import interpolate
from kitti360scripts.helpers.annotation import Annotation3D, global2local
from kitti360scripts.helpers.project import Camera
from kitti360scripts.helpers.labels import name2label, id2label, kittiId2label
class Video3D(object):
def __init__(self, seq=0):
# The sequence of the image we currently working on
self.currentSequence = ""
# Image extension
self.imageExt = ".png"
# Filenames of all images in current city
self.images = []
self.imagesCityFull = []
# Ground truth type
self.gtType = 'semantic'
# Add contour to semantic map
self.semanticCt = True
# Add contour to instance map
self.instanceCt = True
# The object that is highlighted and its label. An object instance
self.highlightObj = None
self.highlightObjSparse= None
self.highlightObjLabel = None
# The current object the mouse points to. It's index in self.labels
self.mouseObj = -1
# The current object the mouse points to. It's index in self.labels
self.mousePressObj = -1
self.mouseSemanticId = -1
self.mouseInstanceId = -1
# colormap
self.cmap = matplotlib.cm.get_cmap('Set1')
self.cmap_length = 9
# show camera or not
self.showCamera = False
self.downSampleEvery = -1
# show bbox wireframe or mesh
self.showWireframe = False
self.show3DInstanceOnly = True
# show static or dynamic point clouds
self.showStatic = True
# show visible point clouds only
self.showVisibleOnly = False
if 'KITTI360_DATASET' in os.environ:
kitti360Path = os.environ['KITTI360_DATASET']
else:
kitti360Path = os.path.join(os.path.dirname(
os.path.realpath(__file__)), '..', '..')
sequence = '2013_05_28_drive_%04d_sync' % seq
imagePath = os.path.join(kitti360Path, 'data_2d_raw')
label2DPath = os.path.join(kitti360Path, 'data_2d_semantics', 'train')
label3DBboxPath = os.path.join(kitti360Path, 'data_3d_bboxes')
self.annotation3D = Annotation3D(label3DBboxPath, sequence)
if 'KITTI360_DATASET' in os.environ:
kitti360Path = os.environ['KITTI360_DATASET']
else:
kitti360Path = os.path.join(os.path.dirname(
os.path.realpath(__file__)), '..', '..')
cameraId = 0
sequence = '2013_05_28_drive_%04d_sync' % seq
self.sequence = sequence
self.seqId = seq
self.cam2worldPath = os.path.join(kitti360Path, 'data_poses', sequence, 'cam0_to_world.txt')
self.outputPath = os.path.join(kitti360Path, 'video3d', sequence)
os.makedirs(self.outputPath, exist_ok=True)
self.pointClouds = {}
self.Rz = np.eye(3)
self.bboxes = []
self.accumuData = []
def getColor(self, idx):
if idx==0:
return np.array([0,0,0])
return np.asarray(self.cmap(idx % self.cmap_length)[:3])*255.
def assignColor(self, globalIds, gtType='semantic'):
if not isinstance(globalIds, (np.ndarray, np.generic)):
globalIds = np.array(globalIds)[None]
color = np.zeros((globalIds.size, 3))
for uid in np.unique(globalIds):
semanticId, instanceId = global2local(uid)
if gtType=='semantic':
color[globalIds==uid] = id2label[semanticId].color
elif instanceId>0:
color[globalIds==uid] = self.getColor(instanceId)
else:
color[globalIds==uid] = (96,96,96) # stuff objects in instance mode
color = color.astype(np.float)/255.0
return color
def getCamTrajectory(self):
self.camDistance = 0
cam2world = np.loadtxt(self.cam2worldPath)
cam2world = np.reshape(cam2world[:,1:],(-1,4,4))
T = cam2world[:,:3,3]
self.numFrames = T.shape[0]
color = np.array([0.75,0.,0.]).reshape((1,3))
radius = 1.5
for i in range(T.shape[0]-1):
self.camDistance += np.linalg.norm(T[i+1]-T[i])
def globalRotation(self):
cam2world = np.loadtxt(self.cam2worldPath)
cam2world = np.reshape(cam2world[:,1:],(-1,4,4))
T = cam2world[:,:3,3]
lx = np.max(T[:,0]) - np.min(T[:,0])
ly = np.max(T[:,1]) - np.min(T[:,1])
max_ratio = lx/ly
Rz = np.eye(3)
for theta in np.arange(0, 2*np.pi, 0.1):
tmpRz = np.asarray([[np.cos(theta), -np.sin(theta), 0],
[np.sin(theta), np.cos(theta), 0],
[0, 0, 1]])
T_new = (tmpRz @ T.transpose()).transpose()
lx = np.max(T_new[:,0]) - np.min(T_new[:,0])
ly = np.max(T_new[:,1]) - np.min(T_new[:,1])
if lx/ly>max_ratio:
Rz = tmpRz
max_ratio = lx/ly
self.Rz = Rz
def loadBoundingBoxes(self):
for globalId,v in self.annotation3D.objects.items():
# skip dynamic objects
if len(v)>1:
continue
for obj in v.values():
lines=np.array(obj.lines)
vertices=obj.vertices
faces=obj.faces
mesh = open3d.geometry.TriangleMesh()
mesh.vertices = open3d.utility.Vector3dVector(obj.vertices)
mesh.triangles = open3d.utility.Vector3iVector(obj.faces)
color = self.assignColor(globalId, 'semantic')
semanticId, instanceId = global2local(globalId)
mesh.paint_uniform_color(color.flatten())
mesh.compute_vertex_normals()
self.bboxes.append( mesh )
def lookat(self, look_from, look_to, tmp = np.asarray([0, 0, 1])):
forward = - look_from + look_to
forward = forward / np.linalg.norm(forward)
right = -np.cross(tmp, forward)
up = np.cross(forward, right)
camToWorld = np.zeros((4,4))
camToWorld[0,0:3] = right
camToWorld[1,0:3] = up
camToWorld[2,0:3] = forward
camToWorld[3,0:3] = look_from
camToWorld[3,3] = 1
return camToWorld
def generateCameraPoses(self, num_interpolated_frames=500):
cam2world = np.loadtxt(self.cam2worldPath)
cam2world = np.reshape(cam2world[:,1:],(-1,4,4))
# a higher camera
cam2world[:,2,3] = cam2world[:,2,3] + 1.
cam2world = cam2world[50:,:]
locs = cam2world[:,:3,3]
center = np.median(locs, axis=0)
# extend exist camera trajectory
tck, u = interpolate.splprep([locs[:50,0],locs[:50,1],locs[:50,2]], s=2)
x_knots, y_knots, z_knots = interpolate.splev(tck[0], tck)
u_ext = np.linspace(-2.5,0,50)
locs_ext = interpolate.splev(u_ext, tck)
locs_ext = np.asarray(locs_ext).T
c = center
d = center-locs[0,:]
keypoints = locs
tck, u = interpolate.splprep([keypoints[:,0],keypoints[:,1],keypoints[:,2]]) #s=2
x_knots, y_knots, z_knots = interpolate.splev(tck[0], tck)
u_fine = np.linspace(0,1,num_interpolated_frames)
x_fine, y_fine, z_fine = interpolate.splev(u_fine, tck)
locs_new = np.asarray([x_fine, y_fine, z_fine]).T
startIdx = np.argmin(np.linalg.norm(locs_new - locs[0,:], axis=1))
cams = []
for idx,(x,y,z) in enumerate(zip(x_fine, y_fine, z_fine)):
if idx==len(x_fine)-1:
continue
look_to = center + (np.array([x_fine[idx+1], y_fine[idx+1], z_fine[idx+1]]) - center)
cams.append(self.lookat(np.asarray([x,y,z]), look_to).transpose())
cams = np.asarray(cams)
cams = np.reshape(cams, (cams.shape[0], 16))
self.extrinsics = cams
def custom_draw_geometry_with_camera_trajectory(pcd, extrinsics, gtType, outputPath, lineSets=[], showCamera=False, width = 1920, height = 1080):
if not showCamera:
outputPath = os.path.join(outputPath, gtType)
os.makedirs(outputPath, exist_ok=True)
else:
outputPath = os.path.join(outputPath, '%s_cam' % gtType)
os.makedirs(outputPath, exist_ok=True)
print(outputPath)
custom_draw_geometry_with_camera_trajectory.index = -1
trajectory = open3d.camera.PinholeCameraTrajectory()
cam_parameters =open3d.camera.PinholeCameraParameters()
focal = 935.30743608719376
K = [[focal , 0, width / 2 - 0.5],
[0, focal, height / 2 - 0.5],
[0, 0, 1]]
cam_parameters.intrinsic.width=width
cam_parameters.intrinsic.height=height
cam_parameters.intrinsic.intrinsic_matrix=K
for i in range(extrinsics.shape[0]):
para = copy.deepcopy(cam_parameters)
ext = extrinsics[i,:]
ext = np.reshape(ext, (4,4))
ext2 = np.zeros((4,4))
ext2[:3,:3] = ext[:3,:3].transpose()
ext2[:3, 3:] = -np.matmul(ext2[:3,:3], ext[:3,3:])
para.extrinsic = ext2
trajectory.parameters = trajectory.parameters + [para]
custom_draw_geometry_with_camera_trajectory.trajectory = trajectory
custom_draw_geometry_with_camera_trajectory.vis = open3d.visualization.Visualizer()
def move_forward(vis):
# This function is called within the Visualizer::run() loop
# The run loop calls the function, then re-render
# So the sequence in this function is to:
# 1. Capture frame
# 2. index++, check ending criteria
# 3. Set camera
# 4. (Re-render)
ctr = vis.get_view_control()
ctr.set_constant_z_near(0.5)
glb = custom_draw_geometry_with_camera_trajectory
if glb.index >= 0:
print("Capture image {:05d}".format(glb.index))
image = vis.capture_screen_float_buffer(False)
plt.imsave(os.path.join(outputPath, "{:05d}.png".format(glb.index)),\
np.asarray(image), dpi = 1)
glb.index = glb.index + 1
if glb.index < len(glb.trajectory.parameters):
ctr.convert_from_pinhole_camera_parameters(glb.trajectory.parameters[glb.index])
else:
custom_draw_geometry_with_camera_trajectory.vis.\
register_animation_callback(None)
return False
vis = custom_draw_geometry_with_camera_trajectory.vis
vis.create_window(width=width, height=height)
if isinstance(pcd, list):
for pcd_ in pcd:
vis.add_geometry(pcd_)
elif isinstance(pcd, dict):
for pcd_ in pcd.values():
vis.add_geometry(pcd_)
else:
vis.add_geometry(pcd)
if len(lineSets):
for lineSet in lineSets:
vis.add_geometry(lineSet)
vis.get_render_option().background_color = np.asarray([0,0,0])
vis.get_render_option().light_on = 1
vis.register_animation_callback(move_forward)
vis.run()
vis.destroy_window()
if __name__=='__main__':
seq=3
if len(sys.argv)>1:
seq=int(sys.argv[1])
v = Video3D(seq)
v.generateCameraPoses()
v.globalRotation()
v.loadBoundingBoxes()
v.getCamTrajectory()
bboxes = v.lineSets if v.showWireframe else v.bboxes
######################
# video
######################
colorType = 'bbox'
custom_draw_geometry_with_camera_trajectory(v.pointClouds, v.extrinsics, colorType, v.outputPath, bboxes, v.showCamera)
The current dataset for 3D bounding box data supports xml format. I am able to visualize it using KITTI360Viewer3D.py
However, a 3D output with 3D bboxes is showing up in open3D window. I need a 2D output similar to the one shown in KITTI360 dataset website (attached below) Is there any solution or a simple code for this? I am not very familiar with Open3D so any simple solution would suffice.
Thank You