autonomousvision / kitti360Scripts

This repository contains utility scripts for the KITTI-360 dataset.
MIT License

2D segmentation map for 3D bounding box data #68

Open Sarthak-22 opened 2 years ago

Sarthak-22 commented 2 years ago

The 3D bounding box data in the current dataset is provided in XML format. I am able to visualize it using KITTI360Viewer3D.py.

However, that shows a 3D view with 3D bboxes in an Open3D window. I need a 2D rendering similar to the one shown on the KITTI-360 dataset website (attached below). Is there a solution or simple code for this? I am not very familiar with Open3D, so any simple solution would suffice.

Thank You

[image]

yiyiliao commented 2 years ago

Hi, this is rendered with Open3D. Here is an example script. Note that, for efficiency, it only renders bounding primitives with valid instance IDs. You can adapt the code to render all bounding primitives.
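For reference: the script below reads the dataset root from the KITTI360_DATASET environment variable (falling back to a path relative to the script), takes an optional sequence index as its first command-line argument (default 3), and writes the rendered frames to video3d/<sequence>/bbox/ under the dataset root.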

import sys
import glob
import os
import subprocess
import copy
import numpy as np
import open3d
import matplotlib.cm
import matplotlib.pyplot as plt
from scipy import interpolate

from kitti360scripts.helpers.annotation  import Annotation3D, global2local
from kitti360scripts.helpers.project import Camera
from kitti360scripts.helpers.labels     import name2label, id2label, kittiId2label

class Video3D(object):
    def __init__(self, seq=0):

        # The sequence we are currently working on
        self.currentSequence   = ""
        # Image extension
        self.imageExt          = ".png"
        # Filenames of all images in the current sequence
        self.images            = []
        self.imagesCityFull    = []
        # Ground truth type
        self.gtType            = 'semantic'
        # Add contour to semantic map
        self.semanticCt        = True
        # Add contour to instance map
        self.instanceCt        = True
        # The highlighted object (an object instance) and its label
        self.highlightObj      = None
        self.highlightObjSparse= None
        self.highlightObjLabel = None
        # The current object the mouse points to, i.e. its index in self.labels
        self.mouseObj          = -1
        # The object the mouse was pressed on, i.e. its index in self.labels
        self.mousePressObj     = -1
        self.mouseSemanticId   = -1
        self.mouseInstanceId   = -1
        # colormap
        self.cmap = matplotlib.cm.get_cmap('Set1')
        self.cmap_length = 9 
        # show camera or not
        self.showCamera = False
        self.downSampleEvery = -1 
        # show bbox wireframe or mesh
        self.showWireframe = False
        self.show3DInstanceOnly = True
        # show static or dynamic point clouds
        self.showStatic = True
        # show visible point clouds only
        self.showVisibleOnly = False

        if 'KITTI360_DATASET' in os.environ:
            kitti360Path = os.environ['KITTI360_DATASET']
        else:
            kitti360Path = os.path.join(os.path.dirname(
                                os.path.realpath(__file__)), '..', '..')

        sequence = '2013_05_28_drive_%04d_sync' % seq
        imagePath = os.path.join(kitti360Path, 'data_2d_raw')
        label2DPath = os.path.join(kitti360Path, 'data_2d_semantics', 'train')
        label3DBboxPath = os.path.join(kitti360Path, 'data_3d_bboxes')
        self.annotation3D = Annotation3D(label3DBboxPath, sequence)

        cameraId = 0
        self.sequence = sequence
        self.seqId = seq
        self.cam2worldPath  = os.path.join(kitti360Path, 'data_poses', sequence, 'cam0_to_world.txt')

        self.outputPath = os.path.join(kitti360Path, 'video3d', sequence)
        os.makedirs(self.outputPath, exist_ok=True)

        self.pointClouds = {}
        self.Rz = np.eye(3)
        self.bboxes = []
        # bbox wireframes; used only when showWireframe is True (empty by default)
        self.lineSets = []
        self.accumuData = []

    def getColor(self, idx):
        if idx==0:
            return np.array([0,0,0])
        return np.asarray(self.cmap(idx % self.cmap_length)[:3])*255.

    def assignColor(self, globalIds, gtType='semantic'):
        if not isinstance(globalIds, (np.ndarray, np.generic)):
            globalIds = np.array(globalIds)[None]
        color = np.zeros((globalIds.size, 3))
        for uid in np.unique(globalIds):
            semanticId, instanceId = global2local(uid)
            if gtType=='semantic':
                color[globalIds==uid] = id2label[semanticId].color
            elif instanceId>0:
                color[globalIds==uid] = self.getColor(instanceId)
            else:
                color[globalIds==uid] = (96,96,96) # stuff objects in instance mode
        color = color.astype(np.float64)/255.0
        return color

    def getCamTrajectory(self):
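        # load the ground-truth camera-to-world poses and accumulate the total
        # distance traveled by the camera (also sets self.numFrames)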
        self.camDistance = 0 
        cam2world = np.loadtxt(self.cam2worldPath)
        cam2world = np.reshape(cam2world[:,1:],(-1,4,4))
        T = cam2world[:,:3,3]
        self.numFrames = T.shape[0]

        for i in range(T.shape[0]-1):
            self.camDistance += np.linalg.norm(T[i+1]-T[i])

    def globalRotation(self):
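        # search over rotations about the z axis (in 0.1 rad steps) for the one that
        # maximizes the aspect ratio of the trajectory's bounding box, so that the
        # rendered trajectory is roughly axis-aligned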
        cam2world = np.loadtxt(self.cam2worldPath)
        cam2world = np.reshape(cam2world[:,1:],(-1,4,4))
        T = cam2world[:,:3,3]

        lx = np.max(T[:,0]) - np.min(T[:,0])
        ly = np.max(T[:,1]) - np.min(T[:,1])
        max_ratio = lx/ly 
        Rz = np.eye(3)
        for theta in np.arange(0, 2*np.pi, 0.1):
            tmpRz = np.asarray([[np.cos(theta), -np.sin(theta), 0],
                             [np.sin(theta), np.cos(theta), 0],
                             [0, 0, 1]])
            T_new = (tmpRz @ T.transpose()).transpose()
            lx = np.max(T_new[:,0]) - np.min(T_new[:,0])
            ly = np.max(T_new[:,1]) - np.min(T_new[:,1])
            if lx/ly>max_ratio:
                Rz = tmpRz
                max_ratio = lx/ly
        self.Rz = Rz

    def loadBoundingBoxes(self):
        for globalId,v in self.annotation3D.objects.items():
            # skip dynamic objects
            if len(v)>1:
                continue
            for obj in v.values():
                # build a mesh from the bounding primitive's vertices and faces,
                # colored by its semantic label
                mesh = open3d.geometry.TriangleMesh()
                mesh.vertices = open3d.utility.Vector3dVector(obj.vertices)
                mesh.triangles = open3d.utility.Vector3iVector(obj.faces)
                color = self.assignColor(globalId, 'semantic')
                mesh.paint_uniform_color(color.flatten())
                mesh.compute_vertex_normals()
                self.bboxes.append(mesh)

    def lookat(self, look_from, look_to, tmp = np.asarray([0, 0, 1])):
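        # build a camera pose looking from look_from towards look_to, with tmp as the
        # approximate up direction; rows hold the right/up/forward axes and the position,
        # so the caller transposes the result before use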
        forward = - look_from + look_to 
        forward = forward / np.linalg.norm(forward)
        right = -np.cross(tmp, forward)
        up = np.cross(forward, right)

        camToWorld = np.zeros((4,4)) 

        camToWorld[0,0:3] = right
        camToWorld[1,0:3] = up 
        camToWorld[2,0:3] = forward 
        camToWorld[3,0:3] = look_from 
        camToWorld[3,3] = 1

        return camToWorld

    def generateCameraPoses(self, num_interpolated_frames=500):
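        # raise the camera by 1 m, drop the first 50 poses, then fit a spline through
        # the remaining camera locations and resample it into num_interpolated_frames
        # poses, each looking towards the next point on the path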
        cam2world = np.loadtxt(self.cam2worldPath)
        cam2world = np.reshape(cam2world[:,1:],(-1,4,4))

        # a higher camera
        cam2world[:,2,3] = cam2world[:,2,3] + 1.
        cam2world = cam2world[50:,:]
        locs = cam2world[:,:3,3]
        center = np.median(locs, axis=0)

        # fit a smooth spline through the camera locations and resample it
        keypoints = locs
        tck, u = interpolate.splprep([keypoints[:,0], keypoints[:,1], keypoints[:,2]])
        u_fine = np.linspace(0, 1, num_interpolated_frames)
        x_fine, y_fine, z_fine = interpolate.splev(u_fine, tck)

        cams = []

        for idx,(x,y,z) in enumerate(zip(x_fine, y_fine, z_fine)):
            if idx==len(x_fine)-1:
                continue
            look_to = center + (np.array([x_fine[idx+1], y_fine[idx+1], z_fine[idx+1]]) - center)
            cams.append(self.lookat(np.asarray([x,y,z]), look_to).transpose())

        cams = np.asarray(cams)
        cams = np.reshape(cams, (cams.shape[0], 16))
        self.extrinsics = cams

def custom_draw_geometry_with_camera_trajectory(pcd, extrinsics, gtType, outputPath, lineSets=[], showCamera=False, width = 1920,  height = 1080):

    if not showCamera:
        outputPath = os.path.join(outputPath, gtType)
        os.makedirs(outputPath, exist_ok=True)
    else:
        outputPath = os.path.join(outputPath, '%s_cam' % gtType)
        os.makedirs(outputPath, exist_ok=True)
    print(outputPath)

    custom_draw_geometry_with_camera_trajectory.index = -1

    trajectory = open3d.camera.PinholeCameraTrajectory()
    cam_parameters = open3d.camera.PinholeCameraParameters()
    focal = 935.30743608719376
    K = [[focal, 0, width / 2 - 0.5],
         [0, focal, height / 2 - 0.5],
         [0, 0, 1]]
    cam_parameters.intrinsic.width = width
    cam_parameters.intrinsic.height = height
    cam_parameters.intrinsic.intrinsic_matrix = K
    for i in range(extrinsics.shape[0]):
        para = copy.deepcopy(cam_parameters)
        # convert the cam-to-world pose into the world-to-cam extrinsic expected by Open3D
        ext = np.reshape(extrinsics[i,:], (4,4))
        ext2 = np.eye(4)
        ext2[:3,:3] = ext[:3,:3].transpose()
        ext2[:3, 3:] = -np.matmul(ext2[:3,:3], ext[:3,3:])
        para.extrinsic = ext2
        trajectory.parameters = trajectory.parameters + [para]

    custom_draw_geometry_with_camera_trajectory.trajectory = trajectory
    custom_draw_geometry_with_camera_trajectory.vis = open3d.visualization.Visualizer()

    def move_forward(vis):
        # This function is called within the Visualizer::run() loop
        # The run loop calls the function, then re-renders
        # So the sequence in this function is to:
        # 1. Capture frame
        # 2. index++, check ending criteria
        # 3. Set camera
        # 4. (Re-render)
        ctr = vis.get_view_control()
        ctr.set_constant_z_near(0.5)
        glb = custom_draw_geometry_with_camera_trajectory
        if glb.index >= 0:
            print("Capture image {:05d}".format(glb.index))
            image = vis.capture_screen_float_buffer(False)
            plt.imsave(os.path.join(outputPath, "{:05d}.png".format(glb.index)),\
                    np.asarray(image), dpi = 1)
        glb.index = glb.index + 1
        if glb.index < len(glb.trajectory.parameters):
            ctr.convert_from_pinhole_camera_parameters(glb.trajectory.parameters[glb.index])
        else:
            custom_draw_geometry_with_camera_trajectory.vis.\
                    register_animation_callback(None)
        return False
    vis = custom_draw_geometry_with_camera_trajectory.vis
    vis.create_window(width=width, height=height)
    if isinstance(pcd, list):
        for pcd_ in pcd:
            vis.add_geometry(pcd_)
    elif isinstance(pcd, dict):
        for pcd_ in pcd.values():
            vis.add_geometry(pcd_)
    else:
        vis.add_geometry(pcd)

    if len(lineSets):
        for lineSet in lineSets:
            vis.add_geometry(lineSet)

    vis.get_render_option().background_color = np.asarray([0,0,0])
    vis.get_render_option().light_on = 1
    vis.register_animation_callback(move_forward)
    vis.run()
    vis.destroy_window()

if __name__=='__main__':

    seq=3
    if len(sys.argv)>1:
        seq=int(sys.argv[1])
    v = Video3D(seq)
    v.generateCameraPoses()
    v.globalRotation()
    v.loadBoundingBoxes()
    v.getCamTrajectory()

    # lineSets is empty unless populated separately (see the note after the script);
    # the default renders solid bbox meshes
    bboxes = v.lineSets if v.showWireframe else v.bboxes

    ######################
    # video 
    ######################
    colorType = 'bbox'
    custom_draw_geometry_with_camera_trajectory(v.pointClouds, v.extrinsics, colorType, v.outputPath, bboxes, v.showCamera)
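As posted, the script renders the bounding boxes as solid meshes; lineSets is never filled, so showWireframe = True would draw nothing. If you want the wireframe look instead, here is a minimal sketch of a method you could add to Video3D. The name loadBoundingBoxWireframes is made up here, and obj.lines is assumed to hold pairs of vertex indices; adapt as needed.

    def loadBoundingBoxWireframes(self):
        for globalId, v in self.annotation3D.objects.items():
            # skip dynamic objects, as in loadBoundingBoxes
            if len(v) > 1:
                continue
            for obj in v.values():
                # build a wireframe from the bounding primitive's vertices and edges,
                # with one color per edge chosen by semantic label
                lineSet = open3d.geometry.LineSet()
                lineSet.points = open3d.utility.Vector3dVector(obj.vertices)
                lineSet.lines = open3d.utility.Vector2iVector(np.array(obj.lines))
                color = self.assignColor(globalId, 'semantic')
                lineSet.colors = open3d.utility.Vector3dVector(
                    np.tile(color, (len(obj.lines), 1)))
                self.lineSets.append(lineSet)

Call it next to v.loadBoundingBoxes() and set v.showWireframe = True before the final call.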