DLR-RM / BlenderProc

A procedural Blender pipeline for photorealistic training image generation
GNU General Public License v3.0
2.82k stars 452 forks source link

[Questions]: How to correctly project the meshes into the scene for LM and ITODD PRB? #749

Closed FabianSchuetze closed 1 year ago

FabianSchuetze commented 1 year ago

I am trying to project the object meshes into the pointcloud for the PBR LM and ITODD scenes. Particularly for some ITODD objects, the projections are off. I create the pointcloud by turning the depth image with the camera's focal length into the pointcloud. The meshes are loaded (and, for LM, scaled by 1/1000) and then transformed according to their pose with cam_R_m2c and cam_t_m2c. For example, a screenshot of the scene from image folder 25 and scene 247 can be seen below. cloud_lm itodd

For the itodd scene, some meshes do not align with the pointcloud, but the procedure works reasonably well for LM. The script used to generate the images is shown below.

What can I do to project a mesh into a pointcloud given a known folder and scene?


Code to generate the projections

import os
import json
import copy
import numpy as np
import open3d as o3d
import cv2

def load_gts(data_root, src_folder, src_img):
    """
    Loads the per-image ground-truth annotations of one BOP scene.

    :param data_root: root folder holding the numbered scene folders.
    :param src_folder: scene folder index (zero-padded to six digits).
    :param src_img: image index used as key into the json files.
    :returns: tuple ``(scene_gt_info entry, scene_gt entry)`` — note the
        order: the *info* entry (visibility stats) comes first.
    """
    scene_dir = os.path.join(data_root, "%06d" % int(src_folder))
    with open(os.path.join(scene_dir, 'scene_gt_info.json'), 'r') as fin:
        infos = json.load(fin)
    with open(os.path.join(scene_dir, 'scene_gt.json'), 'r') as fin:
        gts = json.load(fin)
    key = str(src_img)
    return infos[key], gts[key]

def create_pointcloud(depth, focal_length, fy=None, cx=None, cy=None):
    """
    Back-projects a depth image into an organized (3, H, W) pointcloud.

    The original version assumed square pixels and a principal point at
    the image center, which is exactly why the ITODD projections were
    off: a correct back-projection needs the full ``cam_K`` intrinsics
    (fx, fy, cx, cy).  The extra parameters default to the old behaviour
    so existing callers are unaffected; pass ``cam_K[4]``, ``cam_K[2]``
    and ``cam_K[5]`` for a faithful reconstruction.

    :param depth: (H, W) depth image in metric units.
    :param focal_length: fx in pixels.
    :param fy: focal length along y in pixels; defaults to ``focal_length``.
    :param cx: principal point x in pixels; defaults to the image center.
    :param cy: principal point y in pixels; defaults to the image center.
    :returns: (3, H, W) float32 cloud in the camera frame (x, y, z).
    """
    height, width = depth.shape
    if fy is None:
        fy = focal_length
    # width/2 - 0.5 reproduces the original "arange - w/2 + 0.5" centering.
    if cx is None:
        cx = width / 2 - 0.5
    if cy is None:
        cy = height / 2 - 0.5
    cidx = np.arange(width, dtype=np.float32) - cx
    ridx = np.arange(height, dtype=np.float32) - cy
    cloud = np.zeros((3, height, width), dtype=np.float32)
    # Broadcast the pixel offsets across rows (x) and columns (y).
    cloud[0] = cidx[np.newaxis, :] * depth / focal_length
    cloud[1] = ridx[:, np.newaxis] * depth / fy
    cloud[2] = depth
    return cloud

def load_meshes(model_folder, scale):
    """
    Loads every .ply model in ``model_folder`` and scales its vertices.

    Keys of the returned dict are the keys of models_info.json; the
    pairing relies on the sorted file order matching the json key order.

    :param model_folder: folder containing the .ply files and models_info.json.
    :param scale: factor applied to all vertex coordinates (e.g. 1/1000
        to convert mm to m).
    :returns: dict mapping model-id string to an open3d TriangleMesh.
    """
    ply_files = sorted(
        os.path.join(model_folder, name)
        for name in os.listdir(model_folder)
        if '.ply' in name
    )
    info_path = os.path.join(model_folder, 'models_info.json')
    assert os.path.exists(info_path)
    with open(info_path, 'r') as fin:
        model_infos = json.load(fin)
    assert len(ply_files) == len(model_infos)
    meshes = {}
    for model_id, ply_path in zip(model_infos, ply_files):
        mesh = o3d.io.read_triangle_mesh(ply_path)
        scaled_vertices = np.array(mesh.vertices) * scale
        mesh.vertices = o3d.utility.Vector3dVector(scaled_vertices)
        meshes[model_id] = mesh
    return meshes

def load_depth(data_root, src_folder, src_img):
    """
    Loads the ground-truth depth image and converts it to meters.

    The stored 16-bit values are multiplied by 0.1 / 1000, i.e. a depth
    scale of 0.1 mm per unit is assumed — NOTE(review): confirm this
    matches the ``depth_scale`` entry in scene_camera.json for every
    dataset used.
    """
    scene_dir = os.path.join(data_root, "%06d" % int(src_folder))
    path = os.path.join(scene_dir, 'depth', "%06d.png" % int(src_img))
    assert os.path.exists(path)
    raw = cv2.imread(path, cv2.IMREAD_ANYDEPTH)
    return raw * 0.1 / 1000

def read_focal_length(data_root, src_folder, src_img):
    """
    Reads fx for one image from the scene's scene_camera.json.

    NOTE(review): only ``cam_K[0]`` (fx) is returned; fy, cx and cy are
    discarded, which is insufficient for a faithful back-projection of
    the depth map — verify whether the full intrinsics are needed.
    """
    scene_dir = os.path.join(data_root, "%06d" % int(src_folder))
    cam_path = os.path.join(scene_dir, 'scene_camera.json')
    assert os.path.exists(cam_path)
    with open(cam_path, 'r') as fin:
        cameras = json.load(fin)
    return cameras[str(src_img)]['cam_K'][0]

def parse_infos(scene_gt, scene_gt_infos, meshes):
    """
    Returns the gt object poses and categories of sufficiently visible
    objects that have a loaded mesh.

    NOTE(review): entries of ``scene_gt`` are expected to carry
    ``visib_fract`` while entries of ``scene_gt_infos`` carry the pose
    and ``obj_id`` — the reverse of the BOP naming convention. The
    caller passes them in this swapped order, so it cancels out; verify
    before refactoring.

    :returns: tuple of (N, 3, 4) float32 poses and (N,) object ids.
    """
    poses, categories = [], []
    for vis_entry, pose_entry in zip(scene_gt, scene_gt_infos):
        # Skip heavily occluded instances.
        if vis_entry['visib_fract'] < 0.5:
            continue
        obj_id = pose_entry['obj_id']
        if str(obj_id) not in meshes:
            continue
        rotation = np.asarray(
            pose_entry['cam_R_m2c'], dtype=np.float32).reshape(3, 3)
        # cam_t_m2c is stored in millimeters; convert to meters.
        translation = np.asarray(
            pose_entry['cam_t_m2c'], dtype=np.float32).reshape(3, 1) / 1000
        poses.append(np.hstack([rotation, translation]).astype(np.float32))
        categories.append(obj_id)
    return np.array(poses), np.array(categories)

def visualize_pointcloud(depth, focal_length, poses: np.ndarray,
                         categories: np.ndarray, meshes):
    """
    Renders the depth-derived pointcloud together with the posed meshes
    and a camera coordinate frame in an open3d window.
    """
    cloud = create_pointcloud(depth, focal_length)
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(cloud.reshape(3, -1).T)
    axes = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.3)
    posed_meshes = []
    for pose, category in zip(poses, categories):
        # Deep-copy so the shared template mesh is not mutated.
        mesh = copy.deepcopy(meshes[str(category)])
        homogeneous = np.eye(4)
        homogeneous[:3, :] = pose
        mesh.transform(homogeneous)
        posed_meshes.append(mesh)
    o3d.visualization.draw_geometries([pcd, axes] + posed_meshes)

def main(data_root, src_folder, src_img, meshes):
    """
    Entrypoint: loads annotations, intrinsics and depth for one image,
    then visualizes the meshes projected into the pointcloud.

    NOTE(review): ``load_gts`` returns (info entry, gt entry), yet the
    result is unpacked here as (scene_gt, scene_gt_info). The swap
    cancels out inside ``parse_infos``, but the naming is misleading —
    verify before changing either side.
    """
    scene_gt, scene_gt_info = load_gts(data_root, src_folder, src_img)
    poses, cats = parse_infos(scene_gt, scene_gt_info, meshes)
    assert len(poses) == len(cats)
    fx = read_focal_length(data_root, src_folder, src_img)
    depth_img = load_depth(data_root, src_folder, src_img)
    visualize_pointcloud(depth_img, fx, poses, cats, meshes)

if __name__ == "__main__":
    DS = "ITODD"
    if DS == "ITODD":
        DATA_ROOT = '/home/schuetze/Downloads/itodd/train_pbr'
        MODEL_ROOT = '/home/schuetze/Downloads/itodd/train_pbr/cad_models'
        SCALE = 1.0
    if DS == "LM":
        DATA_ROOT = '/home/schuetze/Downloads/lmo/train_pbr'
        MODEL_ROOT = '/home/schuetze/Downloads/lmo/models'
        SCALE = 1.0 / 1000
    MESHES = load_meshes(MODEL_ROOT, SCALE)
    SRC_FOLDER = 25
    SRC_IMG = 247
    main(DATA_ROOT, SRC_FOLDER, SRC_IMG, MESHES)
MartinSmeyer commented 1 year ago

Hey @FabianSchuetze ,

First of all you need to take the whole intrinsics into account to correctly project a depth map into a pointcloud, see e.g. here:

https://github.com/NVlabs/contact_graspnet/blob/a12b7e3a1d7adf38762bfae1c8b84b4550059a6f/contact_graspnet/data.py#L214-L236

Then there is the bop_toolkit that helps you to deal with BOP datasets, it can rule out any mistakes done at the loading stage, e.g. matching the instances with the correct poses.

Before I check the PBR data, please project the meshes into the real validation scenes and see if you have the same issues.

FabianSchuetze commented 1 year ago

Hi @MartinSmeyer - thank you so much for your kind and detailed reply. I will do as you suggest and report back.

FabianSchuetze commented 1 year ago

Thanks again for your instructions. Two things to note:

Anyway, I'm closing the issue - thanks for your help again.

MartinSmeyer commented 1 year ago

Ah I see, yes in BOP the model origins are aligned with the center of their 3D bounding boxes. In principle you could compute the offset, but I would just use the BOP models when working with BOP data if possible.