Collision checking doesn't seem to work as expected

Does self.planning_world.collide_full() (from here) check for collisions with the pointcloud added using update_point_cloud (from here), or does it just check for collisions between links of the robot? Additionally, update_point_cloud takes in pointcloud from the robot, as is obtained using the gym environment but centered around the robot's base, correct?

I tried to construct the following simple example: Here, the scene simply consists of a robot and a box (loaded using a custom .obj file -- I know the Sapien collision avoidance tutorial creates the box directly, but I want to be able to do this using custom .obj files). I place the robot to be in collision with the box, and I would like to detect this collision. I obtain a 360 degree pointcloud (as done in the ManiSkill gym environment), center the pointcloud to be around the robot's base, and then use update_point_cloud, but somehow self.planning_world.collide_full() is still empty:

import sapien.core as sapien
from sapien.utils import Viewer
import numpy as np
from scipy.spatial.transform import Rotation as R
from PIL import Image
from copy import deepcopy
from ManiSkill.mani_skill.env.camera import CombinedCamera, read_images_from_camera, read_pointclouds_from_camera

# these first four functions are from ManiSkill/mani_skill/env/base_env.py, modified to not use 'self'
def _load_camera(cam_info, agent, scene):
        cam_info = deepcopy(cam_info)
        if 'parent' in cam_info:
                if cam_info['parent'] == 'robot':
                        parent = agent.get_base_link()
                else:
                        assert False
                        parent = self.objects[cam_info['parent']]
                        if isinstance(parent, sapien.Articulation):
                                parent = parent.get_links()[0]
                camera_mount_actor = parent
                del cam_info['parent']
        else:
            camera_mount_actor = scene.create_actor_builder().build_kinematic()
        pose = sapien.Pose(cam_info['position'], cam_info['rotation'])
        del cam_info['position'], cam_info['rotation']
        camera = scene.add_mounted_camera(
            actor=camera_mount_actor, pose=pose, **cam_info, fovx=0
        )
        return camera

def render(mode='color_image', depth=False, seg=None, camera_names=None, scene=None, cameras=None):
        scene.update_render()
        if mode == 'human':
                if self._viewer is None:
                        self._viewer = Viewer(self._renderer)
                        self._setup_viewer()
                self._viewer.render()
                return self._viewer
        else:
                if seg is not None:
                        if seg == 'visual':
                                seg_idx = 0
                        elif seg == 'actor':
                                seg_idx = 1
                        elif seg == 'both':
                                seg_idx = [0, 1]
                        else:
                                raise NotImplementedError()
                else:
                        seg_idx = None
                if camera_names is None:
                        cameras = self.cameras
                else:
                        cameras_new = []
                        for camera in cameras:
                                if camera.get_name() in camera_names:
                                        cameras_new.append(camera)
                        cameras = cameras_new
                if mode == 'color_image' or mode == 'pointcloud':
                        views = {}
                        get_view_func = read_images_from_camera if mode == 'color_image' else read_pointclouds_from_camera
                        for cam in cameras:
                                cam.take_picture()
                        for cam in cameras:
                                if isinstance(cam, CombinedCamera):
                                        view = cam.get_combined_view(mode, depth, seg_idx) # list of dict for image, dict for pointcloud
                                else:
                                        view = get_view_func(cam, depth, seg_idx) # dict
                                views[cam.get_name()] = view
                        return views

def _post_process_view(view_dict, robot_link_ids=None):
        actor_id_seg = view_dict['seg'] # (n, m, 1)
        mask = np.zeros(actor_id_seg.shape, dtype=np.bool)
        for actor_id in robot_link_ids:
            mask = mask | ( actor_id_seg == actor_id )

        view_dict['seg'] = mask

def post_processing(obs, obs_mode, robot_link_ids):
        views = obs[obs_mode]
        for cam_name, view in views.items():
                if isinstance(view, list):
                        for view_dict in view:
                                _post_process_view(view_dict, robot_link_ids=robot_link_ids)
                        combined_view = {}
                        for key in view[0].keys():
                                combined_view[key] = np.concatenate([view_dict[key] for view_dict in view], axis=-1)
                        views[cam_name] = combined_view
                else: # view is a dict
                        _post_process_view(view, robot_link_ids=robot_link_ids)
                if len(views) == 1:
                        view = next(iter(views.values()))
                        obs[obs_mode] = view
        return obs

def main():
        # setup
        engine = sapien.Engine()
        renderer = sapien.VulkanRenderer()
        engine.set_renderer(renderer)

        scene = engine.create_scene()
        scene.set_timestep(1 / 100.0)

        rscene = scene.get_renderer_scene()
        rscene.set_ambient_light([0.5, 0.5, 0.5])
        rscene.add_directional_light([0, 1, -1], [0.5, 0.5, 0.5], shadow=True)
        rscene.add_point_light([1, 2, 2], [1, 1, 1], shadow=True)
        rscene.add_point_light([1, -2, 2], [1, 1, 1], shadow=True)
        rscene.add_point_light([-1, 0, 1], [1, 1, 1], shadow=True)

        # viewer
        viewer = Viewer(renderer)  # Create a viewer (window)
        viewer.set_scene(scene)  # Bind the viewer and the scene
        viewer.set_camera_xyz(x=-4, y=0, z=2)
        viewer.set_camera_rpy(r=0, p=-np.arctan2(2, 4), y=0)
        viewer.window.set_camera_parameters(near=0.05, far=100, fovy=1)

        # load box object
        loader: sapien.URDFLoader = scene.create_urdf_loader()
        urdf = 'box.urdf'
        asset = loader.load(urdf)
        rotation = np.array([1, 0, 0, 0])
        translation = np.array([-1, -1, 0])
        asset.set_pose(sapien.Pose(p=translation, q=rotation))

        # load and place robot (using code from the gym environment so we can obtain pointclouds)
        import pathlib
        from mani_skill.utils.config_parser import (
                preprocess,
                process_variables,
                process_variants,
        )
        import importlib
        config_file = pathlib.Path('ManiSkill/mani_skill/assets/config_files/open_cabinet_drawer.yml')
        yaml_config = preprocess(config_file)
        config = deepcopy(yaml_config)
        _level_rng = np.random.RandomState(seed=0)
        variant_config = {'partnet_mobility_id': '1004'}
        config = process_variables(config, _level_rng)
        level_config, level_variant_config = process_variants(
                config, _level_rng, variant_config
        )
        agent_config = level_config['agent']
        module_name, class_name = agent_config['agent_class'].rsplit('.', 1)
        module = importlib.import_module(module_name)
        AgentClass = getattr(module, class_name)
        agent = AgentClass(engine, scene, agent_config)

        # choose robot configuration such that arm is colliding with box
        cur_state = agent.get_state()
        cur_state[12:14] = np.array([-1, 0])
        cur_state[18:28] = np.zeros(10)
        cur_state[19] = 1.45
        cur_state[20] = -0.7 # 1.75
        agent.set_state(cur_state)

        # get pointcloud from this position
        robot_link_ids = agent.get_link_ids()
        cam_infos = [level_config['render']['cameras'][1]] # only keep robot cameras, no world camera
        cameras = []
        for cam_info in cam_infos:
                if 'sub_cameras' in cam_info:
                        sub_cameras = [_load_camera(sub_cam_info, agent, scene) for sub_cam_info in cam_info['sub_cameras']]
                        combined_camera = CombinedCamera(cam_info['name'], sub_cameras)
                        cameras.append(combined_camera)
                else:
                        assert False
                        camera = self._load_camera(cam_info)
                        cameras.append(camera)
        seg='actor'
        obs = {'agent': agent.get_state(with_controller_state=False), 'pointcloud': render(mode='pointcloud', camera_names=['robot'], seg=seg, scene=scene, cameras=cameras)}
        obs = post_processing(obs, obs_mode='pointcloud', robot_link_ids=robot_link_ids)
        np.save('pointcloud_test.npy', obs['pointcloud'])

        # adjust pointcloud to be centered around robot
        obs['pointcloud']['xyz'][:, 0] += 1

        '''
        # collision detection
        import mplib
        link_names = ['panda_link0', 'panda_link1', 'panda_link2', 'panda_link3', 'panda_link4', 'panda_link5', 'panda_link6', 'panda_link7', 'panda_link8', 'panda_hand', 'panda_leftfinger', 'panda_rightfinger']
        joint_names = ['panda_joint1', 'panda_joint2', 'panda_joint3', 'panda_joint4', 'panda_joint5', 'panda_joint6', 'panda_joint7', 'panda_finger_joint1', 'panda_finger_joint2']
        planner = mplib.Planner(
                urdf="./assets/robot/panda/panda.urdf",
                srdf="./assets/robot/panda/panda.srdf",
                user_link_names=link_names,
                user_joint_names=joint_names,
                move_group="panda_hand",
                joint_vel_limits=np.ones(7),
                joint_acc_limits=np.ones(7))
        planner.update_point_cloud(obs['pointcloud']['xyz'])
        path = planner.plan(goal_pose=np.zeros(9),current_qpos=agent.get_state(by_dict=True)['qpos'][1:],use_point_cloud=True)
        '''

        while not viewer.closed:  # Press key q to quit
            viewer.render()

if __name__ == '__main__':
        main()

With the collision detection portion at the end commented out, we can see in the viewer that the robot is clearly in collision with the box. However, when we uncomment this part, we see that self.planning_world.collide_full() is still empty -- this is not the expected behavior, is it?

For completeness, box.urdf:

<?xml version="1.0" ?>
<robot name="box">
        <link name="link_box">
                <visual name="box">
                        <origin xyz="0 0 0"/>
                        <geometry>
                                <mesh filename="box.obj"/>
                        </geometry>
                </visual>
                <collision>
                        <origin xyz="0 0 0"/>
                        <geometry>
                                <mesh filename="box.obj"/>
                        </geometry>
                </collision>
        </link>
</robot>

and box.obj:

o Mesh
v -0.15 -0.15 -0.25
v 0.15 -0.15 -0.25
v 0.15 0.15 -0.25
v -0.15 0.15 -0.25
v -0.15 -0.15 0.5
v 0.15 -0.15 0.5
v 0.15 0.15 0.5
v -0.15 0.15 0.5
vn 0 -1 0.5
vn 0 1 0.5
vn -1 0 0.5
vn 1 0 0
vn 0 0 1
vn 0 0 -1
vt 0 0
vt 1 0
vt 0 1
vt 1 1
f 1/1/1 2/2/1 6/4/1
f 1/1/1 6/4/1 5/3/1
f 3/1/2 4/2/2 8/4/2
f 3/1/2 8/4/2 7/3/2
f 4/1/3 1/2/3 5/4/3
f 4/1/3 5/4/3 8/3/3
f 2/1/4 3/2/4 7/4/4
f 2/1/4 7/4/4 6/3/4
f 5/1/5 6/2/5 7/4/5
f 5/1/5 7/4/5 8/3/5
f 4/1/6 3/2/6 2/4/6
f 4/1/6 2/4/6 1/3/6

Apologies for the incredibly long post.

haosulab / MPlib

Collision checking doesn't seem to work as expected #5