[Feature] Added batched input support for look_at matrix construction

Edit: this is useful for camera domain randomization (DR). I am asking for this PR to be reviewed before it goes into main as a double check, since look_at is a critical function for existing environments.

Sanity test: PullCube uses the look_at transform for _default_human_render_camera_configs; run it with the new look_at function to check that no changes occur - python ManiSkill/examples/baselines/ppo/ppo.py --env_id="PullCube-v1" --num_envs=2048 --update_epochs=8 --num_minibatches=32 --total_timesteps=2_000_000 --eval_freq=10 --num-steps=20

The modified function is written to work on all inputs that were previously used, plus batched tensors. It passes the following equality tests, which compare it against the old, unbatched version of the look_at function:

from mani_skill.utils.sapien_utils import look_at
import numpy as np
import sapien
from transforms3d.quaternions import mat2quat
import torch

def old_look_at(eye, target, up=(0, 0, 1)) -> sapien.Pose:
    """Build a SAPIEN camera pose located at ``eye`` and facing ``target``.

    This is the original, unbatched Look-At implementation, kept as the
    reference that the new ``look_at`` is compared against.

    Note:
        Background on the method:
        https://www.scratchapixel.com/lessons/mathematics-physics-for-computer-graphics/lookat-function
        SAPIEN cameras use (forward, right, up) = (x, -y, z), whereas
        OpenGL cameras use (forward, right, up) = (-z, x, y).

    Args:
        eye: camera location (1-D, three components).
        target: location the camera looks at (1-D, three components).
        up: a general direction of "up" from the camera.

    Returns:
        sapien.Pose: pose whose position is ``eye`` and whose quaternion is
        derived from the rotation matrix with columns (forward, left, up).
    """

    def _unit(vec, eps=1e-6):
        # Scale a 1-D vector to unit length; anything shorter than eps
        # becomes an all-zero vector instead of being divided.
        arr = np.asarray(vec)
        assert arr.ndim == 1, arr.ndim
        length = np.linalg.norm(arr)
        return np.zeros_like(arr) if length < eps else arr / length

    # Camera x axis: unit direction from the eye towards the target.
    x_axis = _unit(np.array(target) - np.array(eye))
    # Camera y axis ("left"): the normalized up hint crossed with forward.
    y_axis = np.cross(_unit(up), x_axis)
    # Camera z axis: recomputed from the other two axes, replacing the hint.
    z_axis = np.cross(x_axis, y_axis)
    # The three axes form the columns of the rotation matrix.
    basis = np.stack([x_axis, y_axis, z_axis], axis=1)
    return sapien.Pose(p=eye, q=mat2quat(basis))

# Batch size 1 with list inputs (tuple input can be used as well).
for _ in range(1000):
    # Sample eye and target with every coordinate drawn from [0, 10).
    eye, target = list(torch.rand(3) * 10), list(torch.rand(3) * 10)
    new_pose = look_at(eye, target)
    old_pose = old_look_at(eye, target)
    # The reference result is a sapien.Pose; reshape its p and q into rows of
    # 3 and 4 values so they can be compared against the new pose's tensors.
    old_pose_p = torch.tensor(old_pose.p).view(-1, 3)
    old_pose_q = torch.tensor(old_pose.q).view(-1, 4)
    # Equivalence: same number of elements, then element-wise agreement
    # within 1e-6. Failure messages report the tensors actually compared.
    assert new_pose.p.numel() == old_pose_p.numel(), (new_pose.p.shape, old_pose_p.shape)
    assert new_pose.q.numel() == old_pose_q.numel(), (new_pose.q.shape, old_pose_q.shape)
    assert ((new_pose.p - old_pose_p).abs() < 1e-6).all(), (new_pose.p, old_pose_p)
    assert ((new_pose.q - old_pose_q).abs() < 1e-6).all(), (new_pose.q, old_pose_q)

n = 10
# Batch size n: one batched call to look_at versus n calls to old_look_at.
for _ in range(1000):
    # Process the whole batch at once with the new look_at.
    eye, target = torch.rand(n, 3) * 10, torch.rand(n, 3) * 10
    new_pose = look_at(eye, target)
    # The reference implementation only accepts one eye/target pair at a
    # time, so evaluate it row by row and collect the results.
    old_ps = []
    old_qs = []
    for j in range(n):
        old_single_pose = old_look_at(list(eye[j].numpy()), list(target[j].numpy()))
        old_ps.append(list(old_single_pose.p))
        old_qs.append(list(old_single_pose.q))
    # Stack the per-sample reference results into rows of 3 and 4 values.
    old_pose_p = torch.tensor(old_ps).view(-1, 3)
    old_pose_q = torch.tensor(old_qs).view(-1, 4)
    # Equivalence: same number of elements, then element-wise agreement
    # within 1e-6. Failure messages report the stacked reference tensors
    # built in this iteration rather than a pose from an earlier section.
    assert new_pose.p.numel() == old_pose_p.numel(), (new_pose.p.shape, old_pose_p.shape)
    assert new_pose.q.numel() == old_pose_q.numel(), (new_pose.q.shape, old_pose_q.shape)
    assert ((new_pose.p - old_pose_p).abs() < 1e-6).all(), (new_pose.p, old_pose_p)
    assert ((new_pose.q - old_pose_q).abs() < 1e-6).all(), (new_pose.q, old_pose_q)

haosulab / ManiSkill

[Feature] Added batched input support for look_at matrix construction #678