facebookresearch / agenthive

AgentHive provides the primitives and helpers for seamless use of RoboHive within TorchRL.

[Refactor, BugFix] Fixes bugs in environments and deletes algos #7

Closed by vmoens 1 year ago

vmoens commented 1 year ago

To try this PR:

```python
import gym

import mj_envs.envs.env_variants
from rlhive.rl_envs import RoboHiveEnv
from torchrl.envs.utils import check_env_specs

# Sanity check: the plain gym env builds and resets
print(gym.make("visual_franka_slide_close-v3").reset())

# Same env, wrapped for TorchRL
env = RoboHiveEnv("visual_franka_slide_close-v3")
env.reset()

# Register a visual variant of the state-based env by overriding its obs keys
mj_envs.envs.env_variants.register_env_variant(
    "franka_slide_close-v3",
    variants={
        "obs_keys_wt": {
            "robot_jnt": 1.0,
            "end_effector": 1.0,
            "rgb:right_cam:224x224:2d": 1.0,
            "rgb:left_cam:224x224:2d": 1.0,
        }
    },
    variant_id="franka_slide_close_visual-v3",
)
env2 = RoboHiveEnv("franka_slide_close_visual-v3")

# Both envs should pass TorchRL's spec check
check_env_specs(env)
check_env_specs(env2)

env2.reset()

env.set_seed(0)
td0 = env.rollout(2)

env2.set_seed(0)
td1 = env2.rollout(2)
```
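As an aside, here is a sketch of how the seeded rollouts above can be checked for reproducibility. It assumes that `rollout()` without a policy draws random actions through torch's global RNG, so both seeds need to be set:

```python
import torch

# Sketch: re-seeding both torch and the env should give identical rollouts,
# since rollout() without a policy samples actions from the action spec
# through torch's global RNG.
torch.manual_seed(0)
env.set_seed(0)
td_a = env.rollout(2)

torch.manual_seed(0)
env.set_seed(0)
td_b = env.rollout(2)

torch.testing.assert_close(td_a["observation"], td_b["observation"])
```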

The info keys should be present in the output tensordicts, e.g.

```
>>> print(td1)
TensorDict(
    fields={
        action: Tensor(torch.Size([2, 9]), dtype=torch.float32),
        done: Tensor(torch.Size([2, 1]), dtype=torch.bool),
        next: TensorDict(
            fields={
                observation: Tensor(torch.Size([2, 12]), dtype=torch.float32),
                pixels: Tensor(torch.Size([2, 2, 224, 224, 3]), dtype=torch.uint8),
                rwd_dense: Tensor(torch.Size([2]), dtype=torch.float64),
                rwd_dict: TensorDict(
                    fields={
                        approach: Tensor(torch.Size([2]), dtype=torch.float64),
                        bonus: Tensor(torch.Size([2]), dtype=torch.int64),
                        dense: Tensor(torch.Size([2]), dtype=torch.float64),
                        done: Tensor(torch.Size([2]), dtype=torch.bool),
                        obj_goal: Tensor(torch.Size([2]), dtype=torch.float64),
                        pose: Tensor(torch.Size([2]), dtype=torch.float64),
                        solved: Tensor(torch.Size([2]), dtype=torch.bool),
                        sparse: Tensor(torch.Size([2]), dtype=torch.float64)},
                    batch_size=torch.Size([2]),
                    device=cpu,
                    is_shared=False),
                rwd_sparse: Tensor(torch.Size([2]), dtype=torch.float64),
                solved: Tensor(torch.Size([2]), dtype=torch.bool),
                state: TensorDict(
                    fields={
                        body_pos: Tensor(torch.Size([2, 20, 3]), dtype=torch.float64),
                        body_quat: Tensor(torch.Size([2, 20, 4]), dtype=torch.float64),
                        qpos: Tensor(torch.Size([2, 10]), dtype=torch.float64),
                        qvel: Tensor(torch.Size([2, 10]), dtype=torch.float64),
                        site_pos: Tensor(torch.Size([2, 5, 3]), dtype=torch.float64),
                        site_quat: Tensor(torch.Size([2, 5, 4]), dtype=torch.float64)},
                    batch_size=torch.Size([2]),
                    device=cpu,
                    is_shared=False),
                time: Tensor(torch.Size([2]), dtype=torch.float64)},
            batch_size=torch.Size([2]),
            device=cpu,
            is_shared=False),
        observation: Tensor(torch.Size([2, 12]), dtype=torch.float32),
        pixels: Tensor(torch.Size([2, 2, 224, 224, 3]), dtype=torch.uint8),
        reward: Tensor(torch.Size([2, 1]), dtype=torch.float32),
        rwd_dense: Tensor(torch.Size([2]), dtype=torch.float64),
        rwd_dict: TensorDict(
            fields={
                approach: Tensor(torch.Size([2]), dtype=torch.float64),
                bonus: Tensor(torch.Size([2]), dtype=torch.int64),
                dense: Tensor(torch.Size([2]), dtype=torch.float64),
                done: Tensor(torch.Size([2]), dtype=torch.bool),
                obj_goal: Tensor(torch.Size([2]), dtype=torch.float64),
                pose: Tensor(torch.Size([2]), dtype=torch.float64),
                solved: Tensor(torch.Size([2]), dtype=torch.bool),
                sparse: Tensor(torch.Size([2]), dtype=torch.float64)},
            batch_size=torch.Size([2]),
            device=cpu,
            is_shared=False),
        rwd_sparse: Tensor(torch.Size([2]), dtype=torch.float64),
        solved: Tensor(torch.Size([2]), dtype=torch.bool),
        state: TensorDict(
            fields={
                body_pos: Tensor(torch.Size([2, 20, 3]), dtype=torch.float64),
                body_quat: Tensor(torch.Size([2, 20, 4]), dtype=torch.float64),
                qpos: Tensor(torch.Size([2, 10]), dtype=torch.float64),
                qvel: Tensor(torch.Size([2, 10]), dtype=torch.float64),
                site_pos: Tensor(torch.Size([2, 5, 3]), dtype=torch.float64),
                site_quat: Tensor(torch.Size([2, 5, 4]), dtype=torch.float64)},
            batch_size=torch.Size([2]),
            device=cpu,
            is_shared=False),
        time: Tensor(torch.Size([2]), dtype=torch.float64)},
    batch_size=torch.Size([2]),
    device=cpu,
    is_shared=False)
```
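For reference, a minimal sketch of how the info entries can be read back from such a rollout; the key names are taken from the printout above and may differ for other RoboHive tasks:

```python
# Sketch: accessing individual info entries via TensorDict's nested indexing.
# Key names come from the printout above.
solved = td1["solved"]                 # shape [2], torch.bool
sparse = td1["rwd_dict", "sparse"]     # shape [2], torch.float64
qpos = td1["state", "qpos"]            # simulator state, shape [2, 10]
next_solved = td1["next", "solved"]    # the same entries live under "next"
```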

cc @ShahRutav

vmoens commented 1 year ago

@ShahRutav does it make sense to collect the whole info dict, or should we select a default subset and let users change it if needed?
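If we go with a default subset, one possible user-facing escape hatch (a sketch, not something this PR implements) is TorchRL's transform layer: `ExcludeTransform` can prune info entries at the env boundary, so users can drop or keep keys without touching RoboHiveEnv itself.

```python
from torchrl.envs import TransformedEnv
from torchrl.envs.transforms import ExcludeTransform

# Sketch (key names assumed from the rollout above): drop the heavier info
# entries at the env boundary; users would adjust the excluded keys as needed.
slim_env = TransformedEnv(
    RoboHiveEnv("franka_slide_close_visual-v3"),
    ExcludeTransform("rwd_dict", "state", "time"),
)
td = slim_env.rollout(2)  # output no longer carries rwd_dict / state / time
```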