Closed vmoens closed 1 year ago
To try this PR
import mj_envs.envs.env_variants
from rlhive.rl_envs import RoboHiveEnv
import gym

print(gym.make("visual_franka_slide_close-v3").reset())
env = RoboHiveEnv("visual_franka_slide_close-v3")
env.reset()

mj_envs.envs.env_variants.register_env_variant(
    "franka_slide_close-v3",
    variants={"obs_keys_wt": {
        "robot_jnt": 1.0,
        "end_effector": 1.0,
        "rgb:right_cam:224x224:2d": 1.0,
        "rgb:left_cam:224x224:2d": 1.0,
    }},
    variant_id="franka_slide_close_visual-v3",
)
env2 = RoboHiveEnv("franka_slide_close_visual-v3")

from torchrl.envs.utils import check_env_specs
check_env_specs(env)
check_env_specs(env2)
env2.reset()

env.set_seed(0)
td0 = env.rollout(2)
env2.set_seed(0)
td1 = env2.rollout(2)  # NOTE: original snippet had `env.rollout(2)` here, which re-rolls the first env; `env2` appears intended for the seeded comparison — confirm
The `info` keys should be present in the output tensordicts, e.g.
>>> print(td1) TensorDict( fields={ action: Tensor(torch.Size([2, 9]), dtype=torch.float32), done: Tensor(torch.Size([2, 1]), dtype=torch.bool), next: TensorDict( fields={ observation: Tensor(torch.Size([2, 12]), dtype=torch.float32), pixels: Tensor(torch.Size([2, 2, 224, 224, 3]), dtype=torch.uint8), rwd_dense: Tensor(torch.Size([2]), dtype=torch.float64), rwd_dict: TensorDict( fields={ approach: Tensor(torch.Size([2]), dtype=torch.float64), bonus: Tensor(torch.Size([2]), dtype=torch.int64), dense: Tensor(torch.Size([2]), dtype=torch.float64), done: Tensor(torch.Size([2]), dtype=torch.bool), obj_goal: Tensor(torch.Size([2]), dtype=torch.float64), pose: Tensor(torch.Size([2]), dtype=torch.float64), solved: Tensor(torch.Size([2]), dtype=torch.bool), sparse: Tensor(torch.Size([2]), dtype=torch.float64)}, batch_size=torch.Size([2]), device=cpu, is_shared=False), rwd_sparse: Tensor(torch.Size([2]), dtype=torch.float64), solved: Tensor(torch.Size([2]), dtype=torch.bool), state: TensorDict( fields={ body_pos: Tensor(torch.Size([2, 20, 3]), dtype=torch.float64), body_quat: Tensor(torch.Size([2, 20, 4]), dtype=torch.float64), qpos: Tensor(torch.Size([2, 10]), dtype=torch.float64), qvel: Tensor(torch.Size([2, 10]), dtype=torch.float64), site_pos: Tensor(torch.Size([2, 5, 3]), dtype=torch.float64), site_quat: Tensor(torch.Size([2, 5, 4]), dtype=torch.float64)}, batch_size=torch.Size([2]), device=cpu, is_shared=False), time: Tensor(torch.Size([2]), dtype=torch.float64)}, batch_size=torch.Size([2]), device=cpu, is_shared=False), observation: Tensor(torch.Size([2, 12]), dtype=torch.float32), pixels: Tensor(torch.Size([2, 2, 224, 224, 3]), dtype=torch.uint8), reward: Tensor(torch.Size([2, 1]), dtype=torch.float32), rwd_dense: Tensor(torch.Size([2]), dtype=torch.float64), rwd_dict: TensorDict( fields={ approach: Tensor(torch.Size([2]), dtype=torch.float64), bonus: Tensor(torch.Size([2]), dtype=torch.int64), dense: Tensor(torch.Size([2]), dtype=torch.float64), done: 
Tensor(torch.Size([2]), dtype=torch.bool), obj_goal: Tensor(torch.Size([2]), dtype=torch.float64), pose: Tensor(torch.Size([2]), dtype=torch.float64), solved: Tensor(torch.Size([2]), dtype=torch.bool), sparse: Tensor(torch.Size([2]), dtype=torch.float64)}, batch_size=torch.Size([2]), device=cpu, is_shared=False), rwd_sparse: Tensor(torch.Size([2]), dtype=torch.float64), solved: Tensor(torch.Size([2]), dtype=torch.bool), state: TensorDict( fields={ body_pos: Tensor(torch.Size([2, 20, 3]), dtype=torch.float64), body_quat: Tensor(torch.Size([2, 20, 4]), dtype=torch.float64), qpos: Tensor(torch.Size([2, 10]), dtype=torch.float64), qvel: Tensor(torch.Size([2, 10]), dtype=torch.float64), site_pos: Tensor(torch.Size([2, 5, 3]), dtype=torch.float64), site_quat: Tensor(torch.Size([2, 5, 4]), dtype=torch.float64)}, batch_size=torch.Size([2]), device=cpu, is_shared=False), time: Tensor(torch.Size([2]), dtype=torch.float64)}, batch_size=torch.Size([2]), device=cpu, is_shared=False)
cc @ShahRutav
@ShahRutav does it make sense to collect all the info dict, or should we select a default subset and let the users change that if needed?
To try this PR
The `info` keys should be present in the output tensordicts, e.g.

cc @ShahRutav