PKU-Alignment / safety-gymnasium

NeurIPS 2023: Safety-Gymnasium: A Unified Safe Reinforcement Learning Benchmark
https://safety-gymnasium.readthedocs.io/en/latest/
Apache License 2.0
402 stars 52 forks source link

[Question] How to use Safe Isaac Gym? #92

Closed hmnhonari closed 6 months ago

hmnhonari commented 11 months ago

Required prerequisites

Questions

Hi, I would like to use the environments provided in the safe Isaac gym section. However, there is no documentation on how to import and make them. I used gym.make("ShadowHandCatchOver2UnderarmSafeJoint") to create the environment but I get the error "Environment ShadowHandCatchOver2UnderarmSafeJoint is not registered in safety-gymnasium". How can I make the environment?

muchvo commented 11 months ago

Please refer to this discussion.

hmnhonari commented 10 months ago

Thank you for the reply. I used the code you referred to in the discussion to create the environment. However, when I call env.reset(), the environment does not appear to change at all (this is also visible in the viewer GUI), and the robot does not return to its initial configuration.

import contextlib
import os
import random
import sys
import time

# NOTE: keep isaacgym ahead of torch; Isaac Gym is known to raise an
# ImportError when torch has already been imported.
import isaacgym
import numpy as np
import torch

from safepo.common.env import make_sa_isaac_env
from safepo.utils.config import single_agent_args, isaac_gym_map, parse_sim_params

# Hyperparameter bundle for Isaac Gym runs. The key names look like
# on-policy training settings (step budget, network widths, discount,
# KL target, ...) -- presumably mirrored from SafePO's defaults; confirm
# against the SafePO configuration before relying on individual values.
isaac_gym_specific_cfg = dict(
    total_steps=100_000_000,
    steps_per_epoch=32768,
    hidden_sizes=[1024, 1024, 512],
    gamma=0.96,
    target_kl=0.016,
    num_mini_batch=4,
    use_value_coefficient=True,
    learning_iters=8,
    max_grad_norm=1.0,
    use_critic_norm=False,
)

def main(args, cfg_env=None):
    """Step a single Isaac Gym environment forever with sampled actions.

    Builds the environment through ``make_sa_isaac_env`` and then loops
    endlessly, sampling an action from the environment's action space on
    every iteration. The function never returns on its own; stop it by
    interrupting the process.

    Args:
        args: Parsed run arguments. ``num_envs`` and ``cost_limit`` are
            overwritten in place; ``seed`` is read to seed the RNGs.
        cfg_env: Environment configuration mapping. Required despite the
            ``None`` default; ``cfg_env['env']['numEnvs']`` is overwritten
            in place.

    Raises:
        TypeError: If ``cfg_env`` is ``None``.
    """
    # Fail fast with a clear message instead of an opaque
    # "'NoneType' object is not subscriptable" a few lines further down.
    if cfg_env is None:
        raise TypeError("cfg_env must be an environment config mapping, not None")

    # Force a single environment on both the CLI args and the env config.
    args.num_envs = 1
    args.cost_limit = 1
    cfg_env['env']['numEnvs'] = 1

    # Seed every RNG the run touches so rollouts are repeatable.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(4)

    sim_params = parse_sim_params(args, cfg_env, None)
    env = make_sa_isaac_env(args=args, cfg=cfg_env, sim_params=sim_params)
    act_space = env.action_space
    args.num_envs = env.num_envs

    # Per the maintainers, reset() is how the initial state is obtained on
    # first use, so call it once before stepping.
    env.reset()

    while True:
        action = act_space.sample()
        # Add a leading axis so the action is batched for the single env.
        _, _, cost, terminated, _, _ = env.step(action[np.newaxis, :])
        if terminated or cost:
            print('reset!')
            # NOTE(review): the maintainers state that episode resets happen
            # automatically inside the environment; this explicit call is
            # kept from the original script but is not expected to restore
            # the initial configuration by itself.
            env.reset()

# Script driver: parse the task arguments, derive a per-run log directory
# (<log_dir>/<experiment>/<task>/<script name>/seed-XXX-<timestamp>), then
# run main() either with terminal output or with stdout/stderr captured in
# per-seed log files.
args, cfg_env = single_agent_args('FreightFrankaPickAndPlace')
relpath = time.strftime("%Y-%m-%d-%H-%M-%S")
subfolder = "-".join(["seed", str(args.seed).zfill(3)])
relpath = "-".join([subfolder, relpath])
# The script's own file name (without extension) names the algorithm folder.
algo = os.path.basename(__file__).split(".")[0]
args.log_dir = os.path.join(args.log_dir, args.experiment, args.task, algo, relpath)
if not args.write_terminal:
    terminal_log_name = "terminal.log"
    error_log_name = "error.log"
    terminal_log_name = f"seed{args.seed}_{terminal_log_name}"
    error_log_name = f"seed{args.seed}_{error_log_name}"
    # Start from the interpreter's original streams in case something
    # upstream already replaced them.
    sys.stdout = sys.__stdout__
    sys.stderr = sys.__stderr__
    # exist_ok=True already tolerates an existing directory, so no
    # separate existence check is needed.
    os.makedirs(args.log_dir, exist_ok=True)
    with open(
        os.path.join(args.log_dir, terminal_log_name),
        "w",
        encoding="utf-8",
    ) as f_out:
        with open(
            os.path.join(args.log_dir, error_log_name),
            "w",
            encoding="utf-8",
        ) as f_error:
            # The redirect context managers put the previous streams back on
            # exit, so sys.stdout/sys.stderr never end up bound to a closed
            # file once the log files are closed.
            with contextlib.redirect_stdout(f_out):
                with contextlib.redirect_stderr(f_error):
                    main(args, cfg_env)
else:
    main(args, cfg_env)
muchvo commented 10 months ago

Please take a look at line 755 for clarification. In our environment, resets are performed automatically. The external reset call is only used to obtain the initial state when the environment is used for the first time.