[Bug Report] Wandb sweep agent cuts in the thread of Isaac Sim before calling app.update(), which causes the process to hang forever #658

Open breadli428 opened 2 months ago

breadli428 commented 2 months ago

Describe the bug

The wandb sweep agent cuts into the Isaac Sim thread before app.update() is called, which causes the process to hang forever.

Steps to reproduce

# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause

"""Script to finetune RL agent with RSL-RL."""

from __future__ import annotations

"""Launch Isaac Sim Simulator first."""

import argparse
import os

from omni.isaac.orbit.app import AppLauncher

# local imports
import cli_args  # isort: skip

# add argparse arguments
parser = argparse.ArgumentParser(description="Finetune an RL agent with RSL-RL.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during finetuning.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument("--run_num", type=int, default=None, help="Run number for the experiment on the cluster.")
# append RSL-RL cli arguments
cli_args.add_rsl_rl_args(parser)
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
args_cli = parser.parse_args()

# overwrite args for cluster training
args_cli.headless = True
args_cli.task = "Isaac-Velocity-Flat-Anymal-D-Finetune-v0"
args_cli.load_run = "2024-04-29_15-05-57"
args_cli.logger = "wandb"
# index of this process among the cluster jobs; run_sweep() branches on it
run_num = args_cli.run_num

# load cheaper kit config in headless
if args_cli.headless:
    app_experience = f"{os.environ['EXP_PATH']}/omni.isaac.sim.python.gym.headless.kit"
else:
    app_experience = f"{os.environ['EXP_PATH']}/omni.isaac.sim.python.kit"

# launch omniverse app
# NOTE: the app is created here, at import time, i.e. before the wandb sweep
# agent is started -- this is the ordering the bug report is about.
app_launcher = AppLauncher(args_cli, experience=app_experience)
simulation_app = app_launcher.app

"""Rest everything follows."""

import gymnasium as gym
import os
import torch
import traceback
from datetime import datetime

import carb
from rsl_rl.runners import OnPolicyRunner, MBPOOnPolicyRunner

from omni.isaac.orbit.envs import RLTaskEnvCfg
from omni.isaac.orbit.utils.dict import print_dict
from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml

import omni.isaac.contrib_tasks  # noqa: F401
import omni.isaac.orbit_tasks  # noqa: F401
from omni.isaac.orbit_tasks.utils import get_checkpoint_path, parse_env_cfg
from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper

from sweep_config import sweep_config, update_config_from_sweep
import wandb
import time

SWEEP_ID_FILE = "logs/rsl_rl/anymal_d_flat/sweep_id.txt"

torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False

def train_sweep():
    """Run a single sweep trial; this is the callback handed to ``wandb.agent``.

    Builds the default environment and agent configurations from the CLI
    arguments, opens a wandb run, overlays the hyper-parameters chosen for
    this trial and then starts the experiment.
    """
    # default configurations derived from the command line
    default_env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
    default_agent_cfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)

    # wandb.config only carries the trial's parameters once the run is initialised
    wandb.init(project="orbit", entity=os.environ["WANDB_USERNAME"])

    env_cfg, agent_cfg = update_config_from_sweep(default_env_cfg, default_agent_cfg, wandb.config)
    run_experiment(env_cfg, agent_cfg)

def run_sweep():
    """Create the wandb sweep or join an existing one.

    The process started with ``run_num == 0`` registers the sweep and
    publishes its ID through ``SWEEP_ID_FILE``. Every other process polls for
    that file, reads the ID from it and runs ``train_sweep`` once
    (``count=1``) through a wandb agent.
    """
    if run_num == 0:
        sweep_id = wandb.sweep(sweep_config, project="orbit")
        # Save the sweep ID to a shared location
        with open(SWEEP_ID_FILE, "w") as f:
            f.write(sweep_id)
    else:
        # Wait for the sweep ID file to be available
        print("[Wandb] Waiting for sweep ID file")
        while not os.path.exists(SWEEP_ID_FILE):
            time.sleep(1)  # Wait until the file exists
        with open(SWEEP_ID_FILE, "r") as f:
            # strip so a trailing newline never ends up in the sweep ID
            sweep_id = f.read().strip()
        # NOTE(review): kept at the indentation of the posted code, so only the
        # waiting processes start an agent -- confirm this is intended.
        wandb.agent(sweep_id, function=train_sweep, project="orbit", count=1)

def run_experiment(env_cfg, agent_cfg):
    """Finetune with RSL-RL agent.

    Args:
        env_cfg: Environment configuration forwarded to ``gym.make``.
        agent_cfg: RSL-RL runner configuration; supplies the experiment and
            run names, device, resume options and the iteration count.
    """

    # specify directory for logging experiments
    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
    log_root_path = os.path.abspath(log_root_path)
    print(f"[INFO] Logging experiment in directory: {log_root_path}")
    # specify directory for logging runs: {time-stamp}_{run_name}
    log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    if agent_cfg.run_name:
        log_dir += f"_{agent_cfg.run_name}"
    log_dir = os.path.join(log_root_path, log_dir)

    # create isaac environment
    env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
    # wrap for video recording
    if args_cli.video:
        video_kwargs = {
            "video_folder": os.path.join(log_dir, "videos"),
            "step_trigger": lambda step: step % args_cli.video_interval == 0,
            "video_length": args_cli.video_length,
            "disable_logger": True,
        }
        print("[INFO] Recording videos during finetuning.")
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)
    # wrap around environment for rsl-rl
    env = RslRlVecEnvWrapper(env)

    # create runner from rsl-rl
    runner = MBPOOnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
    # write git state to logs
    runner.add_git_repo_to_log(__file__)
    # save resume path before creating a new log_dir
    if agent_cfg.resume:
        # get path to previous checkpoint
        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
        print(f"[INFO]: Loading model checkpoint from: {resume_path}")
        # load previously trained model
        runner.load(resume_path, load_optimizer=False, load_system_dynamics=False)

    # set seed of the environment
    env.seed(agent_cfg.seed)

    # dump the configuration into log-directory
    dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
    dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
    dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
    dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)

    # run finetuning
    runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)

    # close the simulator
    env.close()

if __name__ == "__main__":
    try:
        # run the main execution
        run_sweep()
    except Exception as err:
        # report the failure through the Omniverse logger, then propagate it
        carb.log_error(err)
        carb.log_error(traceback.format_exc())
        raise
    finally:
        # close sim app
        simulation_app.close()

Mayank has an explanation and a solution: the correct order of calls should be (1) initialize the sweep agent, (2) create the simulation app, (3) reset/update the simulation.


AlessioMosca commented 1 month ago

Hi @breadli428, I have run into the same problem. Did you solve it? If so, may I ask how?

Best regards Alessio

Mayankm96 commented 1 month ago

Current solution is to move the creation of the simulation app inside the train_sweep function. All the imports need to be adjusted accordingly.

AlessioMosca commented 1 month ago

@Mayankm96 Many thanks. May I ask which imports I should edit? Should I edit the imports inside the "sweep" file, or the imports used inside IsaacLab itself, such as those under source/extensions/omni.isaac.lab/omni/isaac/lab/utils/?

Best regards Alessio

AlessioMosca commented 1 month ago

Hi @Mayankm96, I have tried to modify my code according to your suggestion. However, I am still not able to use wandb. Here is my code:

# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause

"""Script to train RL agent with RSL-RL."""

"""Launch Isaac Sim Simulator first."""

import argparse

from omni.isaac.lab.app import AppLauncher

# local imports
import cli_args  # isort: skip
import os

# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
parser.add_argument(
    "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.")

parser.add_argument("--run_num", type=int, default=None, help="Run number for the experiment on the cluster.")

# append RSL-RL cli arguments
cli_args.add_rsl_rl_args(parser)
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
args_cli = parser.parse_args()

# # overwrite args for cluster training
# args_cli.headless = True
# args_cli.task = "grace-rough-train"
# # args_cli.load_run = "2024-04-29_15-05-57"
# args_cli.logger = "wandb"
# index of this process among the cluster jobs; run_sweep() branches on it
run_num = args_cli.run_num

# load cheaper kit config in headless
if args_cli.headless:
    app_experience = f"{os.environ['EXP_PATH']}/omni.isaac.sim.python.gym.headless.kit"
else:
    app_experience = f"{os.environ['EXP_PATH']}/omni.isaac.sim.python.kit"

# always enable cameras to record video
if args_cli.video:
    args_cli.enable_cameras = True

# import sys
# path_old = set(sys.path)
# app_launcher = AppLauncher(args_cli)
# # simulation_app =
# path = set(sys.path)

import gymnasium as gym
import os
import torch
from datetime import datetime

import carb
from rsl_rl.runners import OnPolicyRunner

# from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
from omni.isaac.lab.utils.dict import print_dict
from omni.isaac.lab.utils.io import dump_pickle, dump_yaml

# import omni.isaac.lab_tasks  # noqa: F401
# from omni.isaac.lab_tasks.utils import get_checkpoint_path, parse_env_cfg
# from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper

torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False

import wandb
# from sweep_config import sweep_config, update_config_from_sweep DA CAPIRE COME FARE

import traceback
import time

SWEEP_ID_FILE = "logs/rsl_rl/grace_rough/sweep_id.txt"

# wandb sweep definition: Bayesian search that maximizes the logged "reward"
# metric over the stiffness and damping ranges below
sweep_config = {
    "method": "bayes",
    "name": "grace_isaaclab",
    "metric": {"goal": "maximize", "name": "reward"},
    "parameters": {
        "stiffness": {"max": 80, "min": 30},
        "damping": {"max": 7.0, "min": 0.5},
    },
}

def update_config_from_sweep(env_cfg, agent_cfg, wandb_config):
    """Write the stiffness chosen by the sweep into the environment config.

    Only the ``'.*'`` stiffness entry of the ``"j1"`` actuator group is
    overwritten (in place); ``agent_cfg`` is handed back unchanged.

    Returns:
        The ``(env_cfg, agent_cfg)`` tuple.
    """
    sampled_stiffness = wandb_config.stiffness
    j1_actuators = env_cfg.scene.robot.actuators["j1"]
    j1_actuators.stiffness['.*'] = sampled_stiffness
    return env_cfg, agent_cfg

# env_cfg.rewards.action_rate_l2.weight
# env_cfg.scene.robot.actuators["j1"].stiffness['.*']
import sys
def train_sweep():
    """Run a single sweep trial; this is the callback handed to ``wandb.agent``.

    The simulation app is created here, inside the callback, and the Isaac Lab
    modules are imported only afterwards. The configurations are then parsed,
    a wandb run is opened, the sweep parameters are applied and the
    experiment is started.
    """
    # run_experiment() looks these two names up at module level, and the
    # module-level imports are commented out, so bind them globally here.
    global get_checkpoint_path, RslRlVecEnvWrapper

    # launch the simulator before importing any omni.isaac.lab module
    app_launcher = AppLauncher(args_cli)
    simulation_app = app_launcher.app

    from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
    import omni.isaac.lab_tasks  # noqa: F401
    from omni.isaac.lab_tasks.utils import get_checkpoint_path, parse_env_cfg
    from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper

    # parse configuration
    env_cfg: ManagerBasedRLEnvCfg = parse_env_cfg(
        args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
    )
    agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)

    wandb.init(project="grace_isaaclab") #, entity=os.environ["WANDB_USERNAME"]) config=sweep_config

    env_cfg, agent_cfg = update_config_from_sweep(env_cfg, agent_cfg, wandb.config)
    run_experiment(env_cfg, agent_cfg)

def run_sweep():
    """Create the wandb sweep or join an existing one.

    The process started with ``run_num == 0`` registers the sweep and
    publishes its ID through ``SWEEP_ID_FILE``. Every other process polls for
    that file, reads the ID from it and runs ``train_sweep`` once
    (``count=1``) through a wandb agent.
    """
    if run_num == 0:
        sweep_id = wandb.sweep(sweep_config, project="grace_isaaclab")
        # Save the sweep ID to a shared location
        with open(SWEEP_ID_FILE, "w") as f:
            f.write(sweep_id)
    else:
        # Wait for the sweep ID file to be available
        print("[Wandb] Waiting for sweep ID file")
        while not os.path.exists(SWEEP_ID_FILE):
            time.sleep(1)  # Wait until the file exists
        with open(SWEEP_ID_FILE, "r") as f:
            # strip so a trailing newline never ends up in the sweep ID
            sweep_id = f.read().strip()
        # NOTE(review): kept at the indentation of the posted code, so only the
        # waiting processes start an agent -- confirm this is intended.
        wandb.agent(sweep_id, function=train_sweep, project="grace_isaaclab", count=1)

def run_experiment(env_cfg, agent_cfg):
    """Train with RSL-RL agent.

    Args:
        env_cfg: Environment configuration forwarded to ``gym.make``.
        agent_cfg: RSL-RL runner configuration; supplies the experiment and
            run names, device, resume options and the iteration count.
    """
    # parse configuration
    # env_cfg: ManagerBasedRLEnvCfg = parse_env_cfg(
    #     args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
    # )
    # agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)

    # specify directory for logging experiments
    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
    log_root_path = os.path.abspath(log_root_path)
    print(f"[INFO] Logging experiment in directory: {log_root_path}")
    # specify directory for logging runs: {time-stamp}_{run_name}
    log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    if agent_cfg.run_name:
        log_dir += f"_{agent_cfg.run_name}"
    log_dir = os.path.join(log_root_path, log_dir)

    # max iterations for training
    if args_cli.max_iterations:
        agent_cfg.max_iterations = args_cli.max_iterations

    # create isaac environment
    env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
    # wrap for video recording
    if args_cli.video:
        video_kwargs = {
            "video_folder": os.path.join(log_dir, "videos"),
            "step_trigger": lambda step: step % args_cli.video_interval == 0,
            "video_length": args_cli.video_length,
            "disable_logger": True,
        }
        print("[INFO] Recording videos during training.")
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)
    # wrap around environment for rsl-rl
    env = RslRlVecEnvWrapper(env)

    # create runner from rsl-rl
    runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
    # write git state to logs
    runner.add_git_repo_to_log(__file__)
    # save resume path before creating a new log_dir
    if agent_cfg.resume:
        # get path to previous checkpoint
        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
        print(f"[INFO]: Loading model checkpoint from: {resume_path}")
        # load previously trained model
        runner.load(resume_path)

    # set seed of the environment
    env.seed(agent_cfg.seed)

    # dump the configuration into log-directory
    dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
    dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
    dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
    dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)

    # run training
    runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)

    # close the simulator
    env.close()

if __name__ == "__main__":
    try:
        # run the main execution
        run_sweep()
    except Exception as err:
        # report the failure through the Omniverse logger, then propagate it
        carb.log_error(err)
        carb.log_error(traceback.format_exc())
        raise
    finally:
        # close sim app
        # simulation_app.close()
        # (left disabled as posted: the app handle is local to train_sweep())
        pass

The error that I encountered is the following:

Traceback (most recent call last):
  File "/home/lab/anaconda3/envs/isaaclab/lib/python3.10/site-packages/wandb/agents/", line 307, in _run_job
  File "/home/lab/IsaacLab/source/standalone/workflows/rsl_rl/", line 116, in train_sweep
    app_launcher = AppLauncher(args_cli)
  File "/home/lab/IsaacLab/source/extensions/omni.isaac.lab/omni/isaac/lab/app/", line 113, in __init__
  File "/home/lab/IsaacLab/source/extensions/omni.isaac.lab/omni/isaac/lab/app/", line 532, in _create_app
    self._app = SimulationApp(self._sim_app_config, experience=self._sim_experience_file)
  File "/home/lab/anaconda3/envs/isaaclab/lib/python3.10/site-packages/isaacsim/exts/omni.isaac.kit/omni/isaac/kit/", line 208, in __init__
    signal.signal(signal.SIGINT, signal_handler)
  File "/home/lab/anaconda3/envs/isaaclab/lib/python3.10/", line 56, in signal
    handler = _signal.signal(_enum_to_int(signalnum), _enum_to_int(handler))
ValueError: signal only works in main thread of the main interpreter

wandb: ERROR Run 6jiqul36 errored:
wandb: ERROR Traceback (most recent call last):
wandb: ERROR   File "/home/lab/anaconda3/envs/isaaclab/lib/python3.10/site-packages/wandb/agents/", line 307, in _run_job
wandb: ERROR     self._function()
wandb: ERROR   File "/home/lab/IsaacLab/source/standalone/workflows/rsl_rl/", line 116, in train_sweep
wandb: ERROR     app_launcher = AppLauncher(args_cli)
wandb: ERROR   File "/home/lab/IsaacLab/source/extensions/omni.isaac.lab/omni/isaac/lab/app/", line 113, in __init__
wandb: ERROR     self._create_app()
wandb: ERROR   File "/home/lab/IsaacLab/source/extensions/omni.isaac.lab/omni/isaac/lab/app/", line 532, in _create_app
wandb: ERROR     self._app = SimulationApp(self._sim_app_config, experience=self._sim_experience_file)
wandb: ERROR   File "/home/lab/anaconda3/envs/isaaclab/lib/python3.10/site-packages/isaacsim/exts/omni.isaac.kit/omni/isaac/kit/", line 208, in __init__
wandb: ERROR     signal.signal(signal.SIGINT, signal_handler)
wandb: ERROR   File "/home/lab/anaconda3/envs/isaaclab/lib/python3.10/", line 56, in signal
wandb: ERROR     handler = _signal.signal(_enum_to_int(signalnum), _enum_to_int(handler))
wandb: ERROR ValueError: signal only works in main thread of the main interpreter
wandb: ERROR 

May I ask if you have any suggestions?

Best Regards Alessio

AlessioMosca commented 2 weeks ago

Hi @Mayankm96 ,

I attempted to resolve the signal-handling issue by commenting out lines 200 to 209 in the file that defines SimulationApp (the omni.isaac.kit module that appears at line 208 in the traceback above):


Here is the code section I commented out:

def signal_handler(signal, frame):
    """SIGINT handler, quoted from the SimulationApp constructor.

    NOTE(review): only the first statement of the handler appears in this
    excerpt; the remainder of its body is not shown here.
    """
    # disable logging warnings as we are going to terminate the process
    _logging = carb.logging.acquire_logging()

# Registering the handler is the call that raised
# "ValueError: signal only works in main thread of the main interpreter"
# in the traceback posted earlier in this thread.
signal.signal(signal.SIGINT, signal_handler)

By doing this, I was able to start the first sweep with Weights & Biases (wandb). However, after the first simulation, when the run_experiment() function ends, the call to simulation_app.close() causes the entire program to exit.

The function called by the wandb agent is as follows:

def train_sweep():
    """Run a single sweep trial; this is the callback handed to the wandb agent.

    The simulation app is created inside the callback and the Isaac Lab
    modules are imported only afterwards. The configurations are then parsed,
    a wandb run is opened, the sweep parameters are applied and the
    experiment is started.
    """

    # launch the simulator before importing any omni.isaac.lab module
    app_launcher = AppLauncher(args_cli)
    simulation_app = app_launcher.app

    from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
    from omni.isaac.lab.utils.dict import print_dict
    from omni.isaac.lab.utils.io import dump_pickle, dump_yaml

    import omni.isaac.lab_tasks  # noqa: F401
    from omni.isaac.lab_tasks.utils import get_checkpoint_path, parse_env_cfg
    from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper

    env_cfg: ManagerBasedRLEnvCfg = parse_env_cfg(
        args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
    )
    agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)

    wandb.init(project=args_cli.log_project_name, config=sweep_config) #, entity=os.environ["WANDB_USERNAME"]) config=sweep_config

    env_cfg, agent_cfg = update_config_from_sweep(env_cfg, agent_cfg, wandb.config)

    run_experiment(env_cfg, agent_cfg)


I believe that the reason the entire program exits is the call to self._framework.unload_all_plugins() at line 546 in the aforementioned file (where the close() method is defined).

The output printed in the terminal is:

[102.334s] Simulation App Shutting Down

Process finished with exit code 0

It seems that the agent is killed whenever simulation_app.close() is called.

May I ask if you have any solution?

Best Regards Alessio