Denys88 / rl_games

RL implementations

How can I export a trained neural network? #266

Closed · jheyeo222 closed this issue 7 months ago

jheyeo222 commented 7 months ago

Hello, I am a student. I have trained a robot in the Isaac Gym simulator using the rl_games library. Now I would like to deploy the trained model on a real robot.

To do this, I need to replicate the neural network architecture used by rl_games on the robot and load the .pth checkpoint.

However, re-implementing the network architecture exactly as it is defined in rl_games seems overly complex. Is there a way to accurately reproduce the same network architecture for deployment on a real robot? Alternatively, are there other methods to effectively apply the trained model to a real robot?

Denys88 commented 7 months ago

here is an export example: https://colab.research.google.com/github/Denys88/rl_games/blob/master/notebooks/train_and_export_onnx_example_continuous.ipynb
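
For the real-robot side, the point of the ONNX route is that you never have to re-implement the rl_games architecture: once the notebook has produced the .onnx file, the robot only needs onnxruntime. Below is a minimal sketch of the inference side, assuming the export used the notebook's mu/log_std/value output names; the file name and observation preprocessing are placeholders.

import numpy as np
import onnxruntime as ort

# Load the exported network once at startup (the path is a placeholder).
session = ort.InferenceSession("policy.onnx")

def act(obs):
    # obs: 1D array of observations, preprocessed the same way as during training
    obs = np.asarray(obs, dtype=np.float32).reshape(1, -1)
    mu, log_std, value = session.run(None, {"obs": obs})
    # for deployment, take the deterministic action (the mean of the policy)
    return mu[0]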

jheyeo222 commented 7 months ago

> here is an export example: https://colab.research.google.com/github/Denys88/rl_games/blob/master/notebooks/train_and_export_onnx_example_continuous.ipynb

I'm truly grateful for your response. In fact, I had been working on the code from this GitHub issue, which you explained in a different thread: https://github.com/Denys88/rl_games/issues/226. Initially I had some difficulties understanding it, but now I understand how the ONNX export of a model trained in Isaac Gym is structured. Thank you very much for your assistance, and I apologize for the inconvenience.

Quest2GM commented 2 months ago

Hi @Denys88 @jheyeo222

I am trying to achieve the same thing, but with a new custom task added to OmniIsaacGymEnvs and an already pretrained model. I've modified the code from the Google Colab notebook above, but I can't get it to work:

from rl_games.torch_runner import Runner
import os
import yaml
import torch
import matplotlib.pyplot as plt
import gym
from IPython import display
import numpy as np
import onnx
import onnxruntime as ort

from rl_games.common import env_configurations, vecenv
from rlgames_utils import RLGPUAlgoObserver, RLGPUEnv
from vec_env_rlgames import VecEnvRLGames

CONFIG = {
    "params": {
        "seed": 42,
        "algo": {"name": "a2c_continuous"},
        "model": {"name": "continuous_a2c_logstd"},
        "network": {
            "name": "actor_critic",
            "separate": False,
            "space": {
                "continuous": {
                    "mu_activation": "None",
                    "sigma_activation": "None",
                    "mu_init": {"name": "default"},
                    "sigma_init": {"name": "const_initializer", "val": 0},
                    "fixed_sigma": True,
                }
            },
            "mlp": {
                "units": [2048, 1024, 512, 256, 64],
                "activation": "relu",
                "d2rl": False,
                "initializer": {"name": "default"},
                "regularizer": {"name": "None"},
            },
        },
        "load_checkpoint": True,
        "load_path": "./runs/NavGPT/nn/NavGPT.pth",
        "config": {
            "name": "NavGPT",
            "full_experiment_name": "NavGPT",
            "env_name": "rlgpu",
            "device": "cuda:0",
            "device_name": "cuda:0",
            "multi_gpu": False,
            "ppo": True,
            "mixed_precision": False,
            "normalize_input": True,
            "normalize_value": True,
            "num_actors": 1,
            "reward_shaper": {"scale_value": 0.1},
            "normalize_advantage": True,
            "gamma": 0.99,
            "tau": 0.95,
            "learning_rate": 0.0006,
            "lr_schedule": "adaptive",
            "kl_threshold": 0.008,
            "score_to_win": 20000,
            "max_epochs": 100,
            "save_best_after": 100,
            "save_frequency": 50,
            "grad_norm": 1.0,
            "entropy_coef": 0.0,
            "truncate_grads": True,
            "e_clip": 0.2,
            "horizon_length": 24,
            "minibatch_size": 2048,
            "mini_epochs": 10,
            "critic_coef": 3,
            "clip_value": False,
            "seq_length": 4,
            "bounds_loss_coef": 0.0001,
        },
    }
}

class ModelWrapper(torch.nn.Module):
    '''
    The main idea is to ignore the outputs we don't need from the model.
    '''
    def __init__(self, model):
        torch.nn.Module.__init__(self)
        self._model = model

    def forward(self,input_dict):
        input_dict['obs'] = self._model.norm_obs(input_dict['obs'])
        '''
        Exporting the model directly doesn't work: it looks like an ONNX issue with
        torch distributions, which is why we export only the neural network.
        '''
        #print(input_dict)
        #output_dict = self._model.a2c_network(input_dict)
        #input_dict['is_train'] = False
        #return output_dict['logits'], output_dict['values']
        return self._model.a2c_network(input_dict)

if __name__ == "__main__":

    env = VecEnvRLGames(
        headless=True,
        # sim_device=cfg.device_id,
        # enable_livestream=cfg.enable_livestream,
        # enable_viewport=enable_viewport or cfg.enable_recording,
        experience=None
    )

    vecenv.register("RLGPU", lambda config_name, num_actors, **kwargs: RLGPUEnv(config_name, num_actors, **kwargs))
    env_configurations.register("rlgpu", {"vecenv_type": "RLGPU", "env_creator": lambda **kwargs: env})

    runner = Runner(RLGPUAlgoObserver())
    runner.load(CONFIG)
    runner.run({
        'train': False,
        'play': True
    })

    agent = runner.create_player()
    agent.restore('./runs/NavGPT/nn/NavGPT.pth')

    import rl_games.algos_torch.flatten as flatten
    inputs = {
        'obs' : torch.zeros((1,) + agent.obs_shape).to(agent.device),
        'rnn_states' : agent.states,
    }

    with torch.no_grad():
        adapter = flatten.TracingAdapter(ModelWrapper(agent.model), inputs, allow_non_tensor=True)
        traced = torch.jit.trace(adapter, adapter.flattened_inputs, check_trace=False)
        flattened_outputs = traced(*adapter.flattened_inputs)
        print(flattened_outputs)

    torch.onnx.export(traced, *adapter.flattened_inputs, "navgpt.onnx", verbose=True, input_names=['obs'], output_names=['mu','log_std', 'value'])

    onnx_model = onnx.load("navgpt.onnx")

    # Check that the model is well formed
    onnx.checker.check_model(onnx_model)

    ort_model = ort.InferenceSession("navgpt.onnx")

    outputs = ort_model.run(
        None,
        {"obs": np.zeros((1, 3)).astype(np.float32)},
    )
    print(outputs)

The error I am getting is related to the environment setup.

  File "rlgames_ros_all.py", line 121, in <module>
    runner.run({
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/torch_runner.py", line 135, in run
    self.run_play(args)
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/torch_runner.py", line 120, in run_play
    player = self.create_player()
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/torch_runner.py", line 126, in create_player
    return self.player_factory.create(self.algo_name, params=self.params)
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/common/object_factory.py", line 15, in create
    return builder(**kwargs)
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/torch_runner.py", line 43, in <lambda>
    self.player_factory.register_builder('a2c_continuous', lambda **kwargs : players.PpoPlayerContinuous(**kwargs))
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/algos_torch/players.py", line 21, in __init__
    BasePlayer.__init__(self, params)
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/common/player.py", line 39, in __init__
    self.env_info = env_configurations.get_env_info(self.env)
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/common/env_configurations.py", line 422, in get_env_info
    result_shapes['observation_space'] = env.observation_space
AttributeError: 'VecEnvRLGames' object has no attribute 'observation_space'

Is there a clean way to achieve this without using "env"? Since "env" is technically the real-world environment here, it doesn't make sense to have to include this variable.
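
One idea I considered (untested; the stub name and space shapes below are placeholders) is to register a stub environment that only exposes the observation and action spaces, since that seems to be all the player reads when it is created, so no simulator has to be constructed:

import gym
import numpy as np
from rl_games.common import env_configurations

class DummyEnv:
    # Stub env: only exposes the spaces rl_games queries when building a player.
    def __init__(self, num_obs, num_actions):
        self.observation_space = gym.spaces.Box(-np.inf, np.inf, (num_obs,), dtype=np.float32)
        self.action_space = gym.spaces.Box(-1.0, 1.0, (num_actions,), dtype=np.float32)

# Register under a new name and set CONFIG["params"]["config"]["env_name"] to it.
env_configurations.register("dummy", {
    "vecenv_type": "RAY",  # not used when the player creates the env directly
    "env_creator": lambda **kwargs: DummyEnv(num_obs=3, num_actions=1),
})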

Any insight would be much appreciated.

Quest2GM commented 2 months ago

As an update, I changed the env_name from rlgpu to env_pool, as in the example. However, I now get this error:

Traceback (most recent call last):
  File "rlgames_ros_all.py", line 111, in <module>
    runner.run({
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/torch_runner.py", line 135, in run
    self.run_play(args)
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/torch_runner.py", line 120, in run_play
    player = self.create_player()
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/torch_runner.py", line 126, in create_player
    return self.player_factory.create(self.algo_name, params=self.params)
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/common/object_factory.py", line 15, in create
    return builder(**kwargs)
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/torch_runner.py", line 43, in <lambda>
    self.player_factory.register_builder('a2c_continuous', lambda **kwargs : players.PpoPlayerContinuous(**kwargs))
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/algos_torch/players.py", line 21, in __init__
    BasePlayer.__init__(self, params)
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/common/player.py", line 38, in __init__
    self.env = self.create_env()
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/common/player.py", line 256, in create_env
    return env_configurations.configurations[self.env_name]['env_creator'](**self.env_config)
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/common/env_configurations.py", line 411, in <lambda>
    'env_creator': lambda **kwargs: create_envpool(**kwargs),
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/envs/envpool.py", line 105, in create_envpool
    return Envpool("", kwargs.pop('num_actors', 16), **kwargs)
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/rl_games/envs/envpool.py", line 23, in __init__
    self.env = envpool.make( env_name,
  File "/home/asblab2/sinarasi/venv_isaac/lib/python3.8/site-packages/envpool/registration.py", line 61, in make
    assert task_id in self.specs, \
AssertionError: NavGPT is not supported, `envpool.list_all_envs()` may help.

Again, the question stands: how do I add support for newly added environments?
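
From what I can tell, envpool can only construct the tasks compiled into the package, so a custom OmniIsaacGymEnvs task like NavGPT will never appear in its registry; the assertion's own hint confirms this:

import envpool

# NavGPT will not be listed: envpool only ships its own built-in tasks, so a
# custom environment has to be registered with rl_games directly (for example
# via env_configurations.register) rather than routed through envpool.
print(envpool.list_all_envs())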

denysm88 commented 2 months ago

Were you able to successfully install Isaac Gym in Google Colab? Does it work with Ant, for example?