ARISE-Initiative / robosuite

robosuite: A Modular Simulation Framework and Benchmark for Robot Learning
https://robosuite.ai

RAM usage keeps increasing when training with Ray RLlib #207

Closed · ava6969 closed this 3 years ago

ava6969 commented 3 years ago

When I train with Ray RLlib, the RAM usage keeps increasing. Is there something that needs to be cleaned up? Ray works fine in other environments.

cremebrule commented 3 years ago

Hi @ava6969 ,

Can you provide more info? What robosuite task are you running? What rllib algo are you using? A reproducible script would be greatly appreciated.

ava6969 commented 3 years ago

> Hi @ava6969 ,
>
> Can you provide more info? What robosuite task are you running? What rllib algo are you using? A reproducible script would be greatly appreciated.

I am doing a simple stacking task. I am using Ray Tune and RLlib with PPO; the same thing happens with other algorithms. I am using 30+ workers. Do you think it may be coming from Ray RLlib?

cremebrule commented 3 years ago

Hmm, I'm not entirely sure. I've used rllib in the past and I don't recall running into this issue. Can you check if you get the same issue using an older version of robosuite (v1.0 or v1.1)?
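A quick sanity check before and after downgrading is to print the installed version (a minimal sketch, assuming a standard pip install):

```python
# Confirm which robosuite version the interpreter is actually picking up.
import robosuite
print(robosuite.__version__)
```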

ava6969 commented 3 years ago

I reverted, and I still get the same error with previous versions. Here is the code I am using:

""" This file implements a wrapper for facilitating compatibility with OpenAI gym. This is useful when using these environments with code that assumes a gym-like interface. """ import time from collections import OrderedDict

import numpy as np from gym import spaces from robosuite.wrappers import Wrapper

class BasicMultiDiscrete(Wrapper): env = None

def __init__(self, env, keys=None, action_bin=11):
    """
    Initializes the Gym wrapper.

    Args:
        env (MujocoEnv instance): The environment to wrap.
        keys (list of strings): If provided, each observation will
            consist of concatenated keys from the wrapped environment's
            observation dictionary. Defaults to robot-state and object-state.
    """
    super().__init__(env)
    self.env = env

    obs = env.reset()
    # set up observation and action spaces
    excludes = {'robot0_robot-state', 'object-state'}

    shapes = {k: spaces.Box(low=-np.inf, high=np.inf, shape=obs[k].shape, dtype=np.float32)
              for k in obs.keys() if k not in excludes}

    self.observation_space = spaces.Dict(shapes)
    low, high = self.env.action_spec

    self.actions_map = np.array([np.linspace(-1, 1, action_bin) for _ in range(len(low))])
    self.action_space = spaces.MultiDiscrete([action_bin] * len(low))

def format(self, obs_dict:OrderedDict):
    """
    Filters keys of interest out and concatenate the information.

    Args:
        obs_dict: ordered dictionary of observations
    """

    obs_dict.pop('robot0_robot-state')
    obs_dict.pop('object-state')
    return dict(obs_dict)

def reset(self):
    ob_dict = self.env.reset()
    return self.format(ob_dict)

def step(self, action):
    actions = np.take(self.actions_map, action)
    ob_dict, reward, done, info = self.env.step(actions)
    return self.format(ob_dict), reward, done, info
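For context, this is roughly how such a wrapper would be hooked up to the `env_id` used in the training script below; a minimal sketch, where the `Stack` task and `Panda` robot are assumptions (the thread only mentions "a simple stacking task") and the wrapper class is assumed importable:

```python
import robosuite as suite
from ray.tune.registry import register_env


def make_env(env_config):
    # Headless robosuite environment; task and robot are illustrative guesses.
    env = suite.make(
        env_name="Stack",
        robots="Panda",
        has_renderer=False,
        has_offscreen_renderer=False,
        use_camera_obs=False,
    )
    return BasicMultiDiscrete(env)


register_env("multidiscrete_robot-v0", make_env)
```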

```python
from typing import Dict, List

import gym
import numpy as np
import torch
import torch.nn as nn

from ray.rllib import SampleBatch
from ray.rllib.models import ModelCatalog, ModelV2
from ray.rllib.models.torch.misc import SlimFC, normc_initializer
from ray.rllib.models.torch.recurrent_net import RecurrentNetwork as TorchRNN
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.policy.view_requirement import ViewRequirement
from ray.rllib.utils import override
from ray.rllib.utils.typing import TensorType

from models.blocks import RMCBlock


class MultiDiscreteEmbedLSTMAttention(TorchModelV2, nn.Module):
    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        hidden_sz = model_config.get('fcnet_hiddens', [256, 256])
        activation = model_config.get('activation', 'tanh')
        fc = nn.ModuleList()

        space = obs_space.original_space.spaces
        features = nn.ModuleDict()
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.feature_sz = 64
        self.num_heads = 3
        self.node_size = 128
        self.N = 13

        self.embed_dim = self.num_heads * self.node_size

        # One small encoder per observation key.
        for k, v in space.items():
            if isinstance(v, gym.spaces.Discrete):
                features[k] = SlimFC(
                    in_size=v.n,
                    out_size=self.feature_sz,
                    initializer=normc_initializer(1.0),
                    activation_fn=activation).to(device)
            else:
                features[k] = SlimFC(
                    in_size=v.shape[0],
                    out_size=self.feature_sz,
                    initializer=normc_initializer(1.0),
                    activation_fn=activation).to(device)

        self.rmc_1 = RMCBlock(self.feature_sz, self.embed_dim, self.num_heads,
                              self.node_size, self.N)

        self.fc = fc.to(device)
        self.features = features.to(device)
        self.num_outputs = self.node_size
        self._last = None
        self.device = device

    @override(TorchModelV2)
    def forward(self, input_dict: Dict[str, TensorType],
                state: List[TensorType],
                seq_lens: TensorType) -> (TensorType, List[TensorType]):
        obs = input_dict['obs']
        _features = []

        # Encode each observation component, then stack the encodings into a
        # set of "entities" for the relational (attention) block.
        for k in obs.keys():
            _features += [self.features[k](obs[k])]

        rmc_input = torch.stack(_features, 1)
        _features = self.rmc_1(rmc_input)

        self._last = _features
        return _features, []

    @override(TorchModelV2)
    def value_function(self):
        # Placeholder value head: returns zeros for the last forward batch.
        return torch.zeros(self._last.shape[0], device=self._last.device)


ModelCatalog.register_custom_model("multi_discrete_lstm_attn",
                                   MultiDiscreteEmbedLSTMAttention)
```

```python
import datetime

import ray
import ray.rllib.agents.ppo as ppo
from ray import tune


def main():
    ray.init(object_store_memory=int(1e9))
    # copy() so the shared default config is not mutated in place.
    config = ppo.DEFAULT_CONFIG.copy()
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    name = 'robot'
    model_name = f'{name}_{now}'
    # Set the terminal title to the run name.
    print(f'\33]0;{model_name} - {name}\a', end='', flush=True)

    env_id = "multidiscrete_robot-v0"

    config['num_gpus'] = 1
    config["framework"] = "torch"
    config['num_workers'] = 31
    config["gamma"] = 0.998
    config["lr"] = 3e-4
    config["train_batch_size"] = 320000
    config["env"] = env_id
    config['model']['custom_model'] = 'multi_discrete_lstm_attn'
    config['model']["fcnet_hiddens"] = [1024]
    config['model']["fcnet_activation"] = 'relu'
    config['model']["vf_share_layers"] = False
    config['model']['max_seq_len'] = 10
    config['model']['use_lstm'] = True
    config['model']["lstm_cell_size"] = 512
    # config['model']["lstm_use_prev_action"] = True
    # config['model']["lstm_use_prev_reward"] = True
    config["num_envs_per_worker"] = 2
    config["rollout_fragment_length"] = 10
    config["entropy_coeff"] = 0.01
    config["lambda"] = 0.95
    config["sgd_minibatch_size"] = 32000
    config["num_sgd_iter"] = 60
    # config["remote_worker_envs"] = True
    config["clip_param"] = 0.2

    tune.run(
        'PPO',
        name=model_name,
        local_dir='results',
        config=config,
        reuse_actors=True,
        checkpoint_at_end=True,
        verbose=3,
        # restore=RESTORE_PATH,
        # restore='/home/dewe/PycharmProjects/robotics_drl/results/robot_20210329-18h17/PPO_multidiscrete_robot-v0_9d8b0_00000_0_2021-03-29_18-17-57/checkpoint_920/checkpoint-920',
        checkpoint_freq=10
    )

    ray.shutdown()


if __name__ == '__main__':
    main()
```
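To narrow down whether the growth is in the rollout workers or the trainer process, one option (a sketch, not from the thread; it assumes `psutil` is installed, and the metric name is arbitrary) is to log each worker's resident memory through an RLlib callback:

```python
import os

import psutil
from ray.rllib.agents.callbacks import DefaultCallbacks


class MemoryTrackingCallbacks(DefaultCallbacks):
    def on_episode_end(self, *, worker, base_env, policies, episode,
                       env_index=None, **kwargs):
        # Log this rollout worker's resident set size (MB) as a custom metric.
        # A steady climb here implicates the workers/env; flat worker memory
        # with growing driver memory implicates the trainer process.
        proc = psutil.Process(os.getpid())
        episode.custom_metrics["worker_rss_mb"] = proc.memory_info().rss / 1e6


# Enabled with: config["callbacks"] = MemoryTrackingCallbacks
```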

ava6969 commented 3 years ago

I think it's a Ray problem due to how their system works; I used a custom implementation instead.