Closed ava6969 closed 3 years ago
Hi @ava6969 ,
Can you provide more info? What robosuite task are you running? What rllib algo are you using? A reproducible script would be greatly appreciated.
Hi @ava6969 ,
Can you provide more info? What robosuite task are you running? What rllib algo are you using? A reproducible script would be greatly appreciated.
I am doing a simple stacking task. I am using Ray Tune and RLlib with PPO; the same thing happens with other algorithms. I am using multiple workers (30+). Do you think it may be caused by Ray RLlib?
Hmm, I'm not entirely sure. I've used rllib in the past and I don't recall running into this issue. Can you check if you get the same issue using an older version of robosuite (v1.0 or v1.1)?
I reverted back. I still get the same error with previous versions.
""" This file implements a wrapper for facilitating compatibility with OpenAI gym. This is useful when using these environments with code that assumes a gym-like interface. """ import time from collections import OrderedDict
import numpy as np from gym import spaces from robosuite.wrappers import Wrapper
class BasicMultiDiscrete(Wrapper):
    """Gym-style wrapper with dict observations and a MultiDiscrete action space.

    Every action dimension of the wrapped environment is discretised into
    ``action_bin`` evenly spaced values in ``[-1, 1]``. Observations are the
    wrapped environment's observation dictionary with the aggregate
    ``'robot0_robot-state'`` and ``'object-state'`` entries removed.
    """

    env = None

    # Observation keys dropped from the observation space and from every
    # observation returned by reset() / step(). Kept in one place so the
    # declared space and the returned observations cannot drift apart.
    _EXCLUDED_KEYS = frozenset({'robot0_robot-state', 'object-state'})

    def __init__(self, env, keys=None, action_bin=11):
        """
        Initializes the Gym wrapper.

        Args:
            env (MujocoEnv instance): The environment to wrap.
            keys (list of strings): Accepted for interface compatibility;
                currently unused. All observation keys except
                'robot0_robot-state' and 'object-state' are exposed.
            action_bin (int): Number of discrete bins per action dimension.
        """
        super().__init__(env)
        self.env = env

        # A first reset is needed to learn the shape of each observation entry.
        obs = env.reset()

        # set up observation and action spaces
        shapes = {
            k: spaces.Box(low=-np.inf, high=np.inf, shape=obs[k].shape, dtype=np.float32)
            for k in obs.keys()
            if k not in self._EXCLUDED_KEYS
        }
        self.observation_space = spaces.Dict(shapes)

        # Only the number of action dimensions is taken from the action spec;
        # each dimension is mapped onto the fixed range [-1, 1].
        low, high = self.env.action_spec
        self.actions_map = np.array([np.linspace(-1, 1, action_bin) for _ in range(len(low))])
        self.action_space = spaces.MultiDiscrete([action_bin] * len(low))

    def format(self, obs_dict: OrderedDict):
        """
        Returns a plain dict copy of the observations without the excluded keys.

        The input dictionary is left untouched, and excluded keys that are
        absent are simply ignored.

        Args:
            obs_dict: ordered dictionary of observations

        Returns:
            dict: observations with the excluded keys removed, original order kept.
        """
        return {k: v for k, v in obs_dict.items() if k not in self._EXCLUDED_KEYS}

    def reset(self):
        """Resets the wrapped environment and returns the filtered observation dict."""
        ob_dict = self.env.reset()
        return self.format(ob_dict)

    def step(self, action):
        """
        Converts per-dimension bin indices to continuous values and steps the env.

        Args:
            action: one bin index per action dimension.

        Returns:
            tuple: (filtered observation dict, reward, done, info) from the
            wrapped environment.
        """
        bin_idx = np.asarray(action)
        # Look up row i at column bin_idx[i] explicitly instead of relying on
        # np.take's flattened indexing, which only worked because every row of
        # actions_map holds the same values.
        rows = np.arange(self.actions_map.shape[0])
        actions = self.actions_map[rows, bin_idx]
        ob_dict, reward, done, info = self.env.step(actions)
        return self.format(ob_dict), reward, done, info
from ray.rllib import SampleBatch from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from ray.rllib.models import ModelCatalog, ModelV2 from ray.rllib.models.torch.misc import SlimFC, normc_initializer from ray.rllib.policy.view_requirement import ViewRequirement from ray.rllib.utils import override from ray.rllib.utils.typing import TensorType from typing import List, Dict import torch from ray.rllib.models.torch.recurrent_net import RecurrentNetwork as TorchRNN import torch.nn as nn import numpy as np import gym
from models.blocks import RMCBlock
class MultiDiscreteEmbedLSTMAttention(TorchModelV2, nn.Module):
    """Custom RLlib Torch model: per-key embeddings followed by an RMC block.

    Each entry of the (dict) observation space gets its own ``SlimFC``
    embedding of width ``feature_sz``. The embeddings are stacked along
    dimension 1 and passed through ``RMCBlock``; its output is the model output.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """
        Builds one embedding layer per observation key plus the RMC block.

        Args:
            obs_space: flattened observation space; its ``original_space``
                must be a dict space.
            action_space: action space, forwarded to ``TorchModelV2``.
            num_outputs: forwarded to ``TorchModelV2``; ``self.num_outputs``
                is then overwritten with ``node_size``.
            model_config (dict): model config; only ``'activation'`` is read.
            name (str): model name, forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        # NOTE(review): this reads 'activation', while the training config in
        # this file sets 'fcnet_activation' — confirm which key is intended.
        activation = model_config.get('activation', 'tanh')
        fc = nn.ModuleList()
        space = obs_space.original_space.spaces
        features = nn.ModuleDict()
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.feature_sz = 64
        self.num_heads = 3
        self.node_size = 128
        self.N = 13
        self.embed_dim = self.num_heads * self.node_size

        for k, v in space.items():
            # Check the individual sub-space `v`, not the enclosing dict of
            # spaces: Discrete spaces are sized by `n`, everything else by the
            # first dimension of its shape.
            if isinstance(v, gym.spaces.Discrete):
                in_size = v.n
            else:
                in_size = v.shape[0]
            features[k] = SlimFC(
                in_size=in_size,
                out_size=self.feature_sz,
                initializer=normc_initializer(1.0),
                activation_fn=activation).to(device)

        self.rmc_1 = RMCBlock(self.feature_sz, self.embed_dim, self.num_heads, self.node_size, self.N)
        self.fc = fc.to(device)
        self.features = features.to(device)
        self.num_outputs = self.node_size
        self._last = None
        # Batch size of the most recent forward pass; read by value_function().
        self._last_batch_size = 0
        self.device = device

    @override(TorchModelV2)
    def forward(self, input_dict: Dict[str, TensorType],
                state: List[TensorType],
                seq_lens: TensorType) -> (TensorType, List[TensorType]):
        """
        Embeds every observation entry and runs the stack through the RMC block.

        Args:
            input_dict: must contain ``'obs'``, a mapping from observation key
                to tensor.
            state: unused.
            seq_lens: unused.

        Returns:
            The RMC block output and an empty state list.
        """
        obs = input_dict['obs']
        _features = []
        for k in obs.keys():
            _features += [self.features[k](obs[k])]
        rmc_input = torch.stack(_features, 1)
        # Dimension 0 of the stacked embeddings is the batch dimension
        # (the per-key embeddings were stacked along dimension 1).
        self._last_batch_size = rmc_input.shape[0]
        _features = self.rmc_1(rmc_input)
        self._last = _features
        return _features, []

    @override(TorchModelV2)
    def value_function(self):
        """
        Returns a zero value estimate for each sample of the last forward pass.

        NOTE(review): this is a placeholder — the model has no value branch,
        so the value estimate is constantly zero.

        Raises:
            RuntimeError: if called before ``forward()``.
        """
        if self._last is None:
            raise RuntimeError("value_function() called before forward()")
        return torch.zeros(self._last_batch_size, device=self._last.device)
# Register the model class under the name that main() puts into
# config['model']['custom_model'].
ModelCatalog.register_custom_model("multi_discrete_lstm_attn", MultiDiscreteEmbedLSTMAttention)
from six.moves import input from ray.tune import tune import datetime import ray import ray.rllib.agents.ppo as ppo
def main():
    """Configure PPO with the custom model and launch a Ray Tune run.

    Results and checkpoints are written under ``results/<name>_<timestamp>``.
    Ray is always shut down on exit, even if the Tune run raises.
    """
    import copy

    # object_store_memory is a byte count; pass it as an int.
    ray.init(object_store_memory=int(1e9))
    try:
        # Work on a private deep copy: DEFAULT_CONFIG is a shared module-level
        # dict, and the nested 'model' dict is modified below as well.
        config = copy.deepcopy(ppo.DEFAULT_CONFIG)

        now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
        name = 'robot'
        model_name = f'{name}_{now}'
        # Escape sequence that sets the terminal window title.
        print(f'\33]0;{model_name} - {name}\a', end='', flush=True)
        env_id = "multidiscrete_robot-v0"

        # Resources and optimisation.
        config['num_gpus'] = 1
        config["framework"] = "torch"
        config['num_workers'] = 31
        config["gamma"] = 0.998
        config["lr"] = 3e-4
        config["train_batch_size"] = 320000
        config["env"] = env_id

        # Model.
        config['model']['custom_model'] = 'multi_discrete_lstm_attn'
        config['model']["fcnet_hiddens"] = [1024]
        config['model']["fcnet_activation"] = 'relu'
        config['model']["vf_share_layers"] = False
        config['model']['max_seq_len'] = 10
        config['model']['use_lstm'] = True
        config['model']["lstm_cell_size"] = 512
        # config['model']["lstm_use_prev_action"] = True
        # config['model']["lstm_use_prev_reward"] = True

        # Sampling and PPO hyper-parameters.
        config["num_envs_per_worker"] = 2
        config["rollout_fragment_length"] = 10
        config["entropy_coeff"] = 0.01
        config["lambda"] = 0.95
        config["sgd_minibatch_size"] = 32000
        config["num_sgd_iter"] = 60
        # config["remote_worker_envs"] = True
        config["clip_param"] = 0.2

        tune.run(
            'PPO',
            name=model_name,
            local_dir='results',
            config=config,
            reuse_actors=True,
            checkpoint_at_end=True,
            verbose=3,
            # restore=RESTORE_PATH,
            checkpoint_freq=10,
        )
    finally:
        ray.shutdown()
# Run the training script only when executed directly, not on import.
if __name__ == '__main__':
    main()
I think it's a Ray problem due to their system; I used a custom implementation.
When I train with Ray RLlib, the RAM usage keeps increasing. Is there something that needs to be cleaned up? Ray works fine with other environments.