Ray is a unified framework for scaling AI and Python applications. Ray consists of a core distributed runtime and a set of AI Libraries for accelerating ML workloads.
As seen in the reproduction script, I tried to instantiate PPOConfigfor an environment. When using the option PPOConfig().environment("myenv_wrapped", disable_env_checking=True), instance creation fails with below error trace. In contrast, enabling the env checking module works positive, i.e. the wrapper is used and the error message of missing encoder config does not appear.
2024-03-26 20:08:54,525 ERROR actor_manager.py:517 -- Ray error, taking actor 1 out of service. The actor died because of an error raised in its creation task, ray::RolloutWorker.init() (pid=24180, ip=192.168.178.26, actor_id=5e09c7daf37f73edf841d1a801000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000020C9D98F970>)
File "python\ray_raylet.pyx", line 1889, in ray._raylet.execute_task
File "python\ray_raylet.pyx", line 1830, in ray._raylet.execute_task.function_executor
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray_private\function_manager.py", line 724, in actor_method_executor
return method(ray_actor, *args, kwargs)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, *_args, *_kwargs)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 535, in init
self._update_policy_map(policy_dict=self.policy_dict)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, _args, _kwargs)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 1743, in _update_policy_map
self._build_policy_map(
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 1854, in _build_policy_map
new_policy = create_policy_for_framework(
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\utils\policy.py", line 141, in create_policy_for_framework
return policy_class(observation_space, action_space, merged_config)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\algorithms\ppo\ppo_torch_policy.py", line 49, in init
TorchPolicyV2.init(
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 90, in init
model = self.make_rl_module()
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\policy\policy.py", line 427, in make_rl_module
marl_module = marl_spec.build()
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\marl_module.py", line 531, in build
module = self.marl_module_class(module_config)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 384, in new_init
previous_init(self, *args, kwargs)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\marl_module.py", line 75, in init
super().init(config or MultiAgentRLModuleConfig())
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 376, in init
self.setup()
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\marl_module.py", line 85, in setup
self._rl_modules[module_id] = module_spec.build()
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 102, in build
module = self.module_class(module_config)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 384, in new_init
previous_init(self, *args, *kwargs)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 384, in new_init
previous_init(self, args, kwargs)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\examples\rl_module\action_masking_rlm.py", line 29, in init
super().init__(config)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 384, in new_init
previous_init(self, *args, kwargs)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 384, in new_init
previous_init(self, *args, *kwargs)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\torch\torch_rl_module.py", line 85, in init
RLModule.init(self, args, kwargs)
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 376, in init
self.setup()
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\algorithms\ppo\ppo_rl_module.py", line 20, in setup
catalog = self.config.get_catalog()
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 196, in get_catalog
return self.catalog_class(
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\algorithms\ppo\ppo_catalog.py", line 69, in init
super().init(
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\models\catalog.py", line 112, in init
self._determine_components_hook()
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\models\catalog.py", line 132, in _determine_components_hook
self._encoder_config = self._get_encoder_config(
File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\models\catalog.py", line 368, in _get_encoder_config
raise ValueError(
ValueError: No default encoder config for obs space=Box(0.0, 1.0, (3, 4), float32), lstm=False and attention=False found. 2D Box spaces are not supported. They should be either flattened to a 1D Box space or enhanced to be a 3D box space.
Versions / Dependencies
gymnasium==0.28.1
ray==2.10.0
Reproduction script
import logging
from pprint import pprint
from typing import OrderedDict, Tuple
import gymnasium
import numpy as np
from gymnasium.spaces import Box, Discrete, Dict
from gymnasium.wrappers import TransformObservation
import ray
from ray.rllib.algorithms import PPOConfig
from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
from ray.rllib.examples.rl_module.action_masking_rlm import TorchActionMaskRLM
from ray.tune.registry import register_env
logger = logging.getLogger()
logger.setLevel("WARN")
class MyRealObsWrapper(TransformObservation):
"""Special Wrapper needed for new RLlib API stack."""
def __init__(self, env):
super().__init__(env, self.__transform)
def __transform(self, orig_obs):
new_obs = orig_obs
for b in new_obs.keys():
if b not in ["static_features"]:
new_obs[b] = np.reshape(new_obs[b], -1)
# Important to update the observation space, otherwise the RLlib algorithms will not work
self.observation_space["observations"] = Box(0, 1, (len(new_obs["observations"]),))
return new_obs
class MyEnv(gymnasium.Env):
def __init__(self, *args, **kwargs):
print("Init method called.")
self.action_space = Discrete(3)
self.observation_space = Dict(
{
"action_mask": Box(
low=0, high=1, shape=(self.action_space.n,), dtype=np.int8
),
"observations": Box(
low=0.0,
high=1.0,
shape=(3, 4),
dtype=np.float32,
),
# "static_features": Dict(...)
}
)
self.episode_done = False
self._action_max_helper = np.ones(self.action_space.n, dtype=np.int8)
self.state = np.zeros((3, 4), dtype=np.float32)
def step(self, action: int) -> Tuple[OrderedDict, float, bool, bool, dict]:
print(f"Step function called with action {action}.")
# Error handling for invalid action
if (action < 0) | (action > self.action_space.n):
e_string = f"Action [{action}] is not valid! Size of the action space: [{self.action_space.n}]."
raise Exception(e_string)
if self._action_max_helper[action] == 0:
e_string = f"Action [{action}] is not valid as chosen already in episode !"
raise Exception(e_string)
some_dict = {}
if action not in some_dict.keys():
some_dict[action] = 1
logger.warning(f"Action key added to dict.")
print(f"Existing value in dict: {some_dict[action]}")
reward = 0 - action
self.state[action][0] = 1
self._action_max_helper[action] = 0
if all(self._action_max_helper[k] == 0 for k in range(3)):
self.episode_done = True
print(f"State after step: {self.state}.")
return self._get_state_repr(), reward, self.episode_done, False, {}
def _get_state_repr(self) -> OrderedDict:
return {
"action_mask": self._action_max_helper,
"observations": self.state,
}
def reset(self, *, seed=None, options=None) -> Tuple[OrderedDict, dict]:
print("Reset method called.")
self.episode_done = False
# Initial state representation = shape of the obs space.
self.state = np.zeros((3, 4), dtype=np.float32)
# Initial action mask = all actions are allowed.
self._action_max_helper = np.ones(self.action_space.n, dtype=np.int8)
return self._get_state_repr(), {}
def env_creator(env_config):
env = MyEnv()
env = MyRealObsWrapper(env)
return env
# Use classic API to register environment
register_env("myenv_wrapped", env_creator)
if __name__ == "__main__":
rlm_spec = SingleAgentRLModuleSpec(module_class=TorchActionMaskRLM)
# Algorithm Config, but with the latest RLlib API
config = (
PPOConfig().environment("myenv_wrapped", disable_env_checking=True)
# We need to disable preprocessing of observations, because preprocessing
# would flatten the observation dict of the environment.
.experimental(_disable_preprocessor_api=True, _enable_new_api_stack=True)
.framework("torch")
.resources(
num_gpus=1, num_cpus_per_worker=2, num_gpus_per_worker=0.3
)
.rl_module(rl_module_spec=rlm_spec)
.training(lr=1e-3, train_batch_size=50, sgd_minibatch_size=10)
)
algo = config.build()
# run manual training loop and print results after each iteration
for i in range(2):
result = algo.train()
print(f"Training iteration: {i+1} done")
# pprint(result)
ray.shutdown()
What happened + What you expected to happen
As seen in the reproduction script, I tried to instantiate
PPOConfig
for an environment. When using the optionPPOConfig().environment("myenv_wrapped", disable_env_checking=True)
, instance creation fails with below error trace. In contrast, enabling the env checking module works positive, i.e. the wrapper is used and the error message of missing encoder config does not appear.Versions / Dependencies
gymnasium==0.28.1 ray==2.10.0
Reproduction script
Issue Severity
Low: It annoys or frustrates me.