ray-project / ray

Ray is a unified framework for scaling AI and Python applications. Ray consists of a core distributed runtime and a set of AI Libraries for accelerating ML workloads.
https://ray.io
Apache License 2.0

[RLlib] disable_env_checking not working with wrapped env #44306

Open PhilippWillms opened 3 months ago

PhilippWillms commented 3 months ago

What happened + What you expected to happen

As seen in the reproduction script, I tried to instantiate a PPOConfig for an environment. When using the option PPOConfig().environment("myenv_wrapped", disable_env_checking=True), instance creation fails with the error trace below. In contrast, with env checking enabled everything works as expected, i.e. the wrapper is used and the error about the missing encoder config does not appear.
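For reference, these are the two configuration variants described above (a sketch only; "myenv_wrapped" is the env registered in the reproduction script further down):

# Fails: the 2D Box observation space of the unwrapped env reaches the catalog.
config = PPOConfig().environment("myenv_wrapped", disable_env_checking=True)

# Works: with env checking enabled, the wrapper's flattened observation space is used.
config = PPOConfig().environment("myenv_wrapped", disable_env_checking=False)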

2024-03-26 20:08:54,525 ERROR actor_manager.py:517 -- Ray error, taking actor 1 out of service. The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=24180, ip=192.168.178.26, actor_id=5e09c7daf37f73edf841d1a801000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000020C9D98F970>)
  File "python\ray\_raylet.pyx", line 1889, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 1830, in ray._raylet.execute_task.function_executor
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\_private\function_manager.py", line 724, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
    return method(self, *_args, **_kwargs)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 535, in __init__
    self._update_policy_map(policy_dict=self.policy_dict)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
    return method(self, *_args, **_kwargs)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 1743, in _update_policy_map
    self._build_policy_map(
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
    return method(self, *_args, **_kwargs)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 1854, in _build_policy_map
    new_policy = create_policy_for_framework(
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\utils\policy.py", line 141, in create_policy_for_framework
    return policy_class(observation_space, action_space, merged_config)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\algorithms\ppo\ppo_torch_policy.py", line 49, in __init__
    TorchPolicyV2.__init__(
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 90, in __init__
    model = self.make_rl_module()
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\policy\policy.py", line 427, in make_rl_module
    marl_module = marl_spec.build()
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\marl_module.py", line 531, in build
    module = self.marl_module_class(module_config)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 384, in new_init
    previous_init(self, *args, **kwargs)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\marl_module.py", line 75, in __init__
    super().__init__(config or MultiAgentRLModuleConfig())
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 376, in __init__
    self.setup()
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\marl_module.py", line 85, in setup
    self._rl_modules[module_id] = module_spec.build()
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 102, in build
    module = self.module_class(module_config)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 384, in new_init
    previous_init(self, *args, **kwargs)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 384, in new_init
    previous_init(self, *args, **kwargs)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\examples\rl_module\action_masking_rlm.py", line 29, in __init__
    super().__init__(config)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 384, in new_init
    previous_init(self, *args, **kwargs)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 384, in new_init
    previous_init(self, *args, **kwargs)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\torch\torch_rl_module.py", line 85, in __init__
    RLModule.__init__(self, *args, **kwargs)
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 376, in __init__
    self.setup()
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\algorithms\ppo\ppo_rl_module.py", line 20, in setup
    catalog = self.config.get_catalog()
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\rl_module\rl_module.py", line 196, in get_catalog
    return self.catalog_class(
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\algorithms\ppo\ppo_catalog.py", line 69, in __init__
    super().__init__(
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\models\catalog.py", line 112, in __init__
    self._determine_components_hook()
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\models\catalog.py", line 132, in _determine_components_hook
    self._encoder_config = self._get_encoder_config(
  File "C:\Users\Philipp\anaconda3\envs\torch-gpu-310\lib\site-packages\ray\rllib\core\models\catalog.py", line 368, in _get_encoder_config
    raise ValueError(
ValueError: No default encoder config for obs space=Box(0.0, 1.0, (3, 4), float32), lstm=False and attention=False found. 2D Box spaces are not supported. They should be either flattened to a 1D Box space or enhanced to be a 3D box space.
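The ValueError itself only says that RLlib's catalog has no default encoder for a 2D Box and that such a space should be flattened to 1D (or turned into a 3D image-like space). A minimal sketch of a possible workaround, independent of the reported wrapper bug and assuming the 3x4 state can simply be reshaped to a length-12 vector, is to declare the flattened space directly in the environment:

import numpy as np
from gymnasium.spaces import Box, Dict, Discrete

# Hypothetical variant of MyEnv's observation space: already flattened to 1D,
# so no wrapper is needed and the catalog sees a supported Box shape.
observation_space = Dict(
    {
        "action_mask": Box(low=0, high=1, shape=(3,), dtype=np.int8),
        "observations": Box(low=0.0, high=1.0, shape=(12,), dtype=np.float32),  # 3 * 4 flattened
    }
)
# _get_state_repr() would then return np.reshape(self.state, -1) for "observations".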

Versions / Dependencies

gymnasium==0.28.1
ray==2.10.0

Reproduction script

import logging
from pprint import pprint
from typing import OrderedDict, Tuple
import gymnasium
import numpy as np
from gymnasium.spaces import Box, Discrete, Dict
from gymnasium.wrappers import TransformObservation
import ray
from ray.rllib.algorithms import PPOConfig
from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
from ray.rllib.examples.rl_module.action_masking_rlm import TorchActionMaskRLM
from ray.tune.registry import register_env

logger = logging.getLogger()
logger.setLevel("WARN")

class MyRealObsWrapper(TransformObservation):
    """Special Wrapper needed for new RLlib API stack."""

    def __init__(self, env):
        super().__init__(env, self.__transform)

    def __transform(self, orig_obs):
        new_obs = orig_obs
        for b in new_obs.keys():
            if b not in ["static_features"]:
                new_obs[b] = np.reshape(new_obs[b], -1)
        # Important to update the observation space, otherwise the RLlib algorithms will not work
        self.observation_space["observations"] = Box(0, 1, (len(new_obs["observations"]),))
        return new_obs

class MyEnv(gymnasium.Env):
    def __init__(self, *args, **kwargs):
        print("Init method called.")        
        self.action_space = Discrete(3)
        self.observation_space = Dict(
            {
                "action_mask": Box(
                    low=0, high=1, shape=(self.action_space.n,), dtype=np.int8
                ),
                "observations": Box(
                    low=0.0,
                    high=1.0,
                    shape=(3, 4),
                    dtype=np.float32,
                ),
                # "static_features": Dict(...)
            }
        )
        self.episode_done = False
        self._action_max_helper = np.ones(self.action_space.n, dtype=np.int8)
        self.state = np.zeros((3, 4), dtype=np.float32)

    def step(self, action: int) -> Tuple[OrderedDict, float, bool, bool, dict]:
        print(f"Step function called with action {action}.")
        # Error handling for invalid action
        if (action < 0) or (action >= self.action_space.n):
            e_string = f"Action [{action}] is not valid! Size of the action space: [{self.action_space.n}]."
            raise Exception(e_string)
        if self._action_max_helper[action] == 0:
            e_string = f"Action [{action}] is not valid as chosen already in episode !"
            raise Exception(e_string)

        some_dict = {}
        if action not in some_dict.keys():
            some_dict[action] = 1
            logger.warning("Action key added to dict.")
        print(f"Existing value in dict: {some_dict[action]}")

        reward = 0 - action
        self.state[action][0] = 1
        self._action_max_helper[action] = 0
        if all(self._action_max_helper[k] == 0 for k in range(3)):
            self.episode_done = True
        print(f"State after step: {self.state}.")
        return self._get_state_repr(), reward, self.episode_done, False, {}

    def _get_state_repr(self) -> OrderedDict:
        return {
            "action_mask": self._action_max_helper,
            "observations": self.state,
        }

    def reset(self, *, seed=None, options=None) -> Tuple[OrderedDict, dict]:
        print("Reset method called.")
        self.episode_done = False
        # Initial state representation = shape of the obs space.
        self.state = np.zeros((3, 4), dtype=np.float32)
        # Initial action mask = all actions are allowed.
        self._action_max_helper = np.ones(self.action_space.n, dtype=np.int8)
        return self._get_state_repr(), {}

def env_creator(env_config):
    env = MyEnv()
    env = MyRealObsWrapper(env)
    return env

# Use classic API to register environment
register_env("myenv_wrapped", env_creator)

if __name__ == "__main__":
    rlm_spec = SingleAgentRLModuleSpec(module_class=TorchActionMaskRLM)

    # Algorithm Config, but with the latest RLlib API
    config = (
        PPOConfig().environment("myenv_wrapped", disable_env_checking=True)
        # We need to disable preprocessing of observations, because preprocessing
        # would flatten the observation dict of the environment.
        .experimental(_disable_preprocessor_api=True, _enable_new_api_stack=True)
        .framework("torch")
        .resources(
            num_gpus=1, num_cpus_per_worker=2, num_gpus_per_worker=0.3
        )
        .rl_module(rl_module_spec=rlm_spec)
        .training(lr=1e-3, train_batch_size=50, sgd_minibatch_size=10)
    )

    algo = config.build()

    # run manual training loop and print results after each iteration
    for i in range(2):
        result = algo.train()
        print(f"Training iteration: {i+1} done")
        # pprint(result)

    ray.shutdown()

Issue Severity

Low: It annoys or frustrates me.

PhilippWillms commented 3 months ago

@anyscalesam : Please categorize as per "rllib", thanks!

PhilippWillms commented 2 months ago

In the nightly build, I see the following deprecation warning:

AlgorithmConfig.environment(disable_env_checking=..) has been deprecated.
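Given that deprecation notice, a minimal sketch of the config on newer versions (assuming the deprecated keyword can simply be dropped; the rest stays as in the reproduction script above):

config = (
    PPOConfig()
    .environment("myenv_wrapped")
    # ... remaining experimental/framework/resources/rl_module/training settings as above
)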