lie12huo opened this issue 8 months ago
@lie12huo have you figured out a way to solve it? I am facing the same issue.
@SecondTheFirst This seems to be a version compatibility issue that I never managed to solve.
Facing the same issue when running it in a Docker container: https://hub.docker.com/r/lionelpeer/sumo-rl
Facing the same issue. Any ideas?
Hey, I eventually found a fix somewhere in a PR to rllib's PettingZoo wrappers, and I am attaching the file that made it work for us: simply copy this file somewhere and import `PettingZooEnv` / `ParallelPettingZooEnv` from there instead of using `from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv`.
It's really just a hot-fix, but I don't remember exactly where I found the PR, and it might have been merged into rllib's main branch by now, so the first thing I would try is to upgrade rllib to the newest release.
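To make the swap concrete, here is a minimal sketch of how the local copy could be registered with RLlib. It assumes the attached file below is saved as `pettingzoo_env_fix.py` next to your script; the sumo-rl net/route paths are illustrative placeholders, not the exact config from this issue:

import sumo_rl
from ray.tune.registry import register_env

# Import the patched wrapper from the local copy instead of
# ray.rllib.env.wrappers.pettingzoo_env.
from pettingzoo_env_fix import ParallelPettingZooEnv

def env_creator(config):
    # Illustrative sumo-rl environment; adjust the paths to your own network.
    env = sumo_rl.parallel_env(
        net_file="nets/4x4.net.xml",
        route_file="nets/4x4.rou.xml",
        use_gui=False,
        num_seconds=3600,
    )
    return ParallelPettingZooEnv(env)

register_env("sumo_4x4", env_creator)

The attached file: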
from typing import Optional
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.gym import convert_old_gym_space_to_gymnasium_space
from ray.rllib.utils.typing import MultiAgentDict
@PublicAPI
class PettingZooEnv(MultiAgentEnv):
"""An interface to the PettingZoo MARL environment library.
See: https://github.com/Farama-Foundation/PettingZoo
    Inherits from MultiAgentEnv and exposes a given AEC
    (Agent Environment Cycle) game from the PettingZoo project via the
    MultiAgentEnv public API.
Note that the wrapper has some important limitations:
1. All agents have the same action_spaces and observation_spaces.
       Note: If, within your AEC game, agents do not have homogeneous action /
       observation spaces, use SuperSuit wrappers to apply padding:
       https://github.com/Farama-Foundation/SuperSuit#built-in-multi-agent-only-functions
    2. Environments are positive-sum games (-> agents are expected to cooperate
       to maximize reward). This isn't a hard restriction; it is just that
       standard algorithms aren't expected to work well in highly competitive
       games.
Examples:
>>> from pettingzoo.butterfly import prison_v3
>>> from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
>>> env = PettingZooEnv(prison_v3.env())
>>> obs, infos = env.reset()
>>> print(obs)
# only returns the observation for the agent which should be stepping
{
'prisoner_0': array([[[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
...,
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]]], dtype=uint8)
}
>>> obs, rewards, terminateds, truncateds, infos = env.step({
... "prisoner_0": 1
... })
    # only returns the observation, reward, info, etc., for
    # the agent whose turn is next.
>>> print(obs)
{
'prisoner_1': array([[[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
...,
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]]], dtype=uint8)
}
>>> print(rewards)
{
'prisoner_1': 0
}
>>> print(terminateds)
{
'prisoner_1': False, '__all__': False
}
>>> print(truncateds)
{
'prisoner_1': False, '__all__': False
}
>>> print(infos)
{
'prisoner_1': {'map_tuple': (1, 0)}
}
"""
def __init__(self, env):
super().__init__()
self.env = env
env.reset()
# Since all agents have the same spaces, do not provide full observation-
# and action-spaces as Dicts, mapping agent IDs to the individual
# agents' spaces. Instead, `self.[action|observation]_space` are the single
# agent spaces.
self._obs_space_in_preferred_format = False
self._action_space_in_preferred_format = False
# Collect the individual agents' spaces (they should all be the same):
first_obs_space = self.env.observation_space(self.env.agents[0])
first_action_space = self.env.action_space(self.env.agents[0])
for agent in self.env.agents:
if self.env.observation_space(agent) != first_obs_space:
raise ValueError(
"Observation spaces for all agents must be identical. Perhaps "
"SuperSuit's pad_observations wrapper can help (useage: "
"`supersuit.aec_wrappers.pad_observations(env)`"
)
if self.env.action_space(agent) != first_action_space:
raise ValueError(
"Action spaces for all agents must be identical. Perhaps "
"SuperSuit's pad_action_space wrapper can help (usage: "
                    "`supersuit.aec_wrappers.pad_action_space(env)`)."
)
# Convert from gym to gymnasium, if necessary.
self.observation_space = convert_old_gym_space_to_gymnasium_space(
first_obs_space
)
self.action_space = convert_old_gym_space_to_gymnasium_space(first_action_space)
        self._agent_ids = set(self.env.agents)  # MultiAgentEnv expects a set of agent IDs.
    def observation_space_sample(self, agent_ids: list = None) -> MultiAgentDict:
        if agent_ids is None:
            agent_ids = self._agent_ids
        return {agent_id: self.observation_space.sample() for agent_id in agent_ids}
    def action_space_sample(self, agent_ids: list = None) -> MultiAgentDict:
        if agent_ids is None:
            agent_ids = self._agent_ids
        return {agent_id: self.action_space.sample() for agent_id in agent_ids}
def action_space_contains(self, x: MultiAgentDict) -> bool:
if not isinstance(x, dict):
return False
return all(self.action_space.contains(val) for val in x.values())
def observation_space_contains(self, x: MultiAgentDict) -> bool:
if not isinstance(x, dict):
return False
return all(self.observation_space.contains(val) for val in x.values())
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
info = self.env.reset(seed=seed, options=options)
return (
{self.env.agent_selection: self.env.observe(self.env.agent_selection)},
info or {},
)
    def step(self, action):
        # AEC envs advance one agent per call: apply only the action of the
        # currently selected agent.
        self.env.step(action[self.env.agent_selection])
obs_d = {}
rew_d = {}
terminated_d = {}
truncated_d = {}
info_d = {}
        # Collect `last()` data for the current agent; if that agent is already
        # done, step it out with a None action and advance to the next agent,
        # until a live agent (or no agent at all) remains.
        while self.env.agents:
obs, rew, terminated, truncated, info = self.env.last()
agent_id = self.env.agent_selection
obs_d[agent_id] = obs
rew_d[agent_id] = rew
terminated_d[agent_id] = terminated
truncated_d[agent_id] = truncated
info_d[agent_id] = info
if (
self.env.terminations[self.env.agent_selection]
or self.env.truncations[self.env.agent_selection]
):
self.env.step(None)
else:
break
all_gone = not self.env.agents
terminated_d["__all__"] = all_gone and all(terminated_d.values())
truncated_d["__all__"] = all_gone and all(truncated_d.values())
return obs_d, rew_d, terminated_d, truncated_d, info_d
def close(self):
self.env.close()
    def render(self):
        # Newer PettingZoo envs take the render mode at construction time,
        # so render() is called here without arguments.
        return self.env.render()
@property
def get_sub_environments(self):
return self.env.unwrapped
@PublicAPI
class ParallelPettingZooEnv(MultiAgentEnv):
def __init__(self, env):
super().__init__()
self.par_env = env
self.par_env.reset()
# Since all agents have the same spaces, do not provide full observation-
# and action-spaces as Dicts, mapping agent IDs to the individual
# agents' spaces. Instead, `self.[action|observation]_space` are the single
# agent spaces.
self._obs_space_in_preferred_format = False
self._action_space_in_preferred_format = False
# Get first observation space, assuming all agents have equal space
self.observation_space = self.par_env.observation_space(self.par_env.agents[0])
# Get first action space, assuming all agents have equal space
self.action_space = self.par_env.action_space(self.par_env.agents[0])
assert all(
self.par_env.observation_space(agent) == self.observation_space
for agent in self.par_env.agents
), (
"Observation spaces for all agents must be identical. Perhaps "
"SuperSuit's pad_observations wrapper can help (useage: "
"`supersuit.aec_wrappers.pad_observations(env)`"
)
        assert all(
            self.par_env.action_space(agent) == self.action_space
            for agent in self.par_env.agents
        ), (
            "Action spaces for all agents must be identical. Perhaps "
            "SuperSuit's pad_action_space wrapper can help (usage: "
            "`supersuit.aec_wrappers.pad_action_space(env)`)."
        )
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
obs, info = self.par_env.reset(seed=seed, options=options)
return obs, info or {}
def step(self, action_dict):
obss, rews, terminateds, truncateds, infos = self.par_env.step(action_dict)
terminateds["__all__"] = all(terminateds.values())
truncateds["__all__"] = all(truncateds.values())
return obss, rews, terminateds, truncateds, infos
def close(self):
self.par_env.close()
    def render(self):
        # As above, newer PettingZoo envs render without an explicit mode arg.
        return self.par_env.render()
@property
def get_sub_environments(self):
return self.par_env.unwrapped
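Here is also a quick smoke test of the patched `ParallelPettingZooEnv`, a minimal sketch assuming the file above is saved as `pettingzoo_env_fix.py`; `pistonball_v6` stands in as an example PettingZoo parallel environment:

from pettingzoo.butterfly import pistonball_v6
from pettingzoo_env_fix import ParallelPettingZooEnv

# Wrap an example parallel env and roll it out with random actions.
env = ParallelPettingZooEnv(pistonball_v6.parallel_env())
obs, infos = env.reset(seed=42)

done = False
while not done:
    # One random action per agent still present in the env.
    actions = {agent: env.action_space.sample() for agent in obs}
    obs, rewards, terminateds, truncateds, infos = env.step(actions)
    done = terminateds["__all__"] or truncateds["__all__"]

env.close()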
It works! Thanks a lot!
When I executed the command `python experiments/ppo_4x4grid.py` for training, the following error occurred: