huawei-noah / SMARTS

Scalable Multi-Agent RL Training School for Autonomous Driving
MIT License

observations, _ = env.reset() problems #2124

Closed ggplz closed 11 months ago

ggplz commented 11 months ago

High Level Description

"""This is an example to show how SMARTS multi-agent works. This example uses the same kind of
agent multiple times. But different agents with different action and observation shapes can be mixed
in."""

import random
import sys
from pathlib import Path
from typing import Final

import gymnasium as gym

# SMARTS_REPO_PATH = Path(__file__).parents[1].absolute()
# sys.path.insert(0, str(SMARTS_REPO_PATH))
from tools.argument_parser import minimal_argument_parser
from smarts.core.agent import Agent
from smarts.core.agent_interface import AgentInterface, AgentType
from smarts.core.utils.episodes import episodes
from smarts.sstudio.scenario_construction import build_scenarios

N_AGENTS = 4
AGENT_IDS: Final[list] = ["Agent %i" % i for i in range(N_AGENTS)]

class RandomLanerAgent(Agent):
    def __init__(self, action_space) -> None:
        self._action_space = action_space

    def act(self, obs, **kwargs):
        return self._action_space.sample()

class KeepLaneAgent(Agent):
    def __init__(self, action_space) -> None:
        self._action_space = action_space

    def act(self, obs, **kwargs):
        return self._action_space.sample()

def main(scenarios, headless, num_episodes, max_episode_steps=None):
    # This interface must match the action returned by the agent
    agent_interfaces = {
        agent_id: AgentInterface.from_type(
            AgentType.Laner, max_episode_steps=max_episode_steps
        )
        for agent_id in AGENT_IDS
    }

    env = gym.make(
        "smarts.env:hiway-v1",
        scenarios=scenarios,
        agent_interfaces=agent_interfaces,
        headless=headless,
    )

    for episode in episodes(n=num_episodes):
        agents = {
            agent_id: RandomLanerAgent(env.action_space[agent_id])
            for agent_id in agent_interfaces.keys()
        }
        observations, _ = env.reset()
        episode.record_scenario(env.unwrapped.scenario_log)

        terminateds = {"__all__": False}
        while not terminateds["__all__"]:
            actions = {
                agent_id: agent.act(observations) for agent_id, agent in agents.items()
            }
            observations, rewards, terminateds, truncateds, infos = env.step(actions)
            episode.record_step(observations, rewards, terminateds, truncateds, infos)

    env.close()

if __name__ == "__main__":
    parser = minimal_argument_parser(Path(__file__).stem)
    args = parser.parse_args()

    if not args.scenarios:
        args.scenarios = [
            str("/Users/heshouliang/smart/pymarl-master/src/SMARTS/scenarios/sumo/loop"),
        ]

    build_scenarios(scenarios=args.scenarios)

    main(
        scenarios=args.scenarios,
        headless=args.headless,
        num_episodes=args.episodes,
        max_episode_steps=args.max_episode_steps,
    )

Version

smarts 1.4.0

Operating System

No response

Problems

In this example, there are four agents:

Result = {dict: 4} {'Agent 0': <__main__.RandomLanerAgent object at 0x13eaf8100>, 'Agent 1': <__main__.RandomLanerAgent object at 0x13eaf8190>, 'Agent 2': <__main__.RandomLanerAgent object at 0x13ea32a30>, 'Agent 3': <__main__.RandomLanerAgent object at 0x13ea32b80>}
 'Agent 0' = {RandomLanerAgent} <__main__.RandomLanerAgent object at 0x13eaf8100>
 'Agent 1' = {RandomLanerAgent} <__main__.RandomLanerAgent object at 0x13eaf8190>
 'Agent 2' = {RandomLanerAgent} <__main__.RandomLanerAgent object at 0x13ea32a30>
 'Agent 3' = {RandomLanerAgent} <__main__.RandomLanerAgent object at 0x13ea32b80>
 __len__ = {int} 4

When I debug the code for the first time, the result of `observations, _ = env.reset()` has the following problem:

Result = {dict: 1} {'Agent 2': {'active': 1, 'steps_completed': 1, 'distance_travelled': 0.0, 'ego_vehicle_state': {'angular_velocity': array([0., 0., 0.], dtype=float32), 'box': array([3.68, 1.47, 1.  ], dtype=float32), 'heading': 0.41247404, 'lane_id': '445633931_2', 'lane
 'Agent 2' = {dict: 8} {'active': 1, 'steps_completed': 1, 'distance_travelled': 0.0, 'ego_vehicle_state': {'angular_velocity': array([0., 0., 0.], dtype=float32), 'box': array([3.68, 1.47, 1.  ], dtype=float32), 'heading': 0.41247404, 'lane_id': '445633931_2', 'lane_index': 2, 
 __len__ = {int} 1

There are 4 agents at the initial moment, but the observations returned after initialization contain only the information of a single, seemingly random agent. The observations of the other agents only appear as the subsequent code executes. Moreover, sometimes no agent observations can be obtained at all and the entire dictionary is empty. How can I solve this problem?
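For reference, a quick way to see which of the configured agents actually returned observations after `env.reset()` is to compare the returned keys against the configured agent IDs; a minimal diagnostic, assuming the example script above:

    # Minimal diagnostic, assuming the example above: which configured agents
    # actually have an entry in the observations returned by reset()?
    observations, _ = env.reset()
    present = set(observations.keys())
    configured = set(agent_interfaces.keys())
    print("configured agents:", sorted(configured))
    print("agents with observations after reset:", sorted(present))
    print("agents without an observation yet:", sorted(configured - present))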

Gamenot commented 11 months ago

@ggplz, I am not certain what you are asking for. I will try to guess.

Observations only come from the agents that currently have actors in the environment. There is a different option that returns entries for all agents; with it, the agents that currently have an active actor can be identified by their "active" flag.

    env = gym.make(
        "smarts.env:hiway-v1",
        ...,
        observation_options=ObservationOptions.full, # default is `ObservationOptions.formatted`
    )

    observations, _ = env.reset()
    for agent_id, ob in observations.items():
        print(f"'{agent_id}' {ob['active']=}")

    # 'Agent 0' ob['active']=False
    # 'Agent 1' ob['active']=False
    # 'Agent 2' ob['active']=True
    # 'Agent 3' ob['active']=False
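For completeness, here is a minimal sketch of how this could be wired into the example's episode loop, acting only for agents whose entry is marked active. The import path for `ObservationOptions` (`smarts.env.utils.observation_conversion`) is assumed for SMARTS 1.4.x and may differ across versions:

    # Sketch only; the ObservationOptions import path is assumed for SMARTS 1.4.x.
    from smarts.env.utils.observation_conversion import ObservationOptions

    env = gym.make(
        "smarts.env:hiway-v1",
        scenarios=scenarios,
        agent_interfaces=agent_interfaces,
        headless=headless,
        observation_options=ObservationOptions.full,
    )

    observations, _ = env.reset()
    terminateds = {"__all__": False}
    while not terminateds["__all__"]:
        # With ObservationOptions.full every agent has an entry; act only for
        # the agents whose actor is currently active in the simulation.
        actions = {
            agent_id: agents[agent_id].act(ob)
            for agent_id, ob in observations.items()
            if ob["active"]
        }
        observations, rewards, terminateds, truncateds, infos = env.step(actions)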
Adaickalavan commented 11 months ago

Hi @ggplz,

Consider reading the documentation on multi-agent scenarios at https://smarts.readthedocs.io/en/latest/sim/env.html#multi-agent-scenario, which might be helpful.
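For reference, with the default observation option (where the returned dictionary only contains agents that currently have actors), the usual multi-agent pattern is to build the action dictionary from the observation keys rather than from the full agent map; a minimal sketch, assuming the example script above:

    # Sketch only: with the default observation option, `observations` holds
    # entries only for agents that currently have actors, so the actions are
    # keyed off the observations instead of the full `agents` dict.
    observations, _ = env.reset()
    terminateds = {"__all__": False}
    while not terminateds["__all__"]:
        actions = {
            agent_id: agents[agent_id].act(agent_obs)
            for agent_id, agent_obs in observations.items()
        }
        observations, rewards, terminateds, truncateds, infos = env.step(actions)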