PKU-MARL / HARL

Official implementation of HARL algorithms based on PyTorch.

Issues in Multi-Agent Environment Development #13

Closed. zkzfor closed this issue 1 year ago.

zkzfor commented 1 year ago

I need your help again. I currently need to create a multi-agent environment. To confirm that my understanding of the interface is correct, I have written an environment that just returns random samples. However, my test code has run into a problem. Here is my environment code:

import copy
import numpy as np

# import os

import gymnasium
from gymnasium import spaces
from gymnasium.utils import seeding

# logging.basicConfig()
# logging.getLogger().setLevel(logging.ERROR)

class LeoHapEnv:
    def __init__(self, args):
        # self.env = ...
        self.n_agents = 3
        self.max_cycles = 5
        self.cur_step = 0
        self.share_observation_space = [spaces.Box(0.0, 1.0, (54,)),
                                        spaces.Box(0.0, 1.0, (54,)),
                                        spaces.Box(0.0, 1.0, (54,))]
        self.observation_space = [spaces.Box(0.0, 1.0, (18,)),
                                  spaces.Box(0.0, 1.0, (18,)),
                                  spaces.Box(0.0, 1.0, (18,))]
        self.action_space = [spaces.Box(0.0, 1.0, (5,)),
                             spaces.Box(0.0, 1.0, (5,)),
                             spaces.Box(0.0, 1.0, (5,))]

    def step(self, actions):
        self.cur_step += 1
        obs = []
        for agent_n in range(self.n_agents):
            obs.append(self.observation_space[agent_n].sample())

        state = []
        for agent_n in range(self.n_agents):
            state.append(self.share_observation_space[agent_n].sample())

        rewards = [[np.random.uniform(low=0.0, high=1.0, size=None)]] * self.n_agents

        infos = []
        for agent_n in range(self.n_agents):
            info = {}
            infos.append(info)

        dones = []
        if self.cur_step == self.max_cycles:
            for agent_n in range(self.n_agents):
                done = True
                infos[agent_n]["bad_transition"] = True
                dones.append(done)
        else:
            for agent_n in range(self.n_agents):
                done = False
                dones.append(done)

        return obs, state, rewards, dones, info, None

    def reset(self):

        self.cur_step = 0

        obs = []
        for agent_n in range(self.n_agents):
            obs.append(self.observation_space[agent_n].sample())

        state = []
        for agent_n in range(self.n_agents):
            state.append(self.share_observation_space[agent_n].sample())

        # available_actions = None

        return obs, state, None

    def seed(self, seed):
        pass

    def render(self):
        pass

    def close(self):
        pass
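
Before wiring such an environment into the training pipeline, a small stand-alone smoke test can help confirm that reset and step return per-agent lists of the expected length. The sketch below is hypothetical and not part of HARL; it just instantiates the class above and checks the list lengths, which is the kind of interface mismatch discussed further below.

# Hypothetical smoke test, not part of HARL: drive the environment directly and
# check that every per-agent return value is a list of length n_agents.
env = LeoHapEnv(args={})
obs, state, available_actions = env.reset()
assert len(obs) == env.n_agents
assert len(state) == env.n_agents

actions = [space.sample() for space in env.action_space]
obs, state, rewards, dones, infos, available_actions = env.step(actions)
for name, value in [("obs", obs), ("state", state), ("rewards", rewards),
                    ("dones", dones), ("infos", infos)]:
    assert len(value) == env.n_agents, f"{name} is not a per-agent list"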

The error message is as follows:

(harl) /mnt/workspace>  cd /mnt/workspace ; /usr/bin/env /home/pai/envs/harl/bin/python /root/.local/share/code-server/extensions/ms-python.python-2022.2.1924087327/pythonFiles/lib/python/debugpy/launcher 38131 -- /mnt/workspace/HARL/examples/test.py 
choose to use gpu...
share_observation_space:  [Box(0.0, 1.0, (54,), float32), Box(0.0, 1.0, (54,), float32), Box(0.0, 1.0, (54,), float32)]
observation_space:  [Box(0.0, 1.0, (18,), float32), Box(0.0, 1.0, (18,), float32), Box(0.0, 1.0, (18,), float32)]
action_space:  [Box(0.0, 1.0, (5,), float32), Box(0.0, 1.0, (5,), float32), Box(0.0, 1.0, (5,), float32)]
start warmup
finish warmup, start training
Process Process-2:
Process Process-1:
Process Process-3:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/pai/envs/harl/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/pai/envs/harl/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/pai/envs/harl/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/pai/envs/harl/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/pai/envs/harl/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/pai/envs/harl/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/mnt/workspace/HARL/harl/envs/env_wrappers.py", line 185, in shareworker
    info[0]["original_obs"] = copy.deepcopy(ob)
  File "/mnt/workspace/HARL/harl/envs/env_wrappers.py", line 185, in shareworker
    info[0]["original_obs"] = copy.deepcopy(ob)
  File "/mnt/workspace/HARL/harl/envs/env_wrappers.py", line 185, in shareworker
    info[0]["original_obs"] = copy.deepcopy(ob)
KeyError: 0
KeyError: 0
KeyError: 0

The configuration is as follows:

args = {
    'algo': 'hasac',
    'env': 'leo_hap',
    'exp_name': 'test2',
    'load_config': '/mnt/workspace/HARL/examples/config.json'
}

algo_args = {
    'algo': {
        'alpha': 0.2,
        'alpha_lr': 0.0003,
        'auto_alpha': True,
        'batch_size': 1000,
        'buffer_size': 1000000,
        'fixed_order': False,
        'gamma': 0.99,
        'huber_delta': 10.0,
        'n_step': 20,
        'polyak': 0.005,
        'share_param': False,
        'use_huber_loss': False,
        'use_policy_active_masks': True
    },
    'device': {
        'cuda': True,
        'cuda_deterministic': True,
        'torch_threads': 4
    },
    'eval': {
        'eval_episodes': 6,
        'n_eval_rollout_threads': 3,
        'use_eval': True
    },
    'logger': {'log_dir': './results'},
    'model': {
        'activation_func': 'relu',
        'critic_lr': 0.0005,
        'final_activation_func': 'tanh',
        'gain': 0.01,
        'hidden_sizes': [256, 256],
        'initialization_method': 'orthogonal_',
        'lr': 0.0005,
        'use_feature_normalization': True
    },
    'render': {
        'render_episodes': 10,
        'use_render': False
    },
    'seed': {'seed': 3, 'seed_specify': True},
    'train': {
        'eval_interval': 10,
        'log_interval': None,
        'model_dir': None,
        'n_rollout_threads': 3,
        'num_env_steps': 20000000,
        'train_interval': 50,
        'update_per_train': 1,
        'use_linear_lr_decay': False,
        'use_proper_time_limits': True,
        'use_valuenorm': False,
        'warmup_steps': 9
    }
}

env_args = {'continuous_actions': True, 'scenario': 'simple_spread_v2'}

Perhaps my question is very basic, but I have been debugging for a long time and still haven't found the problem. :(

Ivan-Zhong commented 1 year ago

I think the reason is that the step function should return obs, state, rewards, dones, infos, None instead of obs, state, rewards, dones, info, None. Can you check it?
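
For context, the traceback shows shareworker indexing the sixth return value of step with 0 (info[0]["original_obs"] = ...), i.e. it expects a per-agent list of dicts. Because the snippet above returns the single dict info, which is empty at that point, info[0] looks up the key 0 in an empty dict, which is exactly the KeyError: 0 seen in each worker process. A minimal sketch of the intended return line, reusing the names already built inside step:

        # infos already holds one dict per agent, so the wrapper can index it
        # as infos[agent_id]. The last slot (available_actions) stays None,
        # as in the original snippet with continuous actions.
        return obs, state, rewards, dones, infos, None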

zkzfor commented 1 year ago

God, I was so stupid. I'm sorry for wasting your valuable time on such a silly problem. After the modification, the test environment has run for two rounds and now runs stably. Thank you very much for your help. If you hadn't found the problem, I might have had to give up.

Ivan-Zhong commented 1 year ago

Don't worry at all! I'm really glad to hear that the issue has been resolved and that your test environment is running smoothly now. If you have any more questions or run into any other issues, please feel free to reach out.