stefanbschneider / mobile-env

An open, minimalist Gymnasium environment for autonomous coordination in wireless mobile networks.
https://mobile-env.readthedocs.io
MIT License

For custom env #44

Closed R0B1NNN1 closed 1 year ago

R0B1NNN1 commented 1 year ago

Hello, @stefanbschneider @stwerner97

I ran into a problem when defining a new custom environment. Here is my code:

# imports needed for the custom scenario (added here for completeness)
from mobile_env.core.base import MComCore
from mobile_env.core.entities import BaseStation, UserEquipment


class Env1(MComCore):
    # overwrite some of the default settings
    @classmethod
    def default_config(cls):
        config = super().default_config()
        # update map size, episode length, seed, etc.
        config.update({
            "width": 200,
            "height": 200,
            "EP_MAX_TIME": 100,
            "seed": 68,
            "reset_rng_episode": True  
        })      
        return config

    # configure users and cells in the constructor
    def __init__(self, config={}, render_mode=None):
        # load default config defined above; overwrite with custom params
        env_config = self.default_config()
        env_config.update(config)

        # three base stations; unpack config defaults for other params
        stations = [
            BaseStation(bs_id=0, pos=(110, 130), **env_config["bs"]),
            BaseStation(bs_id=1, pos=(65, 80), **env_config["bs"]),
            BaseStation(bs_id=2, pos=(120, 30), **env_config["bs"]),
        ]

        # users
        users = [
            # five users with config defaults
            UserEquipment(ue_id=1, **env_config["ue"]),
            UserEquipment(ue_id=2, **env_config["ue"]),
            UserEquipment(ue_id=3, **env_config["ue"]),
            UserEquipment(ue_id=4, **env_config["ue"]),
            UserEquipment(ue_id=5, **env_config["ue"]),
        ]

        super().__init__(stations, users, config, render_mode)       

import gymnasium
from ray.tune.registry import register_env

# use mobile-env's multi-agent wrapper for RLlib
def register(config):
    # importing mobile_env registers the included environments
    from mobile_env.wrappers.multi_agent import RLlibMAWrapper

    env = Env1(config={"seed": 68}, render_mode="rgb_array")
    return RLlibMAWrapper(env)

# register the predefined scenario with RLlib
register_env("mobile-small-ma-v0", register)

import ray

# init Ray with the available CPUs (and GPUs)
ray.init(
  num_cpus=5,   # change to your available number of CPUs
  include_dashboard=False,
  ignore_reinit_error=True,
  log_to_driver=False,
)

import ray.air
from ray.rllib.algorithms.ppo import PPOConfig

from ray.rllib.policy.policy import PolicySpec
from ray.tune.stopper import MaximumIterationStopper

# Create an RLlib config using multi-agent PPO on mobile-env's small scenario.
config = (
    PPOConfig()
    .environment(env="mobile-small-ma-v0")
    # Here, we configure all agents to share the same policy.
    .multi_agent(
        policies={"shared_policy": PolicySpec()},
        policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: "shared_policy",
    )
    # RLlib needs one more CPU than configured below (for the driver/trainer?)
    .resources(num_cpus_per_worker=4)
    .rollouts(num_rollout_workers=1)
)

# Create the Trainer/Tuner and define how long to train
tuner = ray.tune.Tuner(
    "PPO",
    run_config=ray.air.RunConfig(
        # Save the training progress and checkpoints locally under the specified subfolder.
        storage_path="./results",
        # Control training length by setting the number of iterations. 1 iter = 4000 time steps by default.
        stop=MaximumIterationStopper(max_iter=5),
        checkpoint_config=ray.air.CheckpointConfig(checkpoint_at_end=True),
    ),
    param_space=config,
)

# Run training and save the result
result_grid = tuner.fit()

So when I try to override the original MComCore env, I get the following error:

2023-08-07 21:58:28,868 ERROR tune_controller.py:873 -- Trial task failed for trial PPO_mobile-small-ma-v0_1d01d_00000
Traceback (most recent call last):
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\air\execution\_internal\event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\_private\auto_init_hook.py", line 18, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\_private\client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\_private\worker.py", line 2540, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(AttributeError): ray::PPO.train() (pid=32848, ip=127.0.0.1, actor_id=de081499b53ab9f1b929d70b01000000, repr=PPO)
  File "python\ray\_raylet.pyx", line 1434, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 1438, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 1378, in ray._raylet.execute_task.function_executor
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\_private\function_manager.py", line 724, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\tune\trainable\trainable.py", line 389, in train
    raise skipped from exception_cause(skipped)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\tune\trainable\trainable.py", line 386, in train
    result = self.step()
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 803, in step
    results, train_iter_ctx = self._run_one_training_iteration()
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 2853, in _run_one_training_iteration
    results = self.training_step()
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 403, in training_step
    train_batch = synchronous_parallel_sample(
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 85, in synchronous_parallel_sample
    sample_batches = worker_set.foreach_worker(
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 722, in foreach_worker
    handle_remote_call_result_errors(remote_results, self._ignore_worker_failures)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 75, in handle_remote_call_result_errors
    raise r.get()
ray.exceptions.RayTaskError(AttributeError): ray::RolloutWorker.apply() (pid=34104, ip=127.0.0.1, actor_id=87a496b9b1fd3ab0e62cb5aa01000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000001AA097D0100>)
  File "python\ray\_raylet.pyx", line 1434, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 1438, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 1378, in ray._raylet.execute_task.function_executor
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\_private\function_manager.py", line 724, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\utils\actor_manager.py", line 185, in apply
    raise e
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\utils\actor_manager.py", line 176, in apply
    return func(self, *args, **kwargs)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 86, in <lambda>
    lambda w: w.sample(), local_worker=False, healthy_only=True
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
    return method(self, *_args, **_kwargs)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 915, in sample
    batches = [self.input_reader.next()]
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\evaluation\sampler.py", line 92, in next
    batches = [self.get_data()]
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\evaluation\sampler.py", line 277, in get_data
    item = next(self._env_runner)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 323, in run
    outputs = self.step()
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 342, in step
    ) = self._base_env.poll()
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\env\multi_agent_env.py", line 633, in poll
    ) = env_state.poll()
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\env\multi_agent_env.py", line 828, in poll
    self.reset()
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\env\multi_agent_env.py", line 912, in reset
    raise e
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\ray\rllib\env\multi_agent_env.py", line 906, in reset
    obs_and_infos = self.env.reset(seed=seed, options=options)
  File "c:\Users\18406\anaconda3\envs\rayenv\lib\site-packages\mobile_env\wrappers\multi_agent.py", line 34, in reset
    self.prev_step_ues = set(obs.keys())
AttributeError: 'numpy.ndarray' object has no attribute 'keys'

But if I do not override it, in other words if I use the default multi-agent env mobile-small-ma-v0, this error does not happen. I am wondering why?

Thanks for replying in advance.

R0B1NNN1 commented 1 year ago

It seems like the problem is with the reset function in mobile_env/wrappers/multi_agent.py? It is really strange, because it works if I run the following:

import gymnasium
from ray.tune.registry import register_env

# use mobile-env's multi-agent wrapper for RLlib
def register(config):
    # importing mobile_env registers the included environments
    import mobile_env
    from mobile_env.wrappers.multi_agent import RLlibMAWrapper

    env = gymnasium.make("mobile-small-ma-v0")
    return RLlibMAWrapper(env)

# register the predefined scenario with RLlib
register_env("mobile-small-ma-v0", register)

import ray

# init Ray with the available CPUs (and GPUs)
ray.init(
  num_cpus=5,   # change to your available number of CPUs
  include_dashboard=False,
  ignore_reinit_error=True,
  log_to_driver=False,
)

import ray.air
from ray.rllib.algorithms.ppo import PPOConfig

from ray.rllib.policy.policy import PolicySpec
from ray.tune.stopper import MaximumIterationStopper

# Create an RLlib config using multi-agent PPO on mobile-env's small scenario.
config = (
    PPOConfig()
    .environment(env="mobile-small-ma-v0")
    # Here, we configure all agents to share the same policy.
    .multi_agent(
        policies={'shared_policy': PolicySpec()},
        policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: 'shared_policy',
    )
    # RLlib needs one more CPU than configured below (for the driver/trainer?)
    .resources(num_cpus_per_worker=4)
    .rollouts(num_rollout_workers=1)
)

# Create the Trainer/Tuner and define how long to train
tuner = ray.tune.Tuner(
    "PPO",
    run_config=ray.air.RunConfig(
        # Save the training progress and checkpoints locally under the specified subfolder.
        storage_path="./CTDE_1m",
        # Control training length by setting the number of iterations. 1 iter = 4000 time steps by default.
        stop=MaximumIterationStopper(max_iter=1),
        checkpoint_config=ray.air.CheckpointConfig(checkpoint_at_end=True),
    ),
    param_space=config,
)

# Run training and save the result
result_grid = tuner.fit()

where I did not override anything and just used the default env.

stefanbschneider commented 1 year ago

I think the issue is in how you register and pass your custom Env to RLlib.

I'm also always a bit unsure how to do that. As a reference, here is how the pre-defined scenarios are registered: https://github.com/stefanbschneider/mobile-env/blob/main/mobile_env/scenarios/registry.py

You shouldn't use the same name for your new custom env as one of the existing env names (e.g., "mobile-small-ma-v0").
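
If I remember the structure correctly, the registry registers one env id per scenario/handler combination and sets the corresponding handler in the config, roughly like this (a sketch from memory, so the exact class names and kwargs may differ from registry.py):

import gymnasium

from mobile_env.handlers.multi_agent import MComMAWrapper if False else None  # placeholder removed below
from mobile_env.handlers.multi_agent import MComMAHandler

# sketch: the "-ma-" ids are registered with the multi-agent handler in their config,
# while the "-central-" ids use the central (single-agent) handler
gymnasium.register(
    id="mobile-small-ma-v0",
    entry_point="mobile_env.scenarios.small:MComSmall",
    kwargs={"config": {"handler": MComMAHandler}},
)

So if you construct the env yourself instead of going through gymnasium.make, the handler in your config has to match what the wrapper expects.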

R0B1NNN1 commented 1 year ago

@stefanbschneider: Thanks for replying. I actually tried to use a different name; it still shows me the same problem.

from ray.tune.registry import register_env

# use mobile-env's multi-agent wrapper for RLlib
def register(config):
    # importing mobile_env registers the included environments
    from mobile_env.wrappers.multi_agent import RLlibMAWrapper

    env = Env1(config={"seed": 68}, render_mode="rgb_array")
    return RLlibMAWrapper(env)

# register the predefined scenario with RLlib
register_env("TEST1", register)

This is really strange. I am still testing it. As I mentioned in the other issue, I assigned one agent to each BS, and that works when I register my custom env. So I do not know why this happens.

Thanks for replying.

R0B1NNN1 commented 1 year ago

@stefanbschneider

Hi, I found out why: it is because of the handler. I cloned the source code and made some changes, but forgot that the default handler in the base file is the central handler. So maybe we can close this issue for now.
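
In case anyone else runs into the same AttributeError: the central handler returns a single flattened observation array rather than the per-agent dict that RLlibMAWrapper expects, which is why obs.keys() fails. Setting the multi-agent handler in the custom config fixed it for me, roughly like this (assuming the handler class is MComMAHandler from mobile_env.handlers.multi_agent, as used by the predefined -ma- scenarios):

from mobile_env.core.base import MComCore
from mobile_env.handlers.multi_agent import MComMAHandler


class Env1(MComCore):
    @classmethod
    def default_config(cls):
        config = super().default_config()
        # the base config defaults to the central (single-agent) handler;
        # switch to the multi-agent handler so reset()/step() return per-agent dicts
        config.update({"handler": MComMAHandler})
        return config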