ray-project / ray

Ray is a unified framework for scaling AI and Python applications. Ray consists of a core distributed runtime and a set of AI Libraries for accelerating ML workloads.
https://ray.io
Apache License 2.0

RLlib - Multiagent new api - rllib-multi-agent-env-v0 already in registry #45433

Open zoetsekas opened 1 month ago

zoetsekas commented 1 month ago

What happened + What you expected to happen

I converted existing code that was working on Ray 2.7 to Ray 2.20 (new API stack).

The error:

  File "/opt/project/trading/training/model/rl/multi_agent/ppo/equity/trainer.py", line 125, in start_training_equity
    algo: Algorithm = config.build()
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm_config.py", line 859, in build
    return algo_class(
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 554, in __init__
    super().__init__(
  File "/usr/local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 158, in __init__
    self.setup(copy.deepcopy(self.config))
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 670, in setup
    self.evaluation_workers: EnvRunnerGroup = EnvRunnerGroup(
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/env/env_runner_group.py", line 169, in __init__
    self._setup(
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/env/env_runner_group.py", line 260, in _setup
    self._local_worker = self._make_worker(
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/env/env_runner_group.py", line 1108, in _make_worker
    worker = cls(
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/env/multi_agent_env_runner.py", line 73, in __init__
    self.make_env()
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/env/multi_agent_env_runner.py", line 726, in make_env
    gym.register(
  File "/usr/local/lib/python3.10/site-packages/gymnasium/envs/registration.py", line 693, in register
    logger.warn(f"Overriding environment {new_spec.id} already in registry.")
  File "/usr/local/lib/python3.10/site-packages/gymnasium/logger.py", line 55, in warn
    warnings.warn(
UserWarning: WARN: Overriding environment rllib-multi-agent-env-v0 already in registry.
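For context (not part of the original report): the last frames show gymnasium's registry emitting a UserWarning whenever an environment id that already exists is registered again, which is what RLlib's MultiAgentEnvRunner does with its internal "rllib-multi-agent-env-v0" id. A minimal sketch that reproduces the same message; "my_module:MyEnv" is a placeholder entry point (gymnasium only imports it on gym.make, not at registration time):

import gymnasium as gym

# Registering the same id twice triggers the exact warning from the traceback above.
gym.register(id="rllib-multi-agent-env-v0", entry_point="my_module:MyEnv")
gym.register(id="rllib-multi-agent-env-v0", entry_point="my_module:MyEnv")
# UserWarning: WARN: Overriding environment rllib-multi-agent-env-v0 already in registry.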

Versions / Dependencies

Ray 2.20

Reproduction script


# Imports below are assumed for Ray 2.20 / the new API stack; project-specific pieces
# (FinancialPortfolioEnv, env_cfg, features, get_policies, get_policies_to_be_trained,
# policy_mapping_fn, PrintLogger) are defined elsewhere in the user's project.
import gymnasium as gym

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.ppo.ppo_catalog import PPOCatalog
from ray.rllib.algorithms.ppo.torch.ppo_torch_rl_module import PPOTorchRLModule
from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec
from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
from ray.rllib.env.env_context import EnvContext
from ray.rllib.env.multi_agent_env_runner import MultiAgentEnvRunner
from ray.tune.registry import register_env


def env_creator(cfg):
    return FinancialPortfolioEnv(env_config=EnvContext(env_config=cfg, worker_index=0))

register_env('FinancialPortfolio-Equity-v0', env_creator)

config = (
    PPOConfig()
    .api_stack(
        enable_rl_module_and_learner=True,
        enable_env_runner_and_connector_v2=True,
    )
    .environment(env="FinancialPortfolio-Equity-v0", env_config=env_cfg)
    .training(
        train_batch_size=256,
        gamma=0.99,
        lr=0.001,
        lambda_=0.95,
        # sgd_minibatch_size=64,
        clip_param=0.2,
        # vf_clip_param = 10.0,
        kl_target=0.005,
        kl_coeff=0.5,
        entropy_coeff=0.01,
        vf_loss_coeff=0.5,
        model={
            "uses_new_env_runners": True,
            "use_lstm": False,
            "fcnet_hiddens": [1024, 512, 256, 128, 64, 32],
            "fcnet_activation": "relu",
        },
    )
    .framework(framework="torch")
    .resources(
        num_learner_workers=0,  # <- in most cases, set this value to the number of GPUs
        num_gpus_per_learner_worker=0,  # <- set this to 1, if you have at least 1 GPU
        num_cpus_for_local_worker=1,
    )
    .env_runners(env_runner_cls=MultiAgentEnvRunner, num_env_runners=0, num_envs_per_env_runner=1,
                 preprocessor_pref=None)
    .evaluation(evaluation_interval=1, evaluation_duration=5, evaluation_duration_unit="episodes",
                evaluation_num_env_runners=-1)
    .debugging(log_level="INFO", logger_config={"type": PrintLogger, "prefix": "trading-equity"})
    .multi_agent(
        count_steps_by="env_steps",
        policies=get_policies_to_be_trained(),
        policy_mapping_fn=policy_mapping_fn,
        policies_to_train=get_policies_to_be_trained()
    ).rl_module(
        _enable_rl_module_api=True,
        model_config_dict={
            "fcnet_activation": "relu",
            "fcnet_hiddens": [512, 256, 128, 64, 32],
            "uses_new_env_runners": True,
        },
        rl_module_spec=MultiAgentRLModuleSpec(
            module_specs={
                p: SingleAgentRLModuleSpec(
                    module_class=PPOTorchRLModule,
                    action_space=gym.spaces.Discrete(
                        n=env_cfg['info']['parameters']['environment']['action']['n'],
                        start=env_cfg['info']['parameters']['environment']['action']['start']),
                    observation_space=gym.spaces.Box(low=0, high=1, shape=(len(features),)),
                    model_config_dict={
                        "fcnet_activation": "relu",
                        "fcnet_hiddens": [512, 256, 128, 64, 32]},
                    catalog_class=PPOCatalog,
                ) for p in get_policies()},
        ),
    )
)
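The config above is then built and trained in the user's trainer.py (the frames at lines 125 and 129 in the tracebacks). A minimal sketch of that driver step, reconstructed from the traceback frames since trainer.py itself is not shown:

# Driver step reconstructed from the traceback frames (hypothetical, not the actual trainer.py).
algo = config.build()   # trainer.py:125 - builds the Algorithm and its EnvRunners, re-registering the env
result = algo.train()   # trainer.py:129 - runs the first training iteration
print(result)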

Issue Severity

High: It blocks me from completing my task.
zoetsekas commented 1 month ago

When evaluation is disabled, it proceeds further, but a new error appears:

  File "/opt/project/trading/training/model/rl/multi_agent/ppo/equity/trainer.py", line 129, in start_training_equity
    result = algo.train()
  File "/usr/local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 331, in train
    raise skipped from exception_cause(skipped)
  File "/usr/local/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 328, in train
    result = self.step()
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 873, in step
    train_results, train_iter_ctx = self._run_one_training_iteration()
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 3156, in _run_one_training_iteration
    results = self.training_step()
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/ppo.py", line 424, in training_step
    return self._training_step_new_api_stack()
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/ppo.py", line 445, in _training_step_new_api_stack
    episodes, env_runner_results = synchronous_parallel_sample(
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/execution/rollout_ops.py", line 94, in synchronous_parallel_sample
    stats_dicts = [worker_set.local_worker().get_metrics()]
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/env/multi_agent_env_runner.py", line 615, in get_metrics
    return self.metrics.reduce()
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/utils/metrics/metrics_logger.py", line 750, in reduce
    self.stats[sub_key] = stat.reduce()
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/utils/metrics/stats.py", line 274, in reduce
    self.values = self._reduced_values()[1]
  File "/usr/local/lib/python3.10/site-packages/ray/rllib/utils/metrics/stats.py", line 539, in _reduced_values
    reduced = reduce_meth(values)
  File "/usr/local/lib/python3.10/site-packages/numpy/lib/nanfunctions.py", line 1052, in nanmean
    warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2)
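For context (not from the report itself): the final frame is numpy's nanmean warning about reducing an empty array, which is what happens when a metrics buffer has no values yet on the first reduce. A minimal sketch of the same numpy behavior:

import warnings
import numpy as np

# Reducing an empty array produces the "Mean of empty slice" RuntimeWarning;
# the result is simply NaN rather than an error.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    print(np.nanmean([]))      # nan
    print(caught[0].message)   # Mean of empty slice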

simonsays1980 commented 1 month ago

@zoetsekas Thanks for filing the issue. These are both warnings: one comes from a default environment name being overridden in the registry, the other from initial metrics still being NaN. Both can be safely ignored.
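Not part of the thread, but if the two warnings are noisy, a standard-library sketch for silencing them in the driver process (message patterns taken from the tracebacks above):

import warnings

# Silence the gymnasium re-registration warning and numpy's empty-slice warning.
# filterwarnings matches the pattern against the start of the warning message.
warnings.filterwarnings("ignore", message="WARN: Overriding environment", category=UserWarning)
warnings.filterwarnings("ignore", message="Mean of empty slice", category=RuntimeWarning)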