LucasAlegre / sumo-rl

Reinforcement Learning environments for Traffic Signal Control with SUMO. Compatible with Gymnasium, PettingZoo, and popular RL libraries.
https://lucasalegre.github.io/sumo-rl
MIT License
697 stars · 191 forks

ValueError: The two structures don't have the same nested structure. when python experiments/ppo_4x4grid.py #179

Open lie12huo opened 8 months ago

lie12huo commented 8 months ago

When I executed the command “python experiments/ppo_4x4grid.py” for training, the following error occurred:

Failure # 1 (occurred at 2023-12-28_10-41-43) ray::PPO.train() (pid=8400, ip=127.0.0.1, actor_id=ad9e6648b1b2ed22aab2737601000000, repr=PPO) File "python\ray_raylet.pyx", line 1813, in ray._raylet.execute_task File "python\ray_raylet.pyx", line 1754, in ray._raylet.execute_task.function_executor File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray_private\function_manager.py", line 726, in actor_method_executor return method(__ray_actor, *args, kwargs) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span return method(self, *_args, *_kwargs) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\tune\trainable\trainable.py", line 342, in train raise skipped from exception_cause(skipped) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\tune\trainable\trainable.py", line 339, in train result = self.step() File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span return method(self, _args, _kwargs) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 852, in step results, train_iter_ctx = self._run_one_training_iteration() File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span return method(self, *_args, *_kwargs) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 3042, in _run_one_training_iteration results = self.training_step() File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span return method(self, _args, **_kwargs) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\algorithms\ppo\ppo.py", line 407, in training_step train_batch = synchronous_parallel_sample( File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 83, in synchronous_parallel_sample sample_batches = worker_set.foreach_worker( File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 705, in foreach_worker handle_remote_call_result_errors(remote_results, self._ignore_worker_failures) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 78, in handle_remote_call_result_errors raise r.get() ray.exceptions.RayTaskError(ValueError): ray::RolloutWorker.apply() (pid=2132, ip=127.0.0.1, actor_id=3e746f41fe7f2d8a17e49dfe01000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000029A98298220>) ValueError: The two structures don't have the same nested structure.

First structure: type=ndarray str=[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

Second structure: type=OrderedDict str=OrderedDict([('0', array([0.34979227, 0.8725393 , 0.08013749, 0.4683012 , 0.9829854 , 0.98014855, 0.58258814, 0.05028085, 0.993543 , 0.79412615, 0.06607185], dtype=float32)), ('1', array([0.88398516, 0.7195979 , 0.94150907, 0.9829224 , 0.92211246, 0.7414135 , 0.9920831 , 0.14566854, 0.81726855, 0.9366454 , 0.6274262 ], dtype=float32)), ('10', array([0.16446641, 0.6756482 , 0.7539203 , 0.4934962 , 0.8585525 , 0.6322913 , 0.7542331 , 0.7486755 , 0.49466062, 0.72004724, 0.8117338 ], dtype=float32)), ('11', array([0.5329431 , 0.8486154 , 0.9495838 , 0.8622418 , 0.8732489 , 0.11242072, 0.0763188 , 0.2220357 , 0.854032 , 0.973182 , 0.14879882], dtype=float32)), ('12', array([0.732119 , 0.89823484, 0.69070387, 0.77506936, 0.98971057, 0.99043095, 0.3193064 , 0.69851375, 0.40632918, 0.406967 , 0.62862474], dtype=float32)), ('13', array([0.47952086, 0.02109447, 0.42499098, 0.40413964, 0.264478 , 0.10132027, 0.2610051 , 0.02141384, 0.231504 , 0.8975433 , 0.99703795], dtype=float32)), ('14', array([0.67056215, 0.16663882, 0.08163167, 0.73198503, 0.18105489, 0.99467266, 0.02948203, 0.9314566 , 0.04069875, 0.8837653 , 0.35313195], dtype=float32)), ('15', array([0.4275111 , 0.05799369, 0.20000993, 0.8529059 , 0.7172784 , 0.7484241 , 0.90706795, 0.9734425 , 0.55966806, 0.81240386, 0.03192328], dtype=float32)), ('2', array([0.6548908 , 0.42107597, 0.06570876, 0.09022505, 0.07516731, 0.7484601 , 0.3176393 , 0.29006734, 0.6668242 , 0.76966023, 0.31101513], dtype=float32)), ('3', array([0.8722979 , 0.6160401 , 0.28451207, 0.3753895 , 0.59421366, 0.89204305, 0.01789684, 0.80874205, 0.4302826 , 0.9208242 , 0.3285712 ], dtype=float32)), ('4', array([0.9464896 , 0.01961527, 0.834267 , 0.6228876 , 0.9121172 , 0.01453374, 0.499453 , 0.12847178, 0.5970337 , 0.92107564, 0.1353088 ], dtype=float32)), ('5', array([0.77832663, 0.8915154 , 0.7435042 , 0.8148381 , 0.9744162 , 0.55549747, 0.8838653 , 0.63371605, 0.23385969, 0.34045848, 0.1165311 ], dtype=float32)), ('6', array([0.45601034, 0.55218536, 0.40453702, 0.6130139 , 0.9555645 , 0.8283712 , 0.02224702, 0.39311445, 0.791911 , 0.32984698, 0.18268831], dtype=float32)), ('7', array([0.92055535, 0.77292 , 0.27333862, 0.29092216, 0.782299 , 0.25580984, 0.92083466, 0.0973004 , 0.17253524, 0.8128827 , 0.504909 ], dtype=float32)), ('8', array([0.51502854, 0.56702006, 0.17254692, 0.7095564 , 0.03431124, 0.76993406, 0.86907685, 0.38690564, 0.3951562 , 0.11255713, 0.8427719 ], dtype=float32)), ('9', array([0.74191433, 0.6941966 , 0.6615604 , 0.7109615 , 0.21396402, 0.07951149, 0.48645335, 0.7014952 , 0.6249435 , 0.13057923, 0.56323195], dtype=float32))])

More specifically: Substructure "type=OrderedDict str=OrderedDict([('0', array([0.34979227, 0.8725393 , 0.08013749, 0.4683012 , 0.9829854 , 0.98014855, 0.58258814, 0.05028085, 0.993543 , 0.79412615, 0.06607185], dtype=float32)), ('1', array([0.88398516, 0.7195979 , 0.94150907, 0.9829224 , 0.92211246, 0.7414135 , 0.9920831 , 0.14566854, 0.81726855, 0.9366454 , 0.6274262 ], dtype=float32)), ('10', array([0.16446641, 0.6756482 , 0.7539203 , 0.4934962 , 0.8585525 , 0.6322913 , 0.7542331 , 0.7486755 , 0.49466062, 0.72004724, 0.8117338 ], dtype=float32)), ('11', array([0.5329431 , 0.8486154 , 0.9495838 , 0.8622418 , 0.8732489 , 0.11242072, 0.0763188 , 0.2220357 , 0.854032 , 0.973182 , 0.14879882], dtype=float32)), ('12', array([0.732119 , 0.89823484, 0.69070387, 0.77506936, 0.98971057, 0.99043095, 0.3193064 , 0.69851375, 0.40632918, 0.406967 , 0.62862474], dtype=float32)), ('13', array([0.47952086, 0.02109447, 0.42499098, 0.40413964, 0.264478 , 0.10132027, 0.2610051 , 0.02141384, 0.231504 , 0.8975433 , 0.99703795], dtype=float32)), ('14', array([0.67056215, 0.16663882, 0.08163167, 0.73198503, 0.18105489, 0.99467266, 0.02948203, 0.9314566 , 0.04069875, 0.8837653 , 0.35313195], dtype=float32)), ('15', array([0.4275111 , 0.05799369, 0.20000993, 0.8529059 , 0.7172784 , 0.7484241 , 0.90706795, 0.9734425 , 0.55966806, 0.81240386, 0.03192328], dtype=float32)), ('2', array([0.6548908 , 0.42107597, 0.06570876, 0.09022505, 0.07516731, 0.7484601 , 0.3176393 , 0.29006734, 0.6668242 , 0.76966023, 0.31101513], dtype=float32)), ('3', array([0.8722979 , 0.6160401 , 0.28451207, 0.3753895 , 0.59421366, 0.89204305, 0.01789684, 0.80874205, 0.4302826 , 0.9208242 , 0.3285712 ], dtype=float32)), ('4', array([0.9464896 , 0.01961527, 0.834267 , 0.6228876 , 0.9121172 , 0.01453374, 0.499453 , 0.12847178, 0.5970337 , 0.92107564, 0.1353088 ], dtype=float32)), ('5', array([0.77832663, 0.8915154 , 0.7435042 , 0.8148381 , 0.9744162 , 0.55549747, 0.8838653 , 0.63371605, 0.23385969, 0.34045848, 0.1165311 ], dtype=float32)), ('6', array([0.45601034, 0.55218536, 0.40453702, 0.6130139 , 0.9555645 , 0.8283712 , 0.02224702, 0.39311445, 0.791911 , 0.32984698, 0.18268831], dtype=float32)), ('7', array([0.92055535, 0.77292 , 0.27333862, 0.29092216, 0.782299 , 0.25580984, 0.92083466, 0.0973004 , 0.17253524, 0.8128827 , 0.504909 ], dtype=float32)), ('8', array([0.51502854, 0.56702006, 0.17254692, 0.7095564 , 0.03431124, 0.76993406, 0.86907685, 0.38690564, 0.3951562 , 0.11255713, 0.8427719 ], dtype=float32)), ('9', array([0.74191433, 0.6941966 , 0.6615604 , 0.7109615 , 0.21396402, 0.07951149, 0.48645335, 0.7014952 , 0.6249435 , 0.13057923, 0.56323195], dtype=float32))])" is a sequence, while substructure "type=ndarray str=[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]" is not

During handling of the above exception, another exception occurred:

ray::RolloutWorker.apply() (pid=2132, ip=127.0.0.1, actor_id=3e746f41fe7f2d8a17e49dfe01000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000029A98298220>) File "python\ray_raylet.pyx", line 1807, in ray._raylet.execute_task File "python\ray_raylet.pyx", line 1908, in ray._raylet.execute_task File "python\ray_raylet.pyx", line 1813, in ray._raylet.execute_task File "python\ray_raylet.pyx", line 1754, in ray._raylet.execute_task.function_executor File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray_private\function_manager.py", line 726, in actor_method_executor return method(ray_actor, *args, kwargs) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span return method(self, *_args, *_kwargs) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\utils\actor_manager.py", line 189, in apply raise e File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\utils\actor_manager.py", line 178, in apply return func(self, args, kwargs) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 84, in lambda w: w.sample(), local_worker=False, healthy_only=True File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span return method(self, *_args, **_kwargs) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 694, in sample batches = [self.input_reader.next()] File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\sampler.py", line 91, in next batches = [self.get_data()] File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\sampler.py", line 276, in get_data item = next(self._env_runner) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 344, in run outputs = self.step() File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 370, in step active_envs, to_eval, outputs = self._process_observations( File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 637, in _process_observations processed = policy.agent_connectors(acd_list) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\connectors\agent\pipeline.py", line 41, in call ret = c(ret) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\connectors\connector.py", line 265, in call return [self.transform(d) for d in acd_list] File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\connectors\connector.py", line 265, in return [self.transform(d) for d in acd_list] File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\connectors\agent\obs_preproc.py", line 58, in transform d[SampleBatch.NEXT_OBS] = self._preprocessor.transform( File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\models\preprocessors.py", line 329, in transform self.check_shape(observation) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\models\preprocessors.py", line 69, in check_shape observation = convert_element_to_space_type( File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\ray\rllib\utils\spaces\space_utils.py", line 472, in convert_element_to_space_type return tree.mapstructure(map, element, sampled_element, check_types=False) File "K:\OpenSource\sumo-rl\venvPy\lib\site-packages\tree__init__.py", line 433, in map_structure assert_same_structure(structures[0], other, check_types=check_types) File 
"K:\OpenSource\sumo-rl\venvPy\lib\site-packages\tree\init__.py", line 288, in assert_same_structure raise type(e)("%s\n" ValueError: The two structures don't have the same nested structure.

First structure: type=ndarray str=[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

Second structure: type=OrderedDict str=OrderedDict([('0', array([0.34979227, 0.8725393 , 0.08013749, 0.4683012 , 0.9829854 , 0.98014855, 0.58258814, 0.05028085, 0.993543 , 0.79412615, 0.06607185], dtype=float32)), ('1', array([0.88398516, 0.7195979 , 0.94150907, 0.9829224 , 0.92211246, 0.7414135 , 0.9920831 , 0.14566854, 0.81726855, 0.9366454 , 0.6274262 ], dtype=float32)), ('10', array([0.16446641, 0.6756482 , 0.7539203 , 0.4934962 , 0.8585525 , 0.6322913 , 0.7542331 , 0.7486755 , 0.49466062, 0.72004724, 0.8117338 ], dtype=float32)), ('11', array([0.5329431 , 0.8486154 , 0.9495838 , 0.8622418 , 0.8732489 , 0.11242072, 0.0763188 , 0.2220357 , 0.854032 , 0.973182 , 0.14879882], dtype=float32)), ('12', array([0.732119 , 0.89823484, 0.69070387, 0.77506936, 0.98971057, 0.99043095, 0.3193064 , 0.69851375, 0.40632918, 0.406967 , 0.62862474], dtype=float32)), ('13', array([0.47952086, 0.02109447, 0.42499098, 0.40413964, 0.264478 , 0.10132027, 0.2610051 , 0.02141384, 0.231504 , 0.8975433 , 0.99703795], dtype=float32)), ('14', array([0.67056215, 0.16663882, 0.08163167, 0.73198503, 0.18105489, 0.99467266, 0.02948203, 0.9314566 , 0.04069875, 0.8837653 , 0.35313195], dtype=float32)), ('15', array([0.4275111 , 0.05799369, 0.20000993, 0.8529059 , 0.7172784 , 0.7484241 , 0.90706795, 0.9734425 , 0.55966806, 0.81240386, 0.03192328], dtype=float32)), ('2', array([0.6548908 , 0.42107597, 0.06570876, 0.09022505, 0.07516731, 0.7484601 , 0.3176393 , 0.29006734, 0.6668242 , 0.76966023, 0.31101513], dtype=float32)), ('3', array([0.8722979 , 0.6160401 , 0.28451207, 0.3753895 , 0.59421366, 0.89204305, 0.01789684, 0.80874205, 0.4302826 , 0.9208242 , 0.3285712 ], dtype=float32)), ('4', array([0.9464896 , 0.01961527, 0.834267 , 0.6228876 , 0.9121172 , 0.01453374, 0.499453 , 0.12847178, 0.5970337 , 0.92107564, 0.1353088 ], dtype=float32)), ('5', array([0.77832663, 0.8915154 , 0.7435042 , 0.8148381 , 0.9744162 , 0.55549747, 0.8838653 , 0.63371605, 0.23385969, 0.34045848, 0.1165311 ], dtype=float32)), ('6', array([0.45601034, 0.55218536, 0.40453702, 0.6130139 , 0.9555645 , 0.8283712 , 0.02224702, 0.39311445, 0.791911 , 0.32984698, 0.18268831], dtype=float32)), ('7', array([0.92055535, 0.77292 , 0.27333862, 0.29092216, 0.782299 , 0.25580984, 0.92083466, 0.0973004 , 0.17253524, 0.8128827 , 0.504909 ], dtype=float32)), ('8', array([0.51502854, 0.56702006, 0.17254692, 0.7095564 , 0.03431124, 0.76993406, 0.86907685, 0.38690564, 0.3951562 , 0.11255713, 0.8427719 ], dtype=float32)), ('9', array([0.74191433, 0.6941966 , 0.6615604 , 0.7109615 , 0.21396402, 0.07951149, 0.48645335, 0.7014952 , 0.6249435 , 0.13057923, 0.56323195], dtype=float32))])

More specifically: Substructure "type=OrderedDict str=OrderedDict([('0', array([0.34979227, 0.8725393 , 0.08013749, 0.4683012 , 0.9829854 , 0.98014855, 0.58258814, 0.05028085, 0.993543 , 0.79412615, 0.06607185], dtype=float32)), ('1', array([0.88398516, 0.7195979 , 0.94150907, 0.9829224 , 0.92211246, 0.7414135 , 0.9920831 , 0.14566854, 0.81726855, 0.9366454 , 0.6274262 ], dtype=float32)), ('10', array([0.16446641, 0.6756482 , 0.7539203 , 0.4934962 , 0.8585525 , 0.6322913 , 0.7542331 , 0.7486755 , 0.49466062, 0.72004724, 0.8117338 ], dtype=float32)), ('11', array([0.5329431 , 0.8486154 , 0.9495838 , 0.8622418 , 0.8732489 , 0.11242072, 0.0763188 , 0.2220357 , 0.854032 , 0.973182 , 0.14879882], dtype=float32)), ('12', array([0.732119 , 0.89823484, 0.69070387, 0.77506936, 0.98971057, 0.99043095, 0.3193064 , 0.69851375, 0.40632918, 0.406967 , 0.62862474], dtype=float32)), ('13', array([0.47952086, 0.02109447, 0.42499098, 0.40413964, 0.264478 , 0.10132027, 0.2610051 , 0.02141384, 0.231504 , 0.8975433 , 0.99703795], dtype=float32)), ('14', array([0.67056215, 0.16663882, 0.08163167, 0.73198503, 0.18105489, 0.99467266, 0.02948203, 0.9314566 , 0.04069875, 0.8837653 , 0.35313195], dtype=float32)), ('15', array([0.4275111 , 0.05799369, 0.20000993, 0.8529059 , 0.7172784 , 0.7484241 , 0.90706795, 0.9734425 , 0.55966806, 0.81240386, 0.03192328], dtype=float32)), ('2', array([0.6548908 , 0.42107597, 0.06570876, 0.09022505, 0.07516731, 0.7484601 , 0.3176393 , 0.29006734, 0.6668242 , 0.76966023, 0.31101513], dtype=float32)), ('3', array([0.8722979 , 0.6160401 , 0.28451207, 0.3753895 , 0.59421366, 0.89204305, 0.01789684, 0.80874205, 0.4302826 , 0.9208242 , 0.3285712 ], dtype=float32)), ('4', array([0.9464896 , 0.01961527, 0.834267 , 0.6228876 , 0.9121172 , 0.01453374, 0.499453 , 0.12847178, 0.5970337 , 0.92107564, 0.1353088 ], dtype=float32)), ('5', array([0.77832663, 0.8915154 , 0.7435042 , 0.8148381 , 0.9744162 , 0.55549747, 0.8838653 , 0.63371605, 0.23385969, 0.34045848, 0.1165311 ], dtype=float32)), ('6', array([0.45601034, 0.55218536, 0.40453702, 0.6130139 , 0.9555645 , 0.8283712 , 0.02224702, 0.39311445, 0.791911 , 0.32984698, 0.18268831], dtype=float32)), ('7', array([0.92055535, 0.77292 , 0.27333862, 0.29092216, 0.782299 , 0.25580984, 0.92083466, 0.0973004 , 0.17253524, 0.8128827 , 0.504909 ], dtype=float32)), ('8', array([0.51502854, 0.56702006, 0.17254692, 0.7095564 , 0.03431124, 0.76993406, 0.86907685, 0.38690564, 0.3951562 , 0.11255713, 0.8427719 ], dtype=float32)), ('9', array([0.74191433, 0.6941966 , 0.6615604 , 0.7109615 , 0.21396402, 0.07951149, 0.48645335, 0.7014952 , 0.6249435 , 0.13057923, 0.56323195], dtype=float32))])" is a sequence, while substructure "type=ndarray str=[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]" is not Entire first structure: . Entire second structure: OrderedDict([('0', .), ('1', .), ('10', .), ('11', .), ('12', .), ('13', .), ('14', .), ('15', .), ('2', .), ('3', .), ('4', .), ('5', .), ('6', .), ('7', .), ('8', .), ('9', .)])
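
Reading the error, the RLlib preprocessor seems to have been built for a single agent's flat Box observation (the length-11 array shown as "First structure"), while at sampling time it is handed the full per-agent OrderedDict ("Second structure"). A rough way to see what the wrapper advertises versus what it actually returns (paths follow the repo's 4x4 grid example; SUMO must be installed, and the exact reset() return format depends on the installed ray version):

import sumo_rl
from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv

# Build the same multi-agent env that experiments/ppo_4x4grid.py registers.
env = ParallelPettingZooEnv(
    sumo_rl.parallel_env(
        net_file="nets/4x4-Lucas/4x4.net.xml",
        route_file="nets/4x4-Lucas/4x4c1c2c1c2.rou.xml",
        use_gui=False,
        num_seconds=1000,
    )
)

print(env.observation_space)  # the single-agent Box advertised by the wrapper
reset_out = env.reset()       # a dict of per-agent observations (plus infos on newer ray)
print(type(reset_out))
env.close()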

SecondTheFirst commented 6 months ago

@lie12huo Have you figured out a way to solve it? I am facing the same issue.

lie12huo commented 6 months ago

@SecondTheFirst This seems to be a version compatibility issue that I have not been able to solve.

liopeer commented 6 months ago

Facing the same issue, running it in a Docker container: https://hub.docker.com/r/lionelpeer/sumo-rl

SafeguardLi commented 3 weeks ago

Facing the same issue. Any ideas?

liopeer commented 3 weeks ago

Facing the same issue. Any ideas?

Hey, I eventually found a fix somewhere in a PR to rllib's PettingZoo wrappers, and I am attaching the file that made it work for us: simply copy this file somewhere and import ParallelPettingZooEnv (or PettingZooEnv) from it instead of from ray.rllib.env.wrappers.pettingzoo_env.

It's really just a hot-fix, but I don't remember exactly where I found the PR, and it might have been merged into rllib's main branch by now, so the first thing I would try is upgrading rllib to the newest release.

from typing import Optional 

from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.gym import convert_old_gym_space_to_gymnasium_space
from ray.rllib.utils.typing import MultiAgentDict

@PublicAPI
class PettingZooEnv(MultiAgentEnv):
    """An interface to the PettingZoo MARL environment library.

    See: https://github.com/Farama-Foundation/PettingZoo

    Inherits from MultiAgentEnv and exposes a given AEC
    (actor-environment-cycle) game from the PettingZoo project via the
    MultiAgentEnv public API.

    Note that the wrapper has some important limitations:

    1. All agents have the same action_spaces and observation_spaces.
       Note: If, within your aec game, agents do not have homogeneous action /
       observation spaces, apply SuperSuit wrappers
       to apply padding functionality: https://github.com/Farama-Foundation/
       SuperSuit#built-in-multi-agent-only-functions
    2. Environments are positive sum games (-> Agents are expected to cooperate
       to maximize reward). This isn't a hard restriction, it's just that
       standard algorithms aren't expected to work well in highly competitive
       games.

    Examples:
        >>> from pettingzoo.butterfly import prison_v3
        >>> from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
        >>> env = PettingZooEnv(prison_v3.env())
        >>> obs, infos = env.reset()
        >>> print(obs)
        # only returns the observation for the agent which should be stepping
        {
            'prisoner_0': array([[[0, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                ...,
                [0, 0, 0],
                [0, 0, 0],
                [0, 0, 0]]], dtype=uint8)
        }
        >>> obs, rewards, terminateds, truncateds, infos = env.step({
        ...     "prisoner_0": 1
        ... })
        # only returns the observation, reward, info, etc, for
        # the agent whose turn is next.
        >>> print(obs)
        {
            'prisoner_1': array([[[0, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                ...,
                [0, 0, 0],
                [0, 0, 0],
                [0, 0, 0]]], dtype=uint8)
        }
        >>> print(rewards)
        {
            'prisoner_1': 0
        }
        >>> print(terminateds)
        {
            'prisoner_1': False, '__all__': False
        }
        >>> print(truncateds)
        {
            'prisoner_1': False, '__all__': False
        }
        >>> print(infos)
        {
            'prisoner_1': {'map_tuple': (1, 0)}
        }
    """

    def __init__(self, env):
        super().__init__()
        self.env = env
        env.reset()

        # Since all agents have the same spaces, do not provide full observation-
        # and action-spaces as Dicts, mapping agent IDs to the individual
        # agents' spaces. Instead, `self.[action|observation]_space` are the single
        # agent spaces.
        self._obs_space_in_preferred_format = False
        self._action_space_in_preferred_format = False

        # Collect the individual agents' spaces (they should all be the same):
        first_obs_space = self.env.observation_space(self.env.agents[0])
        first_action_space = self.env.action_space(self.env.agents[0])

        for agent in self.env.agents:
            if self.env.observation_space(agent) != first_obs_space:
                raise ValueError(
                    "Observation spaces for all agents must be identical. Perhaps "
                    "SuperSuit's pad_observations wrapper can help (usage: "
                    "`supersuit.aec_wrappers.pad_observations(env)`)."
                )
            if self.env.action_space(agent) != first_action_space:
                raise ValueError(
                    "Action spaces for all agents must be identical. Perhaps "
                    "SuperSuit's pad_action_space wrapper can help (usage: "
                    "`supersuit.aec_wrappers.pad_action_space(env)`)."
                )

        # Convert from gym to gymnasium, if necessary.
        self.observation_space = convert_old_gym_space_to_gymnasium_space(
            first_obs_space
        )
        self.action_space = convert_old_gym_space_to_gymnasium_space(first_action_space)

        self._agent_ids = self.env.agents

    def observation_space_sample(self, agent_ids: list = None) -> MultiAgentDict:
        if agent_ids is None:
            agent_ids = self._agent_ids
        return {id: self.observation_space.sample() for id in agent_ids}

    def action_space_sample(self, agent_ids: list = None) -> MultiAgentDict:
        if agent_ids is None:
            agent_ids = self._agent_ids
        return {id: self.action_space.sample() for id in agent_ids}

    def action_space_contains(self, x: MultiAgentDict) -> bool:
        if not isinstance(x, dict):
            return False
        return all(self.action_space.contains(val) for val in x.values())

    def observation_space_contains(self, x: MultiAgentDict) -> bool:
        if not isinstance(x, dict):
            return False
        return all(self.observation_space.contains(val) for val in x.values())

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        info = self.env.reset(seed=seed, options=options)
        return (
            {self.env.agent_selection: self.env.observe(self.env.agent_selection)},
            info or {},
        )

    def step(self, action):
        self.env.step(action[self.env.agent_selection])
        obs_d = {}
        rew_d = {}
        terminated_d = {}
        truncated_d = {}
        info_d = {}
        while self.env.agents:
            obs, rew, terminated, truncated, info = self.env.last()
            agent_id = self.env.agent_selection
            obs_d[agent_id] = obs
            rew_d[agent_id] = rew
            terminated_d[agent_id] = terminated
            truncated_d[agent_id] = truncated
            info_d[agent_id] = info
            if (
                self.env.terminations[self.env.agent_selection]
                or self.env.truncations[self.env.agent_selection]
            ):
                self.env.step(None)
            else:
                break

        all_gone = not self.env.agents
        terminated_d["__all__"] = all_gone and all(terminated_d.values())
        truncated_d["__all__"] = all_gone and all(truncated_d.values())

        return obs_d, rew_d, terminated_d, truncated_d, info_d

    def close(self):
        self.env.close()

    def render(self):
        # Recent PettingZoo envs fix the render mode at construction time, so
        # render() is called without arguments here.
        return self.env.render()

    @property
    def get_sub_environments(self):
        return self.env.unwrapped

@PublicAPI
class ParallelPettingZooEnv(MultiAgentEnv):
    def __init__(self, env):
        super().__init__()
        self.par_env = env
        self.par_env.reset()

        # Since all agents have the same spaces, do not provide full observation-
        # and action-spaces as Dicts, mapping agent IDs to the individual
        # agents' spaces. Instead, `self.[action|observation]_space` are the single
        # agent spaces.
        self._obs_space_in_preferred_format = False
        self._action_space_in_preferred_format = False

        # Get first observation space, assuming all agents have equal space
        self.observation_space = self.par_env.observation_space(self.par_env.agents[0])

        # Get first action space, assuming all agents have equal space
        self.action_space = self.par_env.action_space(self.par_env.agents[0])
        assert all(
            self.par_env.observation_space(agent) == self.observation_space
            for agent in self.par_env.agents
        ), (
            "Observation spaces for all agents must be identical. Perhaps "
            "SuperSuit's pad_observations wrapper can help (usage: "
            "`supersuit.aec_wrappers.pad_observations(env)`)."
        )
        assert all(
            self.par_env.action_space(agent) == self.action_space
            for agent in self.par_env.agents
        ), (
            "Action spaces for all agents must be identical. Perhaps "
            "SuperSuit's pad_action_space wrapper can help (usage: "
            "`supersuit.aec_wrappers.pad_action_space(env)`)."
        )

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        obs, info = self.par_env.reset(seed=seed, options=options)
        return obs, info or {}

    def step(self, action_dict):
        obss, rews, terminateds, truncateds, infos = self.par_env.step(action_dict)
        terminateds["__all__"] = all(terminateds.values())
        truncateds["__all__"] = all(truncateds.values())
        return obss, rews, terminateds, truncateds, infos

    def close(self):
        self.par_env.close()

    def render(self):
        # Same as above: the render mode is set when the underlying env is
        # created, so no argument is passed.
        return self.par_env.render()

    @property
    def get_sub_environments(self):
        return self.par_env.unwrapped
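
For reference, this is roughly how the local copy gets wired into experiments/ppo_4x4grid.py in our setup; the module name pettingzoo_env_fix is just a placeholder for wherever you save the file above, and the rest of the script (the PPOConfig and the tune run) stays unchanged:

import sumo_rl
from ray.tune.registry import register_env

# Import the patched wrapper from the local copy instead of
# ray.rllib.env.wrappers.pettingzoo_env.
from pettingzoo_env_fix import ParallelPettingZooEnv

register_env(
    "4x4grid",
    lambda _: ParallelPettingZooEnv(
        sumo_rl.parallel_env(
            net_file="nets/4x4-Lucas/4x4.net.xml",
            route_file="nets/4x4-Lucas/4x4c1c2c1c2.rou.xml",
            out_csv_name="outputs/4x4grid/ppo",
            use_gui=False,
            num_seconds=80000,
        )
    ),
)
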
SafeguardLi commented 3 weeks ago

It works! Thanks a lot!