hsahovic / poke-env

A python interface for training Reinforcement Learning bots to battle on pokemon showdown
https://poke-env.readthedocs.io/
MIT License

Minimal Gymnasium Example #479

Closed jakegrigsby closed 9 months ago

jakegrigsby commented 10 months ago

Hi, does anyone have a minimal example of how to step a gym env using the modern Gymnasium API in a single thread, separate from keras-rl? From the docs/examples/tests, it looks like the library is moving away from the EnvPlayer subclasses toward OpenAIGymEnv, which leaves a few extra abstract methods to define. I can initialize a simple env as shown in examples/openai_example.py, but I can't get it to reset or step in the main thread without timing out.

import numpy as np
import gymnasium as gym

# Assumes a Pokemon Showdown server is running locally; poke-env's docs
# suggest starting one with `node pokemon-showdown start --no-security`.
from poke_env import LocalhostServerConfiguration
from poke_env.environment import Battle
from poke_env.player import RandomPlayer
from poke_env.player.openai_api import OpenAIGymEnv

class DemoEnv(OpenAIGymEnv):
    def action_space_size(self) -> int:
        # The size is arbitrary in this demo, since action_to_move below
        # ignores the sampled action entirely.
        return 26

    def action_to_move(self, action: int, battle: Battle):
        # Ignore the action and let the agent pick a random legal move.
        return self.agent.choose_random_move(battle)

    def get_opponent(self):
        # A fresh random opponent on the same local server.
        return RandomPlayer(
            battle_format="gen8randombattle",
            server_configuration=LocalhostServerConfiguration,
        )

    def describe_embedding(self) -> gym.spaces.Space:
        # Dummy one-dimensional observation space, matching embed_battle below.
        return gym.spaces.Box(low=np.zeros(1), high=np.ones(1))

    def calc_reward(self, last_battle: Battle, current_battle: Battle) -> float:
        # Sparse reward: 1 on a win, 0 otherwise.
        return 1.0 if current_battle.won else 0.0

    def embed_battle(self, battle: Battle):
        # Dummy observation; replace with real battle features for training.
        return np.zeros((1,))

if __name__ == "__main__":
    test_env = DemoEnv(
        battle_format="gen8randombattle",
        server_configuration=LocalhostServerConfiguration,
        start_challenging=True,
    )

    # Standard Gymnasium loop: reset returns (obs, info) and step returns
    # (obs, reward, terminated, truncated, info).
    for ep in range(10):
        state, info = test_env.reset()
        done = False
        return_ = 0.0
        timesteps = 0
        while not done:
            state, reward, terminated, truncated, info = test_env.step(
                test_env.action_space.sample()
            )
            test_env.render()
            return_ += reward
            done = terminated or truncated
            timesteps += 1
        print(f"Episode {ep}:: Timesteps: {timesteps}, Total Return: {return_ : .2f}")
jakegrigsby commented 10 months ago

This seems to have been fixed by a few Showdown server restarts... not sure what I was doing wrong. I've updated the example above in case anyone else needs it.

This is a bit unrelated, but I'm curious why the Gen9EnvSinglePlayer.action_to_move logic includes previous-generation mechanics like Mega Evolution and Dynamax: https://github.com/hsahovic/poke-env/blob/03b02729f756c7c4b2fb0156af761d2df540757d/src/poke_env/player/env_player.py#L515-L535

When these actions fail, they fall back to a random choice, so is this an intentional decision to keep redundant actions so that the action space size stays consistent with gen 8?
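
In case it helps anyone, my reading of the linked code (untested, and only an assumption about the layout) is that the 26 gen 9 actions decompose as 0-3 plain moves, 4-7 mega, 8-11 z-moves, 12-15 dynamax, 16-19 terastallize, and 20-25 switches. If that's right, one way to avoid the random fallback without changing the published action space is to only ever sample from the non-legacy slots:

import numpy as np

# Assumed Gen9EnvSinglePlayer layout (see permalink above):
# 0-3 moves, 4-7 mega, 8-11 z-move, 12-15 dynamax, 16-19 tera, 20-25 switches.
LEGACY_ACTIONS = set(range(4, 16))  # mega / z-move / dynamax slots

def sample_gen9_action(action_space_size=26):
    # Uniformly sample an action index, skipping the legacy gimmick slots.
    valid = [a for a in range(action_space_size) if a not in LEGACY_ACTIONS]
    return int(np.random.choice(valid))

That keeps the action space size unchanged while never emitting one of the redundant actions.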