Farama-Foundation / stable-retro

Retro games for Reinforcement Learning
https://stable-retro.farama.org/
MIT License

Stable-retro Raspberry Pi 5 ForkServerProcess-1 Error #112

Open StartaBafras opened 3 weeks ago

StartaBafras commented 3 weeks ago

I am trying to train a model using PPO; the stable-baselines3[extra] library is also installed.

The issue occurs because the StochasticFrameSkip object does not expose an action_space attribute, which raises an AttributeError when the environment is created inside a SubprocVecEnv worker. The error is triggered when wrappers such as WarpFrame access action_space during environment creation; the worker dies, and the parent process then sees a ConnectionResetError. There is also a deprecation warning from gymnasium about accessing env.action_space directly, which recommends env.unwrapped.action_space or env.get_wrapper_attr('action_space') instead.
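For reference, the access pattern that the gymnasium warning below recommends looks like this (a minimal sketch; `env` stands for any wrapped environment, not a name from the script):

# Deprecation-safe ways to read action_space through a wrapper chain
action_space = env.unwrapped.action_space            # reach the base environment directly
action_space = env.get_wrapper_attr("action_space")  # search through the wrapper chain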

/home/alarm/retro/env/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: WARN: env.action_space to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.action_space` for environment variables or `env.get_wrapper_attr('action_space')` that will search the reminding wrappers.
  logger.warn(
Process ForkServerProcess-1:
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/alarm/retro/env/lib/python3.9/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 24, in _worker
    env = env_fn_wrapper.var()
  File "/home/alarm/retro/test.py", line 98, in make_env
    env = wrap_deepmind_retro(env)
  File "/home/alarm/retro/test.py", line 83, in wrap_deepmind_retro
    env = WarpFrame(env)
  File "/home/alarm/retro/env/lib/python3.9/site-packages/stable_baselines3/common/atari_wrappers.py", line 188, in __init__
    gym.ObservationWrapper.__init__(self, env)
  File "/home/alarm/retro/env/lib/python3.9/site-packages/gym/core.py", line 215, in __init__
    self.action_space = self.env.action_space
  File "/home/alarm/retro/env/lib/python3.9/site-packages/gymnasium/core.py", line 315, in __getattr__
    return getattr(self.env, name)
AttributeError: 'StochasticFrameSkip' object has no attribute 'action_space'
Traceback (most recent call last):
  File "/home/alarm/retro/test.py", line 124, in <module>
    main()
  File "/home/alarm/retro/test.py", line 101, in main
    venv = VecTransposeImage(VecFrameStack(SubprocVecEnv([make_env] * 1), n_stack=40))
  File "/home/alarm/retro/env/lib/python3.9/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 111, in __init__
    observation_space, action_space = self.remotes[0].recv()
  File "/usr/local/lib/python3.9/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/usr/local/lib/python3.9/multiprocessing/connection.py", line 414, in _recv_bytes
    buf = self._recv(4)
  File "/usr/local/lib/python3.9/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
ConnectionResetError: [Errno 104] Connection reset by peer


pseudo-rnd-thoughts commented 3 weeks ago

Could you provide a minimal example script to test with? I imagine that `StochasticFrameSkip` hasn't called `super().__init__(env)`.
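For context, the pattern being suggested looks like this (a minimal sketch; `MySkipWrapper` and `n` are placeholders, not code from the repository):

import gymnasium as gym

class MySkipWrapper(gym.Wrapper):
    def __init__(self, env, n=4):
        # Calling the base-class constructor sets self.env and wires the
        # wrapper into gymnasium's attribute forwarding, which is what lets
        # outer wrappers read action_space through it. If this call is
        # missing, reading action_space raises an AttributeError like the
        # one in the traceback above.
        super().__init__(env)
        self.n = n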

StartaBafras commented 3 weeks ago

I am using code that is very close to the code in the example folder.

"""
Train an agent using Proximal Policy Optimization from Stable Baselines 3
"""

import argparse

import gymnasium as gym
import numpy as np
from gymnasium.wrappers.time_limit import TimeLimit
from stable_baselines3 import PPO
from stable_baselines3.common.atari_wrappers import ClipRewardEnv, WarpFrame
from stable_baselines3.common.vec_env import (
    SubprocVecEnv,
    VecFrameStack,
    VecTransposeImage,
)

import retro

class StochasticFrameSkip(gym.Wrapper):
    def __init__(self, env, n, stickprob):
        gym.Wrapper.__init__(self, env)
        self.n = n
        self.stickprob = stickprob
        self.curac = None
        self.rng = np.random.RandomState()
        self.supports_want_render = hasattr(env, "supports_want_render")

    def reset(self, **kwargs):
        self.curac = None
        return self.env.reset(**kwargs)

    def step(self, ac):
        terminated = False
        truncated = False
        totrew = 0
        for i in range(self.n):
            # First step after reset, use action
            if self.curac is None:
                self.curac = ac
            # First substep, delay with probability=stickprob
            elif i == 0:
                if self.rng.rand() > self.stickprob:
                    self.curac = ac
            # Second substep, new action definitely kicks in
            elif i == 1:
                self.curac = ac
            if self.supports_want_render and i < self.n - 1:
                ob, rew, terminated, truncated, info = self.env.step(
                    self.curac,
                    want_render=False,
                )
            else:
                ob, rew, terminated, truncated, info = self.env.step(self.curac)
            totrew += rew
            if terminated or truncated:
                break
        return ob, totrew, terminated, truncated, info

def make_retro(*, game, state=None, max_episode_steps=4500, **kwargs):
    if state is None:
        state = retro.State.DEFAULT
    env = retro.make(game, state, **kwargs)
    env = StochasticFrameSkip(env, n=4, stickprob=0.25)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
    return env

def wrap_deepmind_retro(env):
    """
    Configure environment for retro games, using config similar to DeepMind-style Atari in openai/baseline's wrap_deepmind
    """
    env = WarpFrame(env)
    env = ClipRewardEnv(env)
    return env

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", default="Airstriker-Genesis")#BalloonFight-Nes Airstriker-Genesis
    parser.add_argument("--state", default=retro.State.DEFAULT)
    parser.add_argument("--scenario", default=None)

    args = parser.parse_args()

    def make_env():
        env = make_retro(game=args.game, state=args.state, scenario=args.scenario, render_mode="rgb_array")
        env = wrap_deepmind_retro(env)
        return env

    venv = VecTransposeImage(VecFrameStack(SubprocVecEnv([make_env] * 1), n_stack=40))
    model = PPO(
        policy="CnnPolicy",
        env=venv,
        learning_rate=lambda f: f * 2.5e-4,
        n_steps=256,
        batch_size=128,
        n_epochs=3,
        gamma=0.99,
        gae_lambda=0.95,
        clip_range=0.4,
        ent_coef=0.013,
        verbose=2,
    )
    model.learn(
        total_timesteps=18_000_000,
        log_interval=1,
        progress_bar=True
    )
    model.save("N12")

if __name__ == "__main__":
    main()
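
To surface the underlying error without multiprocessing, the wrapper chain can also be built in the main process (a minimal sketch reusing make_retro and wrap_deepmind_retro from the script above; Airstriker-Genesis is the default game there):

# Build the env directly so any AttributeError appears as-is instead of
# as a ConnectionResetError from a dead SubprocVecEnv worker.
env = make_retro(game="Airstriker-Genesis", render_mode="rgb_array")
print(env.action_space)         # fails here if the wrapper chain breaks attribute forwarding
env = wrap_deepmind_retro(env)  # WarpFrame reads action_space in its __init__
print(env.observation_space)
env.close()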