DLR-RM / stable-baselines3

PyTorch version of Stable Baselines, reliable implementations of reinforcement learning algorithms.
https://stable-baselines3.readthedocs.io
MIT License
8.85k stars 1.68k forks source link

[Bug]: bug title SubprocVecEnv TypeError: reset() got an unexpected argument 'seed' #2001

Closed ccleavinger closed 2 weeks ago

ccleavinger commented 3 weeks ago

🐛 Bug

I was attempting to create a simple PPO Reinforcement Learning model utilizing stable-baselines3 (ver 2.3.2), gymnasium (0.29.1), and stable-retro (0.9.2). After lots of work fixing package incompatibilities and rewriting the codebase I settled into a script based off of this tutorial. However I'm encountering a TypeError that seems to be in subproc_vec_env. I'm not entirely sure if this is my fault along the way or an issue in the library. I couldn't find anything on the internet about this error and would like some guidance. Thanks in advance for any and all help.

To Reproduce

import gym
import numpy as np
from RandomAgent import TimeLimitWrapper

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.results_plotter import load_results, ts2xy, plot_results
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.vec_env import VecMonitor
from stable_baselines3.common.atari_wrappers import MaxAndSkipEnv
import os

import retro

class SaveOnBestTrainingRewardCallback(BaseCallback):
    """
    Callback for saving a model (the check is done every ``check_freq`` steps)
    based on the training reward (in practice, we recommend using ``EvalCallback``).

    :param check_freq:
    :param log_dir: Path to the folder where the model will be saved.
      It must contains the file created by the ``Monitor`` wrapper.
    :param verbose: Verbosity level.
    """

    def __init__(self, check_freq: int, log_dir: str, verbose: int = 1):
        super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, 'best_model')
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        # Create folder if needed
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:

            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                # Mean training reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
                if self.verbose > 0:
                    print(f"Num timesteps: {self.num_timesteps}")
                    print(
                        f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}")

                # New best model, you could save the agent here
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    # Example for saving best model
                    if self.verbose > 0:
                        print(f"Saving new best model to {self.save_path}")
                    self.model.save(self.save_path)

        return True

# Create log dir
log_dir = "tmp/"
os.makedirs(log_dir, exist_ok=True)

def make_env(env_id, rank, seed=0):
    """
    Utility function for multiprocessed env.

    :param env_id: (str) the environment ID
    :param num_env: (int) the number of environments you wish to have in subprocesses
    :param seed: (int) the inital seed for RNG
    :param rank: (int) index of the subprocess
    """

    def _init():
        # env = gym.make(env_id)
        env = retro.make(game=env_id)
        env = TimeLimitWrapper(env, max_steps=2000)
        env = MaxAndSkipEnv(env, 4)
        env.reset(seed=seed+rank, options={})
        return env

    set_random_seed(seed)
    return _init

if __name__ == '__main__':
    env_id = "SuperMarioBros-Nes"
    num_cpu = 4  # Number of processes to use
    # Create the vectorized environment
    env = VecMonitor(SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]), "tmp/TestMonitor")

    # Stable Baselines provides you with make_vec_env() helper
    # which does exactly the previous steps for you.
    # You can choose between `DummyVecEnv` (usually faster) and `SubprocVecEnv`
    # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv)

    model = PPO('CnnPolicy', env, verbose=1, tensorboard_log="./board/", learning_rate=0.00003)
    # model = PPO.load("tmp/best_model", env=env)
    print("------------- Start Learning -------------")
    callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir)
    model.learn(total_timesteps=5000000, callback=callback, tb_log_name="PPO-00003")
    model.save(env_id)
    print("------------- Done Learning -------------")
    env = retro.make(game=env_id)
    env = TimeLimitWrapper(env)

    obs = env.reset()
    for _ in range(1000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()

Relevant log output / Error message

Process ForkServerProcess-4:
Process ForkServerProcess-3:
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/mnt/d/test/MarioAI/MarioAI/.venv/lib/python3.8/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 29, in _worker
    env = _patch_env(env_fn_wrapper.var())
  File "Train.py", line 86, in _init
    env.reset(seed=seed+rank, options={})
  File "/mnt/d/test/MarioAI/MarioAI/.venv/lib/python3.8/site-packages/gymnasium/core.py", line 467, in reset
    return self.env.reset(seed=seed, options=options)
TypeError: reset() got an unexpected keyword argument 'seed'
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/mnt/d/test/MarioAI/MarioAI/.venv/lib/python3.8/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 29, in _worker
    env = _patch_env(env_fn_wrapper.var())
  File "Train.py", line 86, in _init
    env.reset(seed=seed+rank, options={})
  File "/mnt/d/test/MarioAI/MarioAI/.venv/lib/python3.8/site-packages/gymnasium/core.py", line 467, in reset
    return self.env.reset(seed=seed, options=options)
TypeError: reset() got an unexpected keyword argument 'seed'
Process ForkServerProcess-1:
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/mnt/d/test/MarioAI/MarioAI/.venv/lib/python3.8/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 29, in _worker
    env = _patch_env(env_fn_wrapper.var())
  File "Train.py", line 86, in _init
    env.reset(seed=seed+rank, options={})
  File "/mnt/d/test/MarioAI/MarioAI/.venv/lib/python3.8/site-packages/gymnasium/core.py", line 467, in reset
    return self.env.reset(seed=seed, options=options)
TypeError: reset() got an unexpected keyword argument 'seed'
Process ForkServerProcess-2:
Traceback (most recent call last):
  File "Train.py", line 97, in <module>
    env = VecMonitor(SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]), "tmp/TestMonitor")
  File "/mnt/d/test/MarioAI/MarioAI/.venv/lib/python3.8/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 119, in __init__
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/mnt/d/test/MarioAI/MarioAI/.venv/lib/python3.8/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 29, in _worker
    env = _patch_env(env_fn_wrapper.var())
  File "Train.py", line 86, in _init
    env.reset(seed=seed+rank, options={})
  File "/mnt/d/test/MarioAI/MarioAI/.venv/lib/python3.8/site-packages/gymnasium/core.py", line 467, in reset
    return self.env.reset(seed=seed, options=options)
TypeError: reset() got an unexpected keyword argument 'seed'
    observation_space, action_space = self.remotes[0].recv()
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 414, in _recv_bytes
    buf = self._recv(4)
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
ConnectionResetError: [Errno 104] Connection reset by peer

System Info

Checklist

araffin commented 2 weeks ago

If code there is, it is minimal and working

Closing because the minimum requirements for seeking help are not met.

PS: you are using a version of gym that is not supported by SB3, please use gymnasium instead (or gym 0.21, but not recommended).