When using HER with a replay buffer that overflows, training eventually crashes with an IndexError.
To Reproduce
Run HER + SAC with a small buffer_size and a small learning_starts (the script below uses DQN as model_class):
from stable_baselines3 import HER, DDPG, DQN, SAC, TD3
from stable_baselines3.her.goal_selection_strategy import GoalSelectionStrategy
from stable_baselines3.common.bit_flipping_env import BitFlippingEnv
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env.obs_dict_wrapper import ObsDictWrapper
# Algorithm wrapped by HER for this reproduction.
model_class = DQN  # works also with SAC, DDPG and TD3
N_BITS = 15

# The continuous flag is enabled only when model_class is DDPG, SAC or TD3.
use_continuous_actions = model_class in (DDPG, SAC, TD3)
env = BitFlippingEnv(
    n_bits=N_BITS,
    continuous=use_continuous_actions,
    max_steps=N_BITS,
)

# Available strategies (cf paper): future, final, episode
goal_selection_strategy = 'future'  # equivalent to GoalSelectionStrategy.FUTURE
# If True the HER transitions will get sampled online
online_sampling = True
# Time limit for the episodes
max_episode_length = N_BITS

# Initialize the model.
# buffer_size and learning_starts are kept small, as described in the report,
# so that the replay buffer overflows during the run.
model = HER(
    'MlpPolicy',
    env,
    model_class,
    n_sampled_goal=4,
    goal_selection_strategy=goal_selection_strategy,
    online_sampling=online_sampling,
    verbose=1,
    max_episode_length=max_episode_length,
    buffer_size=100,
    learning_starts=1,
    learning_rate=1,
)

# Train the model; the reported IndexError is raised from this call.
model.learn(1000)
Traceback (most recent call last):
File "her_fail.py", line 25, in <module>
model.learn(1000)
File "/home/lubiluk/Code/stable-baselines3/stable_baselines3/her/her.py", line 214, in learn
self.train(batch_size=self.batch_size, gradient_steps=gradient_steps)
File "/home/lubiluk/Code/stable-baselines3/stable_baselines3/dqn/dqn.py", line 153, in train
replay_data = self.replay_buffer.sample(batch_size, env=self._vec_normalize_env)
File "/home/lubiluk/Code/stable-baselines3/stable_baselines3/her/her_replay_buffer.py", line 141, in sample
return self._sample_transitions(batch_size, maybe_vec_env=env, online_sampling=True)
File "/home/lubiluk/Code/stable-baselines3/stable_baselines3/her/her_replay_buffer.py", line 265, in _sample_transitions
for episode_idx, transition_idx in zip(episode_indices, transitions_indices)
File "/home/lubiluk/Code/stable-baselines3/stable_baselines3/her/her_replay_buffer.py", line 265, in <listcomp>
for episode_idx, transition_idx in zip(episode_indices, transitions_indices)
IndexError: deque index out of range
Expected behavior
The training should complete without crashing.
System Info
Describe the characteristics of your environment:
Installation: tried both - pip and source (master branch)
no GPU
Python 3.7.9
PyTorch 1.7.0
Gym 0.17.3
Additional context
Checklist
[x] I have checked that there is no similar issue in the repo (required)
🐛 Bug
When using HER with a replay buffer that overflows, training eventually crashes with an IndexError.
To Reproduce
Run HER + SAC with a small buffer_size and a small learning_starts.
Expected behavior
The training should complete without crashing.
System Info
Describe the characteristics of your environment:
Additional context
Checklist