### 🐛 Bug

I created a custom environment for my project and validated it with the `check_env` function from both the SB3 and Gym libraries; the only output was the `RuntimeWarning` shown in the first two lines of the log below. Training then starts successfully and runs, but an error is raised in the log output when the mean reward is computed during rollout collection. I experimented with different versions of Python, NumPy, Gym, Gymnasium, and SB3, and the same error persists. Notably, the traceback does not indicate which part of my custom environment is causing the issue.
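For reference, this is roughly how the environment was checked — a minimal sketch assuming SB3's built-in checker and that the class below lives in a `GymEnv` module (the Gym-side check was done analogously):

```python
from stable_baselines3.common.env_checker import check_env

from GymEnv import GymEnv  # module name assumed; the class is defined below

env = GymEnv()
check_env(env)  # completed with only a RuntimeWarning
```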
### Code example

```python
import numpy as np
import random

# import gymnasium as gym
# from gymnasium import spaces
import gym
from gym import spaces

from CSEnv import CSEnv


class GymEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, action_repeat=1, imageObservation=True, numberOfPoints=20,
                 distanceThreshold=0.05):  # distanceThreshold was missing from the signature; default assumed
        self._stepcounter = 0
        self._action_repeat = action_repeat
        self.imageObservation = imageObservation
        self.numberOfPoints = numberOfPoints
        self.distanceThreshold = distanceThreshold
        # super(GymEnv, self).__init__()
        # Creating an instance of the main CoppeliaSim class
        self.cs = CSEnv()
        # Resetting
        self.reset()
        # Define observation space
        if self.imageObservation:
            self.observation_space = spaces.Dict({
                "image": spaces.Box(low=0, high=255, shape=(256, 256), dtype=np.uint8),
                "vector": spaces.Box(low=-np.inf, high=np.inf, shape=(14,), dtype=np.float32)})
        else:
            # _obsvShapePointPart is set elsewhere in the full environment
            obsvDimension = self._obsvShapePointPart + 14
            self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                                shape=(obsvDimension,), dtype=np.float32)
        # Define action space
        self.action_space = spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32)

    # def reset(self, seed=None, options=None):
    def reset(self):
        # super().reset(seed=seed)
        # Resetting the environment and returning the observation
        self.terminated = 0
        self._stepcounter = 0  # was self.stepcounter, which left _stepcounter untouched
        self.success_counter = 0
        # Stopping the simulation
        self.cs.simStop()
        return self.getObservation()
        # In case of using Gymnasium instead of Gym, reset should also return info:
        # info = {"is_success": self._is_success}
        # obs = self.getObservation()
        # return (obs, info)

    def step(self, action):
        self._action = action
        for i in range(self._action_repeat):
            self.cs.apply_action(action)
            self.cs.simStep()
            if self._termination():
                break
            self._stepcounter += 1
        observation = self.getObservation()
        done = self._termination()
        reward = self._reward()
        info = {"is_success": self._is_success}  # see the note after the log output below
        return observation, reward, done, info
```
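The training script itself (`Train.py` in the traceback below) is not shown; a minimal sketch consistent with the traceback — SAC, with `model.learn(total_timesteps=4e7)` at line 39 — could look like the following. Only the algorithm and the `learn` call are taken from the traceback; the `"MultiInputPolicy"` choice and the other constructor arguments are assumptions:

```python
from stable_baselines3 import SAC

env = GymEnv()

# "MultiInputPolicy" is assumed because the observation space is a Dict;
# the actual policy and hyperparameters in Train.py may differ.
model = SAC("MultiInputPolicy", env, verbose=1)
model.learn(total_timesteps=4e7)
```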
### Relevant log output / Error message
```shell
/home/major/anaconda3/envs/CSRL46/lib/python3.9/site-packages/numpy/core/fromnumeric.py:3432: RuntimeWarning: Mean of empty slice.
return _methods._mean(a, axis=axis, dtype=dtype,
/home/major/anaconda3/envs/CSRL46/lib/python3.9/site-packages/numpy/core/_methods.py:190: RuntimeWarning: invalid value encountered in double_scalars
ret = ret.dtype.type(ret / rcount)
Traceback (most recent call last):
File "/home/major/Desktop/Simulation/Main/Train.py", line 39, in <module>
model.learn(total_timesteps = 4e7)
File "/home/major/anaconda3/envs/CSRL46/lib/python3.9/site-packages/stable_baselines3/sac/sac.py", line 302, in learn
return super().learn(
File "/home/major/anaconda3/envs/CSRL46/lib/python3.9/site-packages/stable_baselines3/common/off_policy_algorithm.py", line 311, in learn
rollout = self.collect_rollouts(
File "/home/major/anaconda3/envs/CSRL46/lib/python3.9/site-packages/stable_baselines3/common/off_policy_algorithm.py", line 580, in collect_rollouts
self._dump_logs()
File "/home/major/anaconda3/envs/CSRL46/lib/python3.9/site-packages/stable_baselines3/common/off_policy_algorithm.py", line 408, in _dump_logs
self.logger.record("rollout/success_rate", safe_mean(self.ep_success_buffer))
File "/home/major/anaconda3/envs/CSRL46/lib/python3.9/site-packages/stable_baselines3/common/utils.py", line 412, in safe_mean
return np.nan if len(arr) == 0 else np.mean(arr)
File "<__array_function__ internals>", line 180, in mean
File "/home/major/anaconda3/envs/CSRL46/lib/python3.9/site-packages/numpy/core/fromnumeric.py", line 3432, in mean
return _methods._mean(a, axis=axis, dtype=dtype,
File "/home/major/anaconda3/envs/CSRL46/lib/python3.9/site-packages/numpy/core/_methods.py", line 180, in _mean
ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
TypeError: unsupported operand type(s) for +: 'method' and 'method'
```
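For what it's worth, the final `TypeError` is reproducible in isolation: it is exactly what `np.mean` raises when the sequence being averaged contains bound methods instead of numbers. If `_is_success` is a method on the environment, then `info = {"is_success": self._is_success}` in `step` stores the method object itself rather than its return value, so SB3's `ep_success_buffer` ends up holding methods. A minimal sketch (the `Probe` class is hypothetical, purely to illustrate the failure mode):

```python
import numpy as np

class Probe:
    def _is_success(self):
        return True

p = Probe()

# Storing the bound method instead of its result, as in
#   info = {"is_success": self._is_success}
buffer = [p._is_success, p._is_success]
try:
    np.mean(buffer)
except TypeError as e:
    print(e)  # unsupported operand type(s) for +: 'method' and 'method'

# Calling the method stores booleans, which average cleanly:
print(np.mean([p._is_success(), p._is_success()]))  # 1.0
```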
### System Info

- OS: Linux-6.2.0-37-generic-x86_64-with-glibc2.35 # 38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov 2 18:01:13 UTC 2
- Python: 3.9.16
- Stable-Baselines3: 1.8.0
- PyTorch: 2.1.1+cu121
- GPU Enabled: True
- Numpy: 1.23.0
- Gym: 0.21.0
### Checklist

- [X] I have checked that there is no similar issue in the repo