Closed tongzhoumu closed 2 years ago
Hi, thank you so much for this bug report! This is quite weird... I can reproduce the issue and am trying to figure out what happens.
This issue only happens in:
gym:
dmc: (the issue only shows up at random here, though...)
import envpool
import numpy as np
def random_rollout(task_id):
    """Run a fixed-seed random rollout and return the accumulated reward.

    NumPy's global RNG is re-seeded with 0 and the pool is created with
    seed 123 on every call, so two calls with the same ``task_id`` are
    expected to return identical results.

    Args:
        task_id: Environment id passed to ``envpool.make_gym``.

    Returns:
        The sum of the reward values returned by the 10 ``step`` calls
        (presumably one entry per env -- confirm against envpool).
    """
    np.random.seed(0)
    num_envs = 32
    pool = envpool.make_gym(task_id, num_envs=num_envs, seed=123)
    pool.reset()

    total_reward = 0
    for _step in range(10):
        # Continuous actions only; a discrete action space would need
        # np.random.randint(pool.action_space.n, size=num_envs) instead.
        actions = np.random.rand(num_envs, pool.action_space.shape[0])
        _obs, reward, _done, _info = pool.step(actions)
        total_reward += reward

    pool.close()
    return total_reward
if __name__ == "__main__":
    # Every task is rolled out twice with identical seeds; a non-zero
    # difference printed below means the rollout was not deterministic.
    task_ids = (
        # Presumably the Gym MuJoCo tasks.
        "Ant-v3",
        "Ant-v4",
        "HalfCheetah-v3",
        "HalfCheetah-v4",
        "Hopper-v3",
        "Hopper-v4",
        "Humanoid-v3",
        "Humanoid-v4",
        "HumanoidStandup-v2",
        "HumanoidStandup-v4",
        "InvertedDoublePendulum-v2",
        "InvertedDoublePendulum-v4",
        "InvertedPendulum-v2",
        "InvertedPendulum-v4",
        "Pusher-v2",
        "Pusher-v4",
        "Reacher-v2",
        "Reacher-v4",
        "Swimmer-v3",
        "Swimmer-v4",
        "Walker2d-v3",
        "Walker2d-v4",
        # Presumably the DeepMind Control tasks.
        "AcrobotSwingup-v1",
        "AcrobotSwingupSparse-v1",
        "BallInCupCatch-v1",
        "CartpoleBalance-v1",
        "CartpoleBalanceSparse-v1",
        "CartpoleSwingup-v1",
        "CartpoleSwingupSparse-v1",
        "CartpoleThreePoles-v1",
        "CartpoleTwoPoles-v1",
        "CheetahRun-v1",
        "FingerSpin-v1",
        "FingerTurnEasy-v1",
        "FingerTurnHard-v1",
        "FishSwim-v1",
        "FishUpright-v1",
        "HopperHop-v1",
        "HopperStand-v1",
        "HumanoidRun-v1",
        "HumanoidRunPureState-v1",
        "HumanoidStand-v1",
        "HumanoidWalk-v1",
        "HumanoidCMURun-v1",
        "HumanoidCMUStand-v1",
        "ManipulatorBringBall-v1",
        "ManipulatorBringPeg-v1",
        "ManipulatorInsertBall-v1",
        "ManipulatorInsertPeg-v1",
        "PendulumSwingup-v1",
        "PointMassEasy-v1",
        "PointMassHard-v1",
        "ReacherEasy-v1",
        "ReacherHard-v1",
        "SwimmerSwimmer6-v1",
        "SwimmerSwimmer15-v1",
        "WalkerRun-v1",
        "WalkerStand-v1",
        "WalkerWalk-v1",
    )
    for task_id in task_ids:
        first_run = random_rollout(task_id)
        second_run = random_rollout(task_id)
        print(task_id, first_run - second_run)
With HalfCheetah-v3:
This issue still exists when using mujoco source-code compiled .so
[0.5488135 0.71518937 0.60276338 0.54488318 0.4236548 0.64589411]
act[*]: 0.000000 0.000000 0.000000 0.000000 0.423655 0.645894
data: 0.000000 0.000000 0.000000 0.000000 0.423655 0.645894
reward 0.154327 0.059666
[0.5488135 0.71518937 0.60276338 0.54488318 0.4236548 0.64589411]
act[*]: 0.000000 0.000000 0.000000 0.000000 0.423655 0.645894
data: 0.548814 0.715189 0.602763 0.544883 0.423655 0.645894
reward 0.629115 0.206958
The action["action"_].Data()
will sometimes be overwritten with 0.
This issue also happens in: VizDoom:
import envpool
import numpy as np
import argparse
from gym import spaces
def random_rollout(env_name, num_envs, seed):
    """Run a seeded 10-step random rollout and collect rewards and observations.

    Args:
        env_name: Environment id passed to ``envpool.make_gym``.
        num_envs: Number of environments in the pool.
        seed: Seed used for both NumPy's global RNG and the pool.

    Returns:
        ``(rew_sum, obs)`` where ``rew_sum`` is the sum of the rewards
        returned by the 10 ``step`` calls and ``obs`` is ``np.array`` of the
        10 observations in step order. If the pool cannot be created,
        ``(0, 0)`` is returned after printing ``"Make Failed!"``.

    Raises:
        TypeError: If the action space is neither ``spaces.Box`` nor
            ``spaces.Discrete`` (previously this surfaced as an
            ``UnboundLocalError`` on ``action``).
    """
    np.random.seed(seed)
    n = num_envs
    try:
        print(env_name)
        envs = envpool.make_gym(env_name, num_envs=n, seed=seed)
    except Exception:
        # Deliberately best-effort: an env that cannot be built is skipped.
        # ``Exception`` (not a bare ``except``) lets Ctrl-C / SystemExit
        # propagate instead of being reported as a failed make.
        print("Make Failed!")
        return 0, 0

    obs_ls = []
    rew_sum = 0
    try:
        envs.reset()
        print(envs.action_space)
        for _ in range(10):
            if isinstance(envs.action_space, spaces.Box):
                action = np.random.rand(n, envs.action_space.shape[0])
            elif isinstance(envs.action_space, spaces.Discrete):
                action = np.random.randint(envs.action_space.n, size=n)
            else:
                raise TypeError(
                    f"unsupported action space: {envs.action_space!r}"
                )
            obs, rew, done, info = envs.step(action)
            obs_ls.append(obs)
            rew_sum += rew
    finally:
        # Release the pool even if reset/step raises.
        envs.close()
    return rew_sum, np.array(obs_ls)
if __name__ == "__main__":
    # Command-line options: pool size and the seed shared by both rollouts.
    cli = argparse.ArgumentParser(description='Envpool Bug Test')
    cli.add_argument(
        '--num_envs', type=int, default=32, help='number of envs'
    )
    cli.add_argument(
        '--seed', type=int, default=123, help='random seed'
    )
    args = cli.parse_args()

    # Swap in envpool.list_all_envs() here to sweep every registered env.
    env_list = ["D1Basic-v1"]

    for env_name in env_list:
        # Two rollouts with identical arguments should match exactly.
        rew_a, obs_a = random_rollout(env_name, args.num_envs, args.seed)
        rew_b, obs_b = random_rollout(env_name, args.num_envs, args.seed)
        print(env_name, ": ")
        print(rew_a - rew_b)
        print("obs check:")
        np.testing.assert_allclose(obs_a, obs_b)
        print("PASSED!")
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 8
On-line CPU(s) list: 0-7
Thread(s) per core: 2
Core(s) per socket: 4
Socket(s): 1
NUMA node(s): 1
Vendor ID: GenuineIntel
CPU family: 6
Model: 94
Model name: Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz
Stepping: 3
CPU MHz: 2794.209
CPU max MHz: 3600.0000
CPU min MHz: 800.0000
BogoMIPS: 5399.81
Virtualization: VT-x
L1d cache: 32K
L1i cache: 32K
L2 cache: 256K
L3 cache: 8192K
NUMA node0 CPU(s): 0-7
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb invpcid_single pti tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx rdseed adx smap clflushopt intel_pt xsaveopt xsavec xgetbv1 xsaves dtherm ida arat pln pts hwp hwp_notify hwp_act_window hwp_epp
It seems to be a bug in a core component of envpool. The action of the 0th env is sometimes overwritten with 0 when the number of envs is >= 22.
I created a cc_test but could not reproduce the same behavior: when I set num_envs == 128, the actions all look correct.
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <random>
#include <vector>
#include "envpool/mujoco/gym/half_cheetah.h"
// Shorthand for the HalfCheetah action/state types. MjcState is not used in
// this snippet.
using MjcAction = typename mujoco_gym::HalfCheetahEnv::Action;
using MjcState = typename mujoco_gym::HalfCheetahEnv::State;
// Drives one Reset + one Send/Recv round trip on a 128-env HalfCheetah pool
// with a known, non-zero action for every env. The test contains no
// EXPECT/ASSERT; it only exercises the path so the actions can be inspected.
TEST(MjcEnvPoolTest, CheckAction) {
// Start from the default config and override only the number of envs.
auto config = mujoco_gym::HalfCheetahEnvSpec::kDefaultConfig;
int num_envs = 128;
config["num_envs"_] = num_envs;
mujoco_gym::HalfCheetahEnvSpec spec(config);
mujoco_gym::HalfCheetahEnvPool envpool(spec);
// Reset every env: all_env_ids holds 0 .. num_envs-1.
Array all_env_ids(Spec<int>({num_envs}));
for (int i = 0; i < num_envs; ++i) {
all_env_ids[i] = i;
}
envpool.Reset(all_env_ids);
// Presumably receives the states produced by the reset -- confirm.
auto state_vec = envpool.Recv();
// Construct the action: two int arrays of length num_envs and one double
// array of shape (num_envs, 6). Presumably these back "env_id",
// "players.env_id" and "action" in that order -- confirm against the spec.
std::vector<Array> raw_action({Array(Spec<int>({num_envs})),
Array(Spec<int>({num_envs})),
Array(Spec<double>({num_envs, 6}))});
MjcAction action(&raw_action);
for (int i = 0; i < num_envs; ++i) {
action["env_id"_][i] = i;
action["players.env_id"_][i] = i;
for (int j = 0; j < 6; ++j) {
// Always > 0, so an entry overwritten with 0 would stand out.
action["action"_][i][j] = (i + j + 1) / 100.0;
}
}
// Send the action for all envs, then receive once more.
envpool.Send(action);
state_vec = envpool.Recv();
}
However, even with a single venv.step call, the Python result may be incorrect:
import sys
import envpool
import numpy as np
def random_rollout(task_id, n):
    """Take a single fixed-seed random step and return its raw result.

    NumPy's global RNG is re-seeded with 0 and the pool is created with
    seed 123 on every call. The first env's action is printed so that it
    can be compared between runs.

    Args:
        task_id: Environment id passed to ``envpool.make_gym``.
        n: Number of environments in the pool.

    Returns:
        Whatever ``step`` returns for the sampled action batch.
    """
    np.random.seed(0)
    pool = envpool.make_gym(task_id, num_envs=n, seed=123)
    pool.reset()
    # Continuous actions only; a discrete action space would need
    # np.random.randint(pool.action_space.n, size=n) instead.
    action_dim = pool.action_space.shape[0]
    actions = np.random.rand(n, action_dim)
    print(actions[0])
    return pool.step(actions)
if __name__ == "__main__":
    # The number of envs is taken from the last command-line argument.
    num_envs = int(sys.argv[-1])
    for task_id in ['HalfCheetah-v3']:
        first = random_rollout(task_id, num_envs)
        second = random_rollout(task_id, num_envs)
        # Elements 0/1/2 of the step result are presumably obs, reward and
        # done; all three comparisons should be zero/False if deterministic.
        diff_0 = (first[0] - second[0]).sum()
        diff_1 = first[1] - second[1]
        diff_2 = first[2] ^ second[2]
        print(task_id, diff_0, diff_1, diff_2)
This issue also happens in VizDoom
I confirm this issue is in v0.4.5 (at least).
Describe the bug
I use envpool to make HalfCheetah-v3 with a fixed seed, but the rewards are not the same across several runs. Specifically, only the reward returned by the first env is non-deterministic; the other envs are fine. If num_envs is small, this bug does not occur.
To Reproduce
Output:
Expected behavior
The reward should be deterministic after seeding.
System info
Describe the characteristic of your environment:
Checklist