The documentation says that clone/restoreState() does not include pseudorandomness. In fact, clone/restoreState() excludes only the pseudorandomness of repeat_action, but it does include the pseudorandomness of Atari.
The code to verify this is below:
import numpy as np
import atari_py
# Which experiment main() runs:
#   'loadROM'      - every emulator loads the ROM and resets the game itself.
#   'restoreState' - every emulator loads the ROM, then restores a state that
#                    was cloned from a single reference emulator.
test = 'loadROM'
# test = 'restoreState'
# Number of consecutive emulator steps taken with each action.
frame_skip = 4
# Number of emulators run per game; emulator i is seeded with random_seed i.
bunch = 200
# Number of actions in the random action sequence replayed by every emulator.
sequence = 500
def _configure_ale(ale, seed):
    """Disable repeat-action stochasticity on *ale* and set its random seed."""
    ale.setFloat(b'repeat_action_probability', 0.0)
    ale.setInt(b'random_seed', seed)


def _record_screen(screen, unique_screens, counts):
    """Tally *screen* against the distinct screens collected so far.

    If an identical screen is already in *unique_screens*, the matching entry
    of *counts* is incremented; otherwise the screen is appended with a count
    of one. Both lists are modified in place.
    """
    for index, known_screen in enumerate(unique_screens):
        # Exact comparison: two screens match only if every pixel is equal.
        if np.array_equal(screen, known_screen):
            counts[index] += 1
            return
    unique_screens.append(screen)
    counts.append(1)


def main():
    """Measure how many distinct final screens differently seeded emulators reach.

    For every game, one random action sequence is drawn and replayed on
    ``bunch`` emulators that differ only in ``random_seed``. Depending on the
    module-level ``test`` setting, each emulator either loads the ROM and
    resets on its own ('loadROM') or loads the ROM and restores a state cloned
    from one reference emulator ('restoreState'). The final screens are
    grouped by exact equality, and the number of groups together with the size
    of each group is printed per game.

    Raises:
        ValueError: if ``test`` is neither 'loadROM' nor 'restoreState'.
    """
    if test not in ('loadROM', 'restoreState'):
        # Without this check an emulator with no ROM loaded would be stepped.
        raise ValueError('unknown test mode: {!r}'.format(test))

    # Alternative game lists. Entries containing '-n' are games that are not
    # in the Nature DQN list; they are skipped by the loop below.
    # game_list = ['air_raid-n', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis']
    # game_list = ['bank_heist', 'battle_zone', 'beam_rider', 'berzerk-n', 'bowling', 'boxing', 'breakout', 'carnival-n']
    # game_list = ['centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk']
    # game_list = ['elevator_action-n', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar']
    # game_list = ['hero', 'ice_hockey', 'jamesbond', 'journey_escape-n', 'kangaroo', 'krull', 'kung_fu_master']
    # game_list = ['montezuma_revenge-n', 'ms_pacman', 'name_this_game', 'phoenix-n', 'pitfall-n', 'pong', 'pooyan-n']
    # game_list = ['private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing-n']
    # game_list = ['solaris-n', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down']
    # game_list = ['venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge-n', 'zaxxon']
    game_list = ['pong', 'assault', 'ms_pacman']

    for game in game_list:
        if '-n' in game:
            # Games that are not in the Nature DQN list.
            continue

        game_path = atari_py.get_game_path(game)

        # Reference emulator: provides the action set and, in 'restoreState'
        # mode, the state that every other emulator starts from.
        env_father = atari_py.ALEInterface()
        _configure_ale(env_father, 3)
        env_father.loadROM(game_path)
        env_father.reset_game()
        if test == 'restoreState':
            state_after_reset = env_father.cloneState()

        # Generate one sequence of actions shared by all emulators. The action
        # set is fetched once instead of once per emulated frame.
        action_set = env_father.getMinimalActionSet()
        action_sequence = np.random.randint(len(action_set), size=sequence)

        bunch_obs = []      # distinct final screens found so far
        distribution = []   # distribution[i]: emulators that ended on bunch_obs[i]
        for bunch_i in range(bunch):
            env_temp = atari_py.ALEInterface()
            _configure_ale(env_temp, bunch_i)
            if test == 'loadROM':
                env_temp.loadROM(game_path)
                env_temp.reset_game()
            elif test == 'restoreState':
                # restoreState without calling loadROM first will cause
                # Segmentation fault (core dumped).
                env_temp.loadROM(game_path)
                env_temp.restoreState(state_after_reset)
            # Just to make sure: apply the settings again after loading.
            _configure_ale(env_temp, bunch_i)

            for action_index in action_sequence:
                action = action_set[action_index]
                for _ in range(frame_skip):
                    env_temp.act(action)
                    # Reset as soon as the episode ends so that no action is
                    # applied to a finished game.
                    if env_temp.game_over():
                        env_temp.reset_game()

            _record_screen(env_temp.getScreenRGB2(), bunch_obs, distribution)

        grouped_num = len(bunch_obs)
        print('game:{} grouped_num:{} distribution:{}'.format(
            game,
            grouped_num,
            distribution,
        ))
# Run the experiment only when this file is executed as a script.
if __name__ == "__main__":
    main()
Specifically, when `test = 'loadROM'` is set, the script tests the stochasticity produced by loadROM().
As you can see, we keep repeat_action_probability at zero throughout, since we do not care about that kind of stochasticity. The results show that clone/restoreState() actually includes the pseudorandomness of Atari. Thus, we do not actually have a function that really excludes the pseudorandomness so that we can use it for planning.
Any ideas? Is there any way to have a function that clones/restores the state of Atari without pseudorandomness, so that we can use it for planning?
The documentation says that clone/restoreState() does not include pseudorandomness. In fact, clone/restoreState() excludes only the pseudorandomness of repeat_action, but it does include the pseudorandomness of Atari.
The code to verify this is below:
Specifically, when `test = 'loadROM'` is set, the script tests the stochasticity produced by loadROM(). The results are:
When `test = 'restoreState'` is set, the script tests the stochasticity produced by clone/restoreState(). The results are:
As you can see, we keep repeat_action_probability at zero throughout, since we do not care about that kind of stochasticity. The results show that clone/restoreState() actually includes the pseudorandomness of Atari. Thus, we do not actually have a function that really excludes the pseudorandomness so that we can use it for planning.
Any ideas? Is there any way to have a function that clones/restores the state of Atari without pseudorandomness, so that we can use it for planning?
Many thanks for everyone's effort on this repo!