openai / atari-py

A packaged and slightly-modified version of https://github.com/bbitmaster/ale_python_interface
GNU General Public License v2.0
367 stars 183 forks source link

The clone/restoreState() actually includes the pseudorandomness of Atari. #28

Closed YuhangSong closed 2 years ago

YuhangSong commented 6 years ago

The documentation says that clone/restoreState() does not include pseudorandomness. However, the fact is that clone/restoreState() does not include the pseudorandomness of repeat-action, but it does include the pseudorandomness of Atari.

The code to verify above fact is as below:

import numpy as np
import atari_py

test = 'loadROM'
# test = 'restoreState'

frame_skip = 4
bunch = 200
sequence = 500

def main():
    result = {
        'name':[],
        'grouped_num':[],
        'distribution':[],
    }

    # game_list = ['air_raid-n', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis']
    # game_list = ['bank_heist', 'battle_zone', 'beam_rider', 'berzerk-n', 'bowling', 'boxing', 'breakout', 'carnival-n']
    # game_list = ['centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk']
    # game_list = ['elevator_action-n', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar']
    # game_list = ['hero', 'ice_hockey', 'jamesbond', 'journey_escape-n', 'kangaroo', 'krull', 'kung_fu_master']
    # game_list = ['montezuma_revenge-n', 'ms_pacman', 'name_this_game', 'phoenix-n', 'pitfall-n', 'pong', 'pooyan-n']
    # game_list = ['private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing-n']
    # game_list = ['solaris-n', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down']
    # game_list = ['venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge-n', 'zaxxon']

    game_list = ['pong', 'assault','ms_pacman']

    for game in game_list:

        if '-n' in game:
            '''games that are not in the nature DQN list'''
            continue

        game_path = atari_py.get_game_path(game)

        env_father = atari_py.ALEInterface()
        env_father.setFloat('repeat_action_probability'.encode('utf-8'), 0.0)
        env_father.setInt(b'random_seed', 3)
        env_father.loadROM(game_path)
        env_father.reset_game()

        if test in ['restoreState']:
            state_after_reset = env_father.cloneState()

        '''generate a sequence of actions'''
        action_sequence = np.random.randint(
            len(env_father.getMinimalActionSet()),
            size = sequence,
        )

        bunch_obs = []
        distribution = []
        samples = []
        for bunch_i in range(bunch):

            env_temp = atari_py.ALEInterface()
            env_temp.setFloat('repeat_action_probability'.encode('utf-8'), 0.0)
            env_temp.setInt(b'random_seed', bunch_i)
            if test in ['loadROM']:
                env_temp.loadROM(game_path)
                env_temp.reset_game()
            elif test in ['restoreState']:
                env_temp.loadROM(game_path) # restoreState without calling loadROM first will cause Segmentation fault (core dumped)
                env_temp.restoreState(state_after_reset)

            # just to make sure
            env_temp.setFloat('repeat_action_probability'.encode('utf-8'), 0.0)
            env_temp.setInt(b'random_seed', bunch_i)

            for sequence_i in range(sequence):
                for frame_skip_i in range(frame_skip):
                    env_temp.act(
                        env_father.getMinimalActionSet()[
                            action_sequence[sequence_i]
                        ]
                    )
                if env_temp.game_over():
                    env_temp.reset_game()

            obs = env_temp.getScreenRGB2()

            samples += [obs]
            found_at_bunch = -1
            if_has_identical_one = False
            max_value = 0
            for bunch_obs_i in range(len(bunch_obs)):
                obs_in_bunch = bunch_obs[bunch_obs_i]
                max_value = np.max(
                    np.abs(
                        obs-obs_in_bunch
                    )
                )
                if max_value < 1:
                    found_at_bunch = bunch_obs_i
                    if_has_identical_one = True
                    distribution[found_at_bunch] += 1
                    break

            if if_has_identical_one is False:
                bunch_obs += [obs]
                distribution += [1]

        grouped_num = len(bunch_obs)
        print('game:{} grouped_num:{} distribution:{}'.format(
            game,
            grouped_num,
            distribution,
        ))

if __name__ == "__main__":
    main()

Specifically, when we set test = 'loadROM', it tests the stochasticity produced by

setInt(b'random_seed')
loadROM()

The results are:

game:pong grouped_num:1 distribution:[200]
game:assault grouped_num:57 distribution:[4, 4, 6, 3, 8, 9, 2, 5, 3, 2, 5, 5, 5, 4, 5, 4, 2, 4, 6, 3, 1, 4, 1, 5, 5, 1, 2, 1, 3, 6, 3, 4, 2, 3, 5, 3, 4, 6, 3, 5, 4, 3, 2, 2, 3, 4, 2, 1, 3, 4, 3, 1, 2, 2, 5, 2, 1]
game:ms_pacman grouped_num:1 distribution:[200]

When we set test = 'restoreState', it tests the stochasticity produced by clone/restoreState(). The results are:

game:pong grouped_num:1 distribution:[200]
game:assault grouped_num:1 distribution:[200]
game:ms_pacman grouped_num:1 distribution:[200]

As you can see, we set repeat_action_probability to zero the whole time, since we do not care about that kind of stochasticity. The results show that clone/restoreState() actually includes the pseudorandomness of Atari. Thus, we do not actually have a function that truly excludes the pseudorandomness, which we would need for planning.

Any ideas? Is there any way we can have a function that clone/restore state of Atari without pseudorandomness so that we can use it for planning?

Many thanks for everyone's effort on this repo!

jkterry1 commented 2 years ago

Hey, this was fixed with the 0.7 release of ALE-Py