Kung Fu Master - sys.meta_path is None error

Hi all,

I made a custom agent that uses Q learning to evaluate it's next action. But for some reason, I get a huge string of random digits printed to standard out and then this error pops up when running said code:

Traceback (most recent call last):
  File "/Users/david.dalisay/Arcade-Learning-Environment/lib/python3.6/site-packages/gym/envs/classic_control/rendering.py", line 347, in __del__
  File "/Users/david.dalisay/Arcade-Learning-Environment/lib/python3.6/site-packages/gym/envs/classic_control/rendering.py", line 343, in close
  File "/Users/david.dalisay/Arcade-Learning-Environment/lib/python3.6/site-packages/pyglet/window/cocoa/__init__.py", line 277, in close
  File "/Users/david.dalisay/Arcade-Learning-Environment/lib/python3.6/site-packages/pyglet/window/__init__.py", line 747, in close
ImportError: sys.meta_path is None, Python is likely shutting down

I've seen that the program runs fine (screen renders and program plays Kung Fu Master) if I leave out line 60 of the following (I made a comment where the issue is):

from datetime import datetime
import gym
import time
from gym import wrappers, logger
from random import randint

class QLearningAgent(object):
    def __init__(self, action_space):
        self.action_space = action_space
        self.q_table = {}
        self.gamma = 0.8  # discount factor 
        self.alpha = 1.0  # learning rate
        self.epsilon = 10 # percentage.

    def update_q_table(self, observation, action, reward):
        function_start_time = datetime.now()
        observation_key = self.get_observation_key(observation)
        current_q_value = self.q_table[observation_key][action]
        new_q_value = current_q_value + self.alpha * (reward + self.gamma * max(self.q_table[observation_key]) - current_q_value)
        self.q_table[observation_key][action] = round(new_q_value, 2)

    def get_observation_key(self, observation):
        return "".join(map(str,observation.flatten()))

    def act(self, observation, reward, done):
        observation_key = self.get_observation_key(observation)
        if observation_key not in self.q_table:
            self.q_table[observation_key] = [0 for action in range(self.action_space.n)]

        if randint(0,100) < self.epsilon:
            action = self.action_space.sample()
        else:
            if sum(self.q_table[observation_key]) > 0:
                action = self.q_table[observation_key].index(max(self.q_table[observation_key]))
            else:
                action = self.action_space.sample()

        return action

if __name__ == '__main__':
    logger.set_level(logger.INFO)

    env = gym.make('KungFuMaster-v4')
    env._max_episode_steps = 1000
    env.seed(0)
    agent = QLearningAgent(env.action_space)

    episode_count = 1
    reward = 0
    done = False

    for i in range(10):
        observation = env.reset()
        observation_index = 0
        while True:
            observation_index += 1
            action = agent.act(observation, reward, done)
            observation, reward, done, _ = env.step(action)

           #### This causes program to break. ###
            agent.update_q_table(observation, action, reward)
            if done:
                break
            env.render()
    env.close()

openai / gym

Kung Fu Master - sys.meta_path is None error #908