I made a custom agent that uses Q learning to evaluate it's next action. But for some reason, I get a huge string of random digits printed to standard out and then this error pops up when running said code:
Traceback (most recent call last):
File "/Users/david.dalisay/Arcade-Learning-Environment/lib/python3.6/site-packages/gym/envs/classic_control/rendering.py", line 347, in __del__
File "/Users/david.dalisay/Arcade-Learning-Environment/lib/python3.6/site-packages/gym/envs/classic_control/rendering.py", line 343, in close
File "/Users/david.dalisay/Arcade-Learning-Environment/lib/python3.6/site-packages/pyglet/window/cocoa/__init__.py", line 277, in close
File "/Users/david.dalisay/Arcade-Learning-Environment/lib/python3.6/site-packages/pyglet/window/__init__.py", line 747, in close
ImportError: sys.meta_path is None, Python is likely shutting down
I've seen that the program runs fine (screen renders and program plays Kung Fu Master) if I leave out line 60 of the following (I made a comment where the issue is):
from datetime import datetime
import gym
import time
from gym import wrappers, logger
from random import randint
class QLearningAgent(object):
def __init__(self, action_space):
self.action_space = action_space
self.q_table = {}
self.gamma = 0.8 # discount factor
self.alpha = 1.0 # learning rate
self.epsilon = 10 # percentage.
def update_q_table(self, observation, action, reward):
function_start_time = datetime.now()
observation_key = self.get_observation_key(observation)
current_q_value = self.q_table[observation_key][action]
new_q_value = current_q_value + self.alpha * (reward + self.gamma * max(self.q_table[observation_key]) - current_q_value)
self.q_table[observation_key][action] = round(new_q_value, 2)
def get_observation_key(self, observation):
return "".join(map(str,observation.flatten()))
def act(self, observation, reward, done):
observation_key = self.get_observation_key(observation)
if observation_key not in self.q_table:
self.q_table[observation_key] = [0 for action in range(self.action_space.n)]
if randint(0,100) < self.epsilon:
action = self.action_space.sample()
else:
if sum(self.q_table[observation_key]) > 0:
action = self.q_table[observation_key].index(max(self.q_table[observation_key]))
else:
action = self.action_space.sample()
return action
if __name__ == '__main__':
logger.set_level(logger.INFO)
env = gym.make('KungFuMaster-v4')
env._max_episode_steps = 1000
env.seed(0)
agent = QLearningAgent(env.action_space)
episode_count = 1
reward = 0
done = False
for i in range(10):
observation = env.reset()
observation_index = 0
while True:
observation_index += 1
action = agent.act(observation, reward, done)
observation, reward, done, _ = env.step(action)
#### This causes program to break. ###
agent.update_q_table(observation, action, reward)
if done:
break
env.render()
env.close()
Hi all,
I made a custom agent that uses Q learning to evaluate it's next action. But for some reason, I get a huge string of random digits printed to standard out and then this error pops up when running said code:
I've seen that the program runs fine (screen renders and program plays Kung Fu Master) if I leave out line 60 of the following (I made a comment where the issue is):