Open Alkahwaji opened 2 years ago
Hello, I ran the code using windows and ### Jupiter notebook. I got this error message:
TypeError Traceback (most recent call last) <ipython-input-6-ebd6b7e4996f> in <module> 61 iter_no += 1 62 s, a, r, next_s = agent.sample_env() ---> 63 agent.value_update(s, a, r, next_s) 64 65 reward = 0.0 <ipython-input-6-ebd6b7e4996f> in value_update(self, s, a, r, next_s) 35 best_v, _ = self.best_value_and_action(next_s) 36 new_v = r + GAMMA * best_v ---> 37 old_v = self.values[(s, a)] 38 self.values[(s, a)] = old_v * (1-ALPHA) + new_v * ALPHA 39 TypeError: unhashable type: 'dict'
#!/usr/bin/env python3 import gym import collections from tensorboardX import SummaryWriter ENV_NAME = "FrozenLake-v1" GAMMA = 0.9 ALPHA = 0.2 TEST_EPISODES = 20 class Agent: def __init__(self): self.env = gym.make(ENV_NAME) self.state = self.env.reset() self.values = collections.defaultdict(float) def sample_env(self): action = self.env.action_space.sample() old_state = self.state new_state, reward, is_done, _ = self.env.step(action) self.state = self.env.reset() if is_done else new_state return old_state, action, reward, new_state def best_value_and_action(self, state): best_value, best_action = None, None for action in range(self.env.action_space.n): action_value = self.values[(state, action)] if best_value is None or best_value < action_value: best_value = action_value best_action = action return best_value, best_action def value_update(self, s, a, r, next_s): best_v, _ = self.best_value_and_action(next_s) new_v = r + GAMMA * best_v old_v = self.values[(s, a)] self.values[(s, a)] = old_v * (1-ALPHA) + new_v * ALPHA def play_episode(self, env): total_reward = 0.0 state = env.reset() while True: _, action = self.best_value_and_action(state) new_state, reward, is_done, _ = env.step(action) total_reward += reward if is_done: break state = new_state return total_reward if __name__ == "__main__": test_env = gym.make(ENV_NAME) agent = Agent() writer = SummaryWriter(comment="-q-learning") iter_no = 0 best_reward = 0.0 while True: iter_no += 1 s, a, r, next_s = agent.sample_env() agent.value_update(s, a, r, next_s) reward = 0.0 for _ in range(TEST_EPISODES): reward += agent.play_episode(test_env) reward /= TEST_EPISODES writer.add_scalar("reward", reward, iter_no) if reward > best_reward: print("Best reward updated %.3f -> %.3f" % ( best_reward, reward)) best_reward = reward if reward > 0.80: print("Solved in %d iterations!" % iter_no) break writer.close()
Hello, I ran the code using windows and ### Jupiter notebook. I got this error message:
Error message
The original code::