import time
import gymnasium as gym
import numpy as np
## Tabular Q-learning on the FrozenLake-v1 environment (gymnasium API).
env = gym.make('FrozenLake-v1')
render = False           # set True to call env.render() each step (needs a render_mode)
running_reward = None    # exponential moving average of episode rewards

## Initialize the Q-table: one row per state, one column per action.
Q = np.zeros([env.observation_space.n, env.action_space.n])

## Hyperparameters
lr = .85               # learning rate (alpha)
lambd = .99            # discount factor (gamma)
num_episodes = 10000
rList = []             # total reward collected in each episode

##================= start playing =====================##
for i in range(num_episodes):
    episode_time = time.time()
    # Reset the environment; env.reset() returns a (state, info) tuple.
    s = env.reset()[0]  # keep only the state, discard info
    rAll = 0
    for j in range(99):  # cap each episode at 99 steps
        if render:
            env.render()
        ## Action selection: greedy w.r.t. Q plus exploration noise that
        ## decays as 1/(i+1) over the course of training.
        a = np.argmax(Q[s, :] + np.random.randn(1, env.action_space.n) * (1. / (i + 1)))
        ## Interact with the environment to get the next state and reward.
        s1, r, terminated, truncated, _ = env.step(a)
        d = terminated or truncated  # episode ends on either condition
        ## Q-learning update rule: Q(s,a) += lr * (r + gamma*max_a' Q(s',a') - Q(s,a))
        Q[s, a] = Q[s, a] + lr * (r + lambd * np.max(Q[s1, :]) - Q[s, a])
        rAll += r
        s = s1  # advance to the next state
        if d:   # episode finished, leave the step loop
            break
    rList.append(rAll)
    running_reward = rAll if running_reward is None else running_reward * 0.99 + rAll * 0.01
    print("Episode [%d/%d] sum reward: %f running reward: %f took: %.5fs " % \
          (i, num_episodes, rAll, running_reward, time.time() - episode_time))
# Print the final Q-table.
# Fix: the original used "/n", which printed the literal characters "/n"
# instead of a newline; "\n" is the correct escape sequence.
print("Final Q-Table Values:\n %s" % Q)
主要是 import gymnasium as gym。gym 由 OpenAI 开发,而 gymnasium 由 Farama Foundation 开发,基本上继承了 gym。从现在开始应该使用 gymnasium,因为它仍在积极开发中。使用 gym 会出现与新版 NumPy 不兼容的情况,报错:module 'numpy' has no attribute 'bool8'. Did you mean: 'bool'?
测试环境版本:gymnasium==0.29.1 numpy==2.1.1
主要是 import gymnasium as gym。gym 由 OpenAI 开发,而 gymnasium 由 Farama Foundation 开发,基本上继承了 gym。从现在开始应该使用 gymnasium,因为它仍在积极开发中。使用 gym 会出现与新版 NumPy 不兼容的情况,报错:module 'numpy' has no attribute 'bool8'. Did you mean: 'bool'?