I'm trying to write an A2C chess model, but I'm having a weird bug where the model tries to index keys that don't exist in the Discrete() action space when either player is put in check. I tried running the check_env() function, and it returned two warnings:
Projects\mlenv\Lib\site-packages\stable_baselines3\common\env_checker.py:38: UserWarning: It seems that your observation space is an image but the upper and lower bounds are not in [0, 255]. Because the CNN policy normalize automatically the observation you may encounter issue if the values are not in that range.
Projects\mlenv\Lib\site-packages\stable_baselines3\common\env_checker.py:51: UserWarning: The minimal resolution for an image is 36x36 for the default `CnnPolicy`. You might need to use a custom features extractor cf. https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html
The observation space is initialized as the following:
ie, a one-hot encoded chessboard. Clearly, this is not an image, so what about it is causing the A2C model to treat it like one? Here's a version of my code with irrelevant methods/functions removed:
import chess
import re
import gymnasium as gym
import numpy as np
from stable_baselines3 import A2C
from stable_baselines3.common.env_checker import check_env
# Creating some constants
MOVE_LIST_REGEX = r'\((.*?)\)'
board = chess.Board()
starting_pos = np.zeros((8, 8, 12), dtype=np.uint8)
for square, piece in board.piece_map().items():
piece_type = piece.piece_type
piece_color = int(piece.color)
starting_pos[square // 8][square % 8][piece_type - 1 + 6 * piece_color] = 1
STARTING_POS_TENSOR = starting_pos
def calculate_reward(board: chess.Board):
result = board.result()
if result == '1-0':
return 1 # Agent wins
elif result == '0-1':
return -1 # Agent loses
else:
return 0
def make_move_list(board: chess.Board):
moves = [move.strip() for move in re.findall(MOVE_LIST_REGEX, str(board.legal_moves))[0].split(',')]
move_dict = {i: move for i, move in enumerate(moves)}
return move_dict
class ChessEnvironment(gym.Env):
def __init__(self):
super().__init__()
# Define the observation space
# We represent the board with an 8x8 grid, then imagine one-hot encoding tensors extending
# into the 3rd dimension of the chessboard, representing what piece is where (both side and color)
# Since there are 6 pieces per side (pawn, knight, bishop, rook, queen, and king), there are 12 total
self.observation_space = gym.spaces.Box(low=0, high=1, shape=(8, 8, 12), dtype=np.uint8)
# Initialize the chessboard
self.board = chess.Board()
# Action space
self.move_dict = make_move_list(self.board)
self.action_space = gym.spaces.Discrete(len(self.move_dict))
def sync_action_space(self):
self.move_dict = make_move_list(self.board)
self.action_space = gym.spaces.Discrete(len(self.move_dict))
def reset(self, seed=None, options=None):
# Reset the chessboard to the starting position
self.board = chess.Board()
self.sync_action_space()
# Return the initial observation
return STARTING_POS_TENSOR, {}
def step(self, action):
move = self.move_dict[action]
# Execute the specified action on the chessboard and update model parameters
self.board.push_san(move)
self.render()
self.sync_action_space()
# Convert the board to the observation format
observation = np.zeros((8, 8, 12), dtype=np.uint8)
for square, piece in self.board.piece_map().items():
piece_type = piece.piece_type
piece_color = int(piece.color)
observation[square // 8][square % 8][piece_type - 1 + 6 * piece_color] = 1
# Calculate the reward
reward = calculate_reward(self.board)
# Check if the episode is done
terminated = (self.board.result() != '*')
# Return the observation, reward, done flag, and additional info
return observation, reward, terminated, False, {}
# Train the model
total_timesteps = int(1e5)
log_interval = 1000
for timestep in range(total_timesteps):
observation, _ = env.reset()
done = False
while not done:
action, _ = model.predict(observation)
observation, reward, done, _, _ = env.step(int(action))
if timestep % log_interval == 0:
print(f"Timestep: {timestep}/{total_timesteps}")
# Save the trained model
model.save("a2c_chess_model")
Also here's the traceback if that's helpful:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[205], line 13
11 print(f"The next action index is {action}")
12 print(type(action))
---> 13 observation, reward, done, _, _ = env.step(int(action))
15 if timestep % log_interval == 0:
16 print(f"Timestep: {timestep}/{total_timesteps}")
Cell In[202], line 41, in ChessEnvironment.step(self, action)
40 def step(self, action):
---> 41 move = self.move_dict[action]
42 # Execute the specified action on the chessboard
43 self.board.push_san(move)
KeyError: 18
Checklist
[X] I have checked that there is no similar issue in the repo
❓ Question
I'm trying to write an A2C chess model, but I'm having a weird bug where the model tries to index keys that don't exist in the Discrete() action space when either player is put in check. I tried running the
check_env()
function, and it returned two warnings:The observation space is initialized as the following:
ie, a one-hot encoded chessboard. Clearly, this is not an image, so what about it is causing the
A2C
model to treat it like one? Here's a version of my code with irrelevant methods/functions removed:Also here's the traceback if that's helpful:
Checklist