I'm trying to make the agent avoid the obstacles (danger cells), but the agent only learns to go to the target, even when there is a danger cell in its path.
I think the agent doesn't see the `danger_map` in the observation space.
Code example
class GridWorldEnv(gym.Env):
    """Square grid world with one target cell and randomly placed danger cells.

    Every episode places the agent and the target on distinct cells and draws
    a fresh binary danger map in which each cell is dangerous with probability
    ``danger_prob``. The agent's and the target's cells are always cleared.

    Observation (``spaces.Dict``):
        * ``"agent"``: the agent's ``(x, y)`` cell.
        * ``"target"``: the target's ``(x, y)`` cell.
        * ``"danger_map"``: ``(size, size)`` array of 0/1 flags, indexed as
          ``danger_map[y, x]``.

    Actions (``spaces.Discrete(4)``): 0 = +x, 1 = +y, 2 = -x, 3 = -y.
    In the rendered window +y is drawn downwards.

    An episode terminates when the agent enters a danger cell
    (``DANGER_PENALTY``) or reaches the target (``GOAL_REWARD``); every other
    step yields ``STEP_PENALTY``.
    """

    # You should specify the render modes that are supported by your
    # environment (e.g. "human", "rgb_array", "ansi") and the framerate at
    # which your environment should be rendered.
    # render_fps is the target frames per second (FPS) when rendering.
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self, render_mode=None, size=DEFAULT_SIZE, danger_prob=DANGER_PROB):
        """Build the observation/action spaces and the rendering state.

        Args:
            render_mode: ``None`` or one of ``metadata["render_modes"]``.
            size: Side length of the square grid, in cells (at least 2).
            danger_prob: Probability in ``[0, 1]`` that a cell is dangerous.

        Raises:
            ValueError: If ``size`` is smaller than 2 or ``danger_prob`` is
                outside ``[0, 1]``.
        """
        # With a single cell the target can never differ from the agent and
        # the sampling loop in reset() would never terminate.
        if size < 2:
            raise ValueError(f"size must be at least 2, got {size}")
        if not 0.0 <= danger_prob <= 1.0:
            raise ValueError(f"danger_prob must be in [0, 1], got {danger_prob}")

        self.size = size  # The size of the square grid
        self.window_size = 512  # The size of the PyGame window
        self.danger_prob = danger_prob

        self.observation_space = spaces.Dict(
            {
                "agent": spaces.Box(0, size - 1, shape=(2,), dtype=int),
                "target": spaces.Box(0, size - 1, shape=(2,), dtype=int),
                "danger_map": spaces.MultiBinary([size, size]),
            }
        )

        # We have 4 actions; each maps to a unit step (dx, dy) on the grid.
        self.action_space = spaces.Discrete(4)
        self._action_to_direction = {
            0: np.array([1, 0]),
            1: np.array([0, 1]),
            2: np.array([-1, 0]),
            3: np.array([0, -1]),
        }

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        # Created lazily on the first "human" render.
        self.window = None
        self.clock = None

    def _get_obs(self):
        """Return the current observation as a dict matching ``observation_space``.

        Copies are returned so that callers mutating an observation cannot
        alter the environment's internal state.
        """
        return {
            "agent": self._agent_location.copy(),
            "target": self._target_location.copy(),
            "danger_map": self.danger_map.copy(),
        }

    def _get_info(self):
        """Return auxiliary info: the L1 (Manhattan) distance agent -> target."""
        return {
            "distance": np.linalg.norm(
                self._agent_location - self._target_location, ord=1
            )
        }

    def reset(self, seed=None, options=None):
        """Start a new episode with fresh agent, target and danger map.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset`` to seed
                ``self.np_random``.
            options: Unused.

        Returns:
            ``(observation, info)``.
        """
        # We need the following line to seed self.np_random
        super().reset(seed=seed)

        # Choose the agent's location uniformly at random
        self._agent_location = self.np_random.integers(0, self.size, size=2, dtype=int)

        # Sample the target's location until it differs from the agent's
        self._target_location = self._agent_location
        while np.array_equal(self._target_location, self._agent_location):
            self._target_location = self.np_random.integers(
                0, self.size, size=2, dtype=int
            )

        # Draw the danger map from the environment's own seeded generator
        # (not the global np.random) so reset(seed=...) is reproducible.
        self.danger_map = (
            self.np_random.random((self.size, self.size)) < self.danger_prob
        ).astype(np.int8)

        # Never place a danger cell under the agent or the target.
        # The map is indexed [y, x] while locations are stored as (x, y).
        agent_x, agent_y = self._agent_location
        target_x, target_y = self._target_location
        self.danger_map[agent_y, agent_x] = 0
        self.danger_map[target_y, target_x] = 0

        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, info

    def step(self, action):
        """Move the agent one cell and score the resulting position.

        Args:
            action: Element of ``{0, 1, 2, 3}`` selecting the move direction.

        Returns:
            ``(observation, reward, terminated, truncated, info)``;
            ``truncated`` is always ``False``.
        """
        # Map the action (element of {0,1,2,3}) to the direction we walk in
        direction = self._action_to_direction[int(action)]
        # We use `np.clip` to make sure we don't leave the grid
        self._agent_location = np.clip(
            self._agent_location + direction, 0, self.size - 1
        )

        agent_x, agent_y = self._agent_location
        if self.danger_map[agent_y, agent_x]:
            # The agent walked into a danger cell
            reward = DANGER_PENALTY
            terminated = True
        elif np.array_equal(self._agent_location, self._target_location):
            # The agent has reached the target
            reward = GOAL_REWARD
            terminated = True
        else:
            reward = STEP_PENALTY
            terminated = False

        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, reward, terminated, False, info

    def render(self):
        """Return an RGB frame in ``"rgb_array"`` mode; otherwise ``None``."""
        if self.render_mode == "rgb_array":
            return self._render_frame()

    def _render_frame(self):
        """Draw the grid; show it ("human") or return it as an array.

        Returns:
            In ``"human"`` mode nothing; otherwise the canvas pixels as an
            array with the first two axes swapped relative to
            ``pygame.surfarray.pixels3d``.
        """
        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode(
                (self.window_size, self.window_size)
            )
        if self.clock is None and self.render_mode == "human":
            self.clock = pygame.time.Clock()

        canvas = pygame.Surface((self.window_size, self.window_size))
        canvas.fill((255, 255, 255))
        # The size of a single grid square in pixels
        pix_square_size = self.window_size / self.size
        cell_dims = (pix_square_size, pix_square_size)

        # First we draw the target
        pygame.draw.rect(
            canvas,
            (255, 0, 0),
            pygame.Rect(pix_square_size * self._target_location, cell_dims),
        )

        # Draw danger zones before the agent so the agent stays visible on
        # the final frame when it has stepped onto a danger cell.
        for row, col in np.argwhere(self.danger_map):
            pygame.draw.rect(
                canvas,
                (0, 0, 0),
                pygame.Rect(pix_square_size * np.array([col, row]), cell_dims),
            )

        # Now we draw the agent
        pygame.draw.circle(
            canvas,
            (0, 0, 255),
            (self._agent_location + 0.5) * pix_square_size,
            pix_square_size / 3,
        )

        # Finally, add some gridlines
        for x in range(self.size + 1):
            pygame.draw.line(
                canvas,
                0,
                (0, pix_square_size * x),
                (self.window_size, pix_square_size * x),
                width=3,
            )
            pygame.draw.line(
                canvas,
                0,
                (pix_square_size * x, 0),
                (pix_square_size * x, self.window_size),
                width=3,
            )

        if self.render_mode == "human":
            # The following line copies our drawings from `canvas` to the visible window
            self.window.blit(canvas, canvas.get_rect())
            pygame.event.pump()
            pygame.display.update()

            # We need to ensure that human-rendering occurs at the predefined framerate.
            # The following line will automatically add a delay to keep the framerate stable.
            self.clock.tick(self.metadata["render_fps"])
        else:  # rgb_array
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut down the PyGame window, if one was opened."""
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
            # Drop the stale handles so a later render can re-create them
            # and a repeated close() is a no-op.
            self.window = None
            self.clock = None
Relevant log output / Error message
There is no error; I need help building the environment.
System Info
..
Checklist
[X] I have checked that there is no similar issue in the repo
🐛 Bug
I'm trying to make the agent avoid the obstacles (danger cells), but the agent only learns to go to the target, even when there is a danger cell in its path. I think the agent doesn't see the `danger_map` in the observation space.
Code example
class GridWorldEnv(gym.Env):
# You should specify the render modes that are supported by your environment (e.g. "human", "rgb_array", "ansi")
Relevant log output / Error message
System Info
..
Checklist