DLR-RM / stable-baselines3

PyTorch version of Stable Baselines, reliable implementations of reinforcement learning algorithms.
https://stable-baselines3.readthedocs.io
MIT License
8.84k stars 1.68k forks source link

Avoiding obstacles #1944

Closed rowanhossamm closed 3 months ago

rowanhossamm commented 3 months ago

🐛 Bug

I'm trying to make the agent avoid the obstacles (danger cells), but the agent only learns to go to the target, even when there is a danger cell in its path. I think the agent doesn't see the `danger_map` in the observation space.

Code example

class GridWorldEnv(gym.Env):

# You should specify the render modes that are supported by your environment (e.g. "human", "rgb_array", "ansi")
# and the framerate at which your environment should be rendered.
# render_fps represents the target frames per second (FPS) when rendering the environment.
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

def __init__(self, render_mode=None, size=DEFAULT_SIZE, danger_prob=DANGER_PROB):
    """Set up the spaces, the action table and the rendering state.

    Args:
        render_mode: ``None`` or one of ``metadata["render_modes"]``.
        size: Side length of the square grid, in cells.
        danger_prob: Stored on the instance; ``reset`` uses it as the
            per-cell probability of a danger cell.
    """
    self.size = size
    self.danger_prob = danger_prob
    self.window_size = 512  # side length of the PyGame window, in pixels

    # Agent and target are integer (x, y) cell coordinates in [0, size - 1];
    # the danger map is a size x size binary grid.
    highest_index = size - 1
    layout = {
        "agent": spaces.Box(0, highest_index, shape=(2,), dtype=int),
        "target": spaces.Box(0, highest_index, shape=(2,), dtype=int),
        "danger_map": spaces.MultiBinary([size, size]),
    }
    self.observation_space = spaces.Dict(layout)

    # Four discrete actions; action i moves the agent by the i-th offset
    # below, i.e. +x, +y, -x, -y.
    self.action_space = spaces.Discrete(4)
    offsets = ([1, 0], [0, 1], [-1, 0], [0, -1])
    self._action_to_direction = {
        index: np.array(offset) for index, offset in enumerate(offsets)
    }

    if render_mode is not None:
        assert render_mode in self.metadata["render_modes"]
    self.render_mode = render_mode

    # Created lazily by the first human-mode frame.
    self.window = None
    self.clock = None

def _get_obs(self):
    return {"agent": self._agent_location, "target": self._target_location, "danger_map": self.danger_map}

def _get_info(self):
    return { 
        "distance": np.linalg.norm(self._agent_location - self._target_location, ord=1)
    }

def reset(self, seed=None, options=None):
    """Start a new episode with fresh agent, target and danger-map state.

    Args:
        seed: Forwarded to ``super().reset`` to seed ``self.np_random``.
        options: Accepted for API compatibility; not used here.

    Returns:
        ``(observation, info)`` as built by ``_get_obs`` and ``_get_info``.
    """
    # Seeds self.np_random.
    super().reset(seed=seed)

    # Agent position: uniform over the grid.
    self._agent_location = self.np_random.integers(0, self.size, size=2, dtype=int)

    # Target position: resample until it differs from the agent's cell.
    self._target_location = self._agent_location
    while np.array_equal(self._target_location, self._agent_location):
        self._target_location = self.np_random.integers(0, self.size, size=2, dtype=int)

    # Each cell is dangerous with probability danger_prob. Draw from
    # self.np_random rather than the global np.random so that `seed`
    # reproduces the danger map as well as the two positions.
    self.danger_map = (
        self.np_random.random((self.size, self.size)) < self.danger_prob
    ).astype(np.int8)

    # The start and goal cells are never dangerous. Locations are (x, y)
    # while the map is indexed [y, x].
    self.danger_map[self._agent_location[1], self._agent_location[0]] = 0
    self.danger_map[self._target_location[1], self._target_location[0]] = 0

    observation = self._get_obs()
    info = self._get_info()

    if self.render_mode == "human":
        self._render_frame()

    return observation, info

def step(self, action):
    """Advance the episode by one move.

    Args:
        action: Index into ``self._action_to_direction`` (converted with
            ``int``).

    Returns:
        ``(observation, reward, terminated, truncated, info)``; this method
        always reports ``truncated`` as ``False``.
    """
    move = self._action_to_direction[int(action)]

    # Clipping keeps the agent on the grid: a move off the edge leaves the
    # clipped coordinate unchanged.
    self._agent_location = np.clip(self._agent_location + move, 0, self.size - 1)

    # Locations are (x, y); the danger map is indexed [y, x].
    col, row = self._agent_location

    if self.danger_map[row, col]:
        # Stepped onto a danger cell: penalise and end the episode.
        reward, terminated = DANGER_PENALTY, True
    elif np.array_equal(self._agent_location, self._target_location):
        # Reached the target.
        reward, terminated = GOAL_REWARD, True
    else:
        # Ordinary move.
        reward, terminated = STEP_PENALTY, False

    observation = self._get_obs()
    info = self._get_info()

    if self.render_mode == "human":
        self._render_frame()

    return observation, reward, terminated, False, info

def render(self):
    """Return the frame from ``_render_frame`` in "rgb_array" mode.

    In any other render mode this does nothing and returns ``None``.
    """
    if self.render_mode != "rgb_array":
        return None
    return self._render_frame()

def _render_frame(self):
    """Draw the current grid state with PyGame.

    In "human" mode the frame is shown in the PyGame window (created on
    first use) and nothing is returned. In any other mode the drawn
    pixels are returned as a NumPy array.
    """
    human = self.render_mode == "human"

    # Lazily create the window and the clock for human-mode rendering.
    if self.window is None and human:
        pygame.init()
        pygame.display.init()
        self.window = pygame.display.set_mode((self.window_size, self.window_size))
    if self.clock is None and human:
        self.clock = pygame.time.Clock()

    surface = pygame.Surface((self.window_size, self.window_size))
    surface.fill((255, 255, 255))
    cell = self.window_size / self.size  # side length of one grid cell, in pixels
    cell_dims = (cell, cell)

    # Target: red square.
    pygame.draw.rect(
        surface,
        (255, 0, 0),
        pygame.Rect(cell * self._target_location, cell_dims),
    )

    # Agent: blue circle centred in its cell.
    pygame.draw.circle(
        surface,
        (0, 0, 255),
        (self._agent_location + 0.5) * cell,
        cell / 3,
    )

    # Danger cells: black squares. The map is indexed [y, x].
    for x in range(self.size):
        for y in range(self.size):
            if not self.danger_map[y, x]:
                continue
            pygame.draw.rect(
                surface,
                (0, 0, 0),
                pygame.Rect(cell * np.array([x, y]), cell_dims),
            )

    # Grid lines: one horizontal and one vertical line per cell boundary.
    for k in range(self.size + 1):
        offset = cell * k
        pygame.draw.line(surface, 0, (0, offset), (self.window_size, offset), width=3)
        pygame.draw.line(surface, 0, (offset, 0), (offset, self.window_size), width=3)

    if not human:
        # rgb_array: copy the surface pixels and swap the first two axes.
        return np.transpose(
            np.array(pygame.surfarray.pixels3d(surface)), axes=(1, 0, 2)
        )

    # Copy the drawing onto the visible window and present it.
    self.window.blit(surface, surface.get_rect())
    pygame.event.pump()
    pygame.display.update()
    # Delay as needed so human rendering runs at the configured framerate.
    self.clock.tick(self.metadata["render_fps"])

def close(self):
    """Shut down PyGame if a window was opened, and forget the stale handles.

    Does nothing when no window was ever created.
    """
    if self.window is None:
        return
    pygame.display.quit()
    pygame.quit()
    # Drop the dead handles so a repeated close() is a no-op and a later
    # human-mode frame re-creates the window (``_render_frame`` checks for
    # ``None``) instead of drawing to a closed display.
    self.window = None
    self.clock = None

Relevant log output / Error message

There is no error; I need help building the environment.

System Info

..

Checklist