This is something you will have to add to your reward method of the scenario you are running.
def reward(self, agent: Agent):
    is_first = agent == self.world.agents[0]
    if is_first:
        # Recompute the shared reward terms once per step, when the
        # first agent is queried.
        self.pos_rew[:] = 0
        self.final_rew[:] = 0
        for a in self.world.agents:
            self.pos_rew += self.agent_reward(a)
            a.agent_collision_rew[:] = 0
        # Grant the final bonus in the envs where every agent is on its goal.
        self.all_goal_reached = torch.all(
            torch.stack([a.on_goal for a in self.world.agents], dim=-1),
            dim=-1,
        )
        self.final_rew[self.all_goal_reached] = self.final_reward
        # Penalize each unordered pair of agents that gets too close.
        for i, a in enumerate(self.world.agents):
            for j, b in enumerate(self.world.agents):
                if i <= j:
                    continue
                if self.world.collides(a, b):
                    distance = self.world.get_distance(a, b)
                    a.agent_collision_rew[
                        distance <= self.min_collision_distance
                    ] += self.agent_collision_penalty
                    b.agent_collision_rew[
                        distance <= self.min_collision_distance
                    ] += self.agent_collision_penalty
    pos_reward = self.pos_rew if self.shared_rew else agent.pos_rew
    return pos_reward + self.final_rew + agent.agent_collision_rew
This is from the navigation example, so study that for a bit. The part you need is the pairwise loop at the end: set agent_collision_penalty to a negative value, and it is added to an agent's reward whenever two agents come within min_collision_distance of each other.
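For completeness, the buffers and constants used above are created in the scenario's make_world. Here is a minimal sketch assuming the same attribute names as the navigation example; the default values are only illustrative, and batch_dim, device, and world are the usual make_world arguments and locals:

import torch

# Sketch: scenario attributes assumed to be set up in make_world.
# Names mirror the navigation example; defaults are illustrative.
self.shared_rew = kwargs.get("shared_rew", True)
self.agent_collision_penalty = kwargs.get("agent_collision_penalty", -1.0)
self.final_reward = kwargs.get("final_reward", 0.01)
self.min_collision_distance = 0.005

# One reward entry per vectorized environment.
self.pos_rew = torch.zeros(batch_dim, device=device)
self.final_rew = self.pos_rew.clone()
for agent in world.agents:
    agent.pos_rew = torch.zeros(batch_dim, device=device)
    agent.agent_collision_rew = agent.pos_rew.clone()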
For adding objects at non-random positions, I would look at the football example, specifically the init_walls / reset_walls methods, since the walls there are spawned in the same position every time you run the environment.
def init_walls(self, world):
    # Four identical line segments; only the names (and, in reset_walls,
    # the positions) differ.
    right_top_wall = Landmark(
        name="Right Top Wall",
        collide=True,
        movable=False,
        shape=Line(
            length=self.pitch_width / 2 - self.agent_size - self.goal_size / 2,
        ),
        color=Color.WHITE,
    )
    world.add_landmark(right_top_wall)
    left_top_wall = Landmark(
        name="Left Top Wall",
        collide=True,
        movable=False,
        shape=Line(
            length=self.pitch_width / 2 - self.agent_size - self.goal_size / 2,
        ),
        color=Color.WHITE,
    )
    world.add_landmark(left_top_wall)
    right_bottom_wall = Landmark(
        name="Right Bottom Wall",
        collide=True,
        movable=False,
        shape=Line(
            length=self.pitch_width / 2 - self.agent_size - self.goal_size / 2,
        ),
        color=Color.WHITE,
    )
    world.add_landmark(right_bottom_wall)
    left_bottom_wall = Landmark(
        name="Left Bottom Wall",
        collide=True,
        movable=False,
        shape=Line(
            length=self.pitch_width / 2 - self.agent_size - self.goal_size / 2,
        ),
        color=Color.WHITE,
    )
    world.add_landmark(left_bottom_wall)
def reset_walls(self, env_index: int = None):
    # batch_index selects which vectorized env to reset; None resets all.
    for landmark in self.world.landmarks:
        if landmark.name == "Left Top Wall":
            landmark.set_pos(
                torch.tensor(
                    [
                        -self.pitch_length / 2,
                        self.pitch_width / 4 + self.goal_size / 4,
                    ],
                    dtype=torch.float32,
                    device=self.world.device,
                ),
                batch_index=env_index,
            )
            landmark.set_rot(
                torch.tensor(
                    [torch.pi / 2],
                    dtype=torch.float32,
                    device=self.world.device,
                ),
                batch_index=env_index,
            )
        elif landmark.name == "Left Bottom Wall":
            landmark.set_pos(
                torch.tensor(
                    [
                        -self.pitch_length / 2,
                        -self.pitch_width / 4 - self.goal_size / 4,
                    ],
                    dtype=torch.float32,
                    device=self.world.device,
                ),
                batch_index=env_index,
            )
            landmark.set_rot(
                torch.tensor(
                    [torch.pi / 2],
                    dtype=torch.float32,
                    device=self.world.device,
                ),
                batch_index=env_index,
            )
        elif landmark.name == "Right Top Wall":
            landmark.set_pos(
                torch.tensor(
                    [
                        self.pitch_length / 2,
                        self.pitch_width / 4 + self.goal_size / 4,
                    ],
                    dtype=torch.float32,
                    device=self.world.device,
                ),
                batch_index=env_index,
            )
            landmark.set_rot(
                torch.tensor(
                    [torch.pi / 2],
                    dtype=torch.float32,
                    device=self.world.device,
                ),
                batch_index=env_index,
            )
        elif landmark.name == "Right Bottom Wall":
            landmark.set_pos(
                torch.tensor(
                    [
                        self.pitch_length / 2,
                        -self.pitch_width / 4 - self.goal_size / 4,
                    ],
                    dtype=torch.float32,
                    device=self.world.device,
                ),
                batch_index=env_index,
            )
            landmark.set_rot(
                torch.tensor(
                    [torch.pi / 2],
                    dtype=torch.float32,
                    device=self.world.device,
                ),
                batch_index=env_index,
            )
Each entity has set_pos and set_rot methods, so you can specify the exact position and rotation you want it to be in.
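Applied to your discovery question, here is a minimal sketch of pinning one landmark to a fixed point in a scenario's reset; the landmark name "Static Target" and the coordinates are placeholders you would define yourself in make_world:

import torch

def reset_world_at(self, env_index: int = None):
    for landmark in self.world.landmarks:
        # "Static Target" is a hypothetical name given to the landmark
        # when it was created in make_world.
        if landmark.name == "Static Target":
            landmark.set_pos(
                torch.tensor(
                    [0.5, -0.25],  # fixed (x, y) of your choice
                    dtype=torch.float32,
                    device=self.world.device,
                ),
                batch_index=env_index,
            )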
Hope it helps
Thanks for your reply, it's amazing!
Hello. I want to ask about two things that could make these envs more realistic. 1) Can you add a negative reward when agents collide with each other? I want to keep each agent from coming too close to the other agents. 2) Can you help me put obstacles in the discovery env and place targets at specific static points, instead of randomly generated targets? Thank you.