def reward(self, agent, world):
    """Return the landmark-coverage reward for ``agent``, minus collision penalties.

    For every landmark in ``world.landmarks`` the distance from the closest
    agent (taken over all of ``world.agents``) is subtracted from the reward.
    If ``agent.collide`` is truthy, 1 more is subtracted for each *other*
    agent that ``self.is_collision`` reports as colliding with ``agent``.

    Args:
        agent: The agent being rewarded; ``agent.collide`` is read.
        world: Object exposing ``landmarks`` and ``agents``; every entity
            must provide ``state.p_pos``.

    Returns:
        The accumulated reward (never positive: only distances and
        penalties are subtracted from 0).
    """
    rew = 0
    for landmark in world.landmarks:
        # Euclidean distance from this landmark to whichever agent is closest.
        rew -= min(
            np.sqrt(np.sum(np.square(other.state.p_pos - landmark.state.p_pos)))
            for other in world.agents
        )
    if agent.collide:
        for other in world.agents:
            # An agent is at zero distance from itself, so comparing it with
            # itself would register as a collision and cost a constant -1.
            if other is agent:
                continue
            if self.is_collision(other, agent):
                rew -= 1
    return rew
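Note: the collision loop in `reward` iterates over every agent in `world.agents`, which includes the agent being rewarded. Unless that agent is explicitly skipped, it is compared with itself, counted as colliding with itself, and receives a spurious -1 penalty on every call.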