for i in range(self.num_agents):
    obs_batch_i = obs_batch[i]
    indiv_action_batch_i = indiv_action_batch[i]
    indiv_reward_batch_i = indiv_reward_batch[i]
    next_obs_batch_i = next_obs_batch[i]

    next_global_actions = []
    for agent in self.agents:
        next_obs_batch_i = torch.FloatTensor(next_obs_batch_i)
        indiv_next_action = agent.actor.forward(next_obs_batch_i)  # shouldn't next_obs_batch[idx] (each agent's own batch) replace next_obs_batch_i here?
I think this should instead be:
for idx, agent in enumerate(self.agents):
    indiv_next_action = agent.actor.forward(
        torch.tensor(next_obs_batch[idx], dtype=torch.float).to(agent.device))
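For context, here is a self-contained sketch of what I mean, pulled out into a helper so the whole inner loop is visible (the name gather_next_global_actions is mine, and I'm assuming each agent exposes .actor and .device as in the snippet, and that the centralized critic takes the per-agent actions concatenated along dim 1):

import torch

def gather_next_global_actions(agents, next_obs_batch):
    # Query each agent's actor with ITS OWN next-observation batch
    # (next_obs_batch[idx]) rather than one shared batch, then
    # concatenate the per-agent actions into the joint action the
    # centralized critic expects.
    next_global_actions = []
    for idx, agent in enumerate(agents):
        obs = torch.tensor(next_obs_batch[idx], dtype=torch.float).to(agent.device)
        next_global_actions.append(agent.actor.forward(obs))
    return torch.cat(next_global_actions, dim=1)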
From maddpg.py:
def update(self, batch_size):
    (obs_batch, indiv_action_batch, indiv_reward_batch, next_obs_batch,
     global_state_batch, global_actions_batch, global_next_state_batch,
     done_batch) = self.replay_buffer.sample(batch_size)
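For reference, a minimal sketch of a replay buffer whose sample() would match that unpacking; the field layout (per-agent lists for obs/action/reward, stacked arrays for the global views) is my assumption from how the batches are indexed above, so the actual buffer in this repo may differ:

import random
import numpy as np

class ReplayBufferSketch:
    # Hypothetical buffer matching the unpacking in update(): per-agent
    # fields come back as lists indexed by agent id, global fields as
    # single stacked arrays (one row per sampled transition).
    def __init__(self, num_agents):
        self.num_agents = num_agents
        self.storage = []  # entries: (obs_n, act_n, rew_n, next_obs_n, done)

    def push(self, obs_n, act_n, rew_n, next_obs_n, done):
        self.storage.append((obs_n, act_n, rew_n, next_obs_n, done))

    def sample(self, batch_size):
        batch = random.sample(self.storage, batch_size)
        obs_n, act_n, rew_n, next_obs_n, done = zip(*batch)

        def per_agent(field):
            # field[t][i] -> agent i's value in transition t
            return [np.array([f[i] for f in field])
                    for i in range(self.num_agents)]

        obs_batch = per_agent(obs_n)  # obs_batch[i] has shape (batch_size, obs_dim_i)
        indiv_action_batch = per_agent(act_n)
        indiv_reward_batch = per_agent(rew_n)
        next_obs_batch = per_agent(next_obs_n)
        # global views: concatenate all agents' vectors per transition
        global_state_batch = np.array([np.concatenate(o) for o in obs_n])
        global_actions_batch = np.array([np.concatenate(a) for a in act_n])
        global_next_state_batch = np.array([np.concatenate(o) for o in next_obs_n])
        done_batch = np.array(done)
        return (obs_batch, indiv_action_batch, indiv_reward_batch,
                next_obs_batch, global_state_batch, global_actions_batch,
                global_next_state_batch, done_batch)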