zly987 closed this issue 2 years ago
What bugs in model.py do you think need to be fixed?
```python
def get_loss(self, batch):
    batch_size = len(batch.obs)
    obs, state, actions, old_log_prob_a, old_values, old_next_values, \
        rewards, next_obs, next_state, done, last_step, actions_avail, \
        last_hid_policy, hid_policy, last_hid_value, hid_value = self.unpack_data(batch)
```
Function: unpack_data()
Why should this function be fixed? Could you give more details?
In model.py:
```python
def unpack_data(self, batch):
    reward = th.tensor(batch.reward, dtype=th.float).to(self.device)
    last_step = th.tensor(batch.last_step, dtype=th.float).to(self.device).contiguous().view(-1, 1)
    done = th.tensor(batch.done, dtype=th.float).to(self.device).contiguous().view(-1, 1)
    action = th.tensor(np.concatenate(batch.action, axis=0), dtype=th.float).to(self.device)
    # note: log_prob_a is built from batch.action here, the same source as `action` above
    log_prob_a = th.tensor(np.concatenate(batch.action, axis=0), dtype=th.float).to(self.device)
    value = th.tensor(np.concatenate(batch.value, axis=0), dtype=th.float).to(self.device)
    next_value = th.tensor(np.concatenate(batch.next_value, axis=0), dtype=th.float).to(self.device)
    state = prep_obs(list(zip(batch.state))).to(self.device)
    next_state = prep_obs(list(zip(batch.next_state))).to(self.device)
    action_avail = th.tensor(np.concatenate(batch.action_avail, axis=0)).to(self.device)
    last_hid = th.tensor(np.concatenate(batch.last_hid, axis=0), dtype=th.float).to(self.device)
    hid = th.tensor(np.concatenate(batch.hid, axis=0), dtype=th.float).to(self.device)
    if self.args.reward_normalisation:
        reward = self.batchnorm(reward).to(self.device)
    return (state, action, log_prob_a, value, next_value, reward, next_state, done, last_step, action_avail, last_hid, hid)
```
Thank you very much for your patience in answering my questions.
The unpack_data() function is defined differently in model.py and matd3.py.
I mean, what bugs exist in this function? I did not observe anything that needs to be fixed.
In model.py, unpack_data() returns 12 variables, but in matd3.py it returns 16 variables.
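For illustration, a minimal self-contained sketch (hypothetical names, not the repository's code) of why such a mismatch fails at runtime: Python tuple unpacking requires the number of targets to match the number of returned values.

```python
# Hypothetical stand-in: unpacking a 12-tuple into 16 names fails immediately.
def unpack_12():
    return tuple(range(12))  # plays the role of model.py's 12 return values

try:
    a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p = unpack_12()  # 16 targets
except ValueError as err:
    print(err)  # not enough values to unpack (expected 16, got 12)
```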
I have fixed the inconsistency between model.py and matd3.py. Please try it again. If there are any issues, please report them without hesitation.
OK, thank you very much!
In matd3.py:
```python
def get_loss(self, batch):
    batch_size = len(batch.state)
    state, actions, old_log_prob_a, old_values, old_next_values, rewards, \
        next_state, done, last_step, actions_avail, last_hids, hids = self.unpack_data(batch)
    _, actions_pol, log_prob_a, action_out, _ = self.get_actions(state, status='train', exploration=False,
                                                                 actions_avail=actions_avail, target=False,
                                                                 last_hid=last_hids)
    # _, next_actions, _, _, _ = self.get_actions(next_obs, status='train', exploration=True, actions_avail=actions_avail, target=True, last_hid=hids)
    if self.args.double_q:
        _, next_actions, _, _, _ = self.get_actions(next_state, status='train', exploration=True,
                                                    actions_avail=actions_avail, target=False, last_hid=hids,
                                                    clip=True)
    else:
        _, next_actions, _, _, _ = self.get_actions(next_state, status='train', exploration=True,
                                                    actions_avail=actions_avail, target=True, last_hid=hids,
                                                    clip=True)
    compose_pol, _ = self.value(state, state, actions_pol)
    values_pol = compose_pol[:batch_size, :]
    values_pol = values_pol.contiguous().view(-1, self.n_)
    compose, _ = self.value(state, state, actions)
```
`self.value(state, state, actions)` is called with three arguments, but in matd3.py the method is defined as `def value(self, obs, act):`, which accepts only two.
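For illustration, a minimal sketch (hypothetical class, not the repository's code) of the error such an arity mismatch produces:

```python
# Hypothetical critic whose value() takes two arguments, mirroring matd3.py's signature.
class Critic:
    def value(self, obs, act):
        return obs + act

critic = Critic()
print(critic.value(1, 2))  # fine: two arguments

try:
    critic.value(1, 1, 2)  # three arguments, as in the get_loss call above
except TypeError as err:
    print(err)  # value() takes 3 positional arguments but 4 were given
```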
In matd3.py the signature is `def get_actions(self, obs, status, exploration, actions_avail, target=False, last_hid=None, clip=False):`, but in model.py:
```python
def train_process(self, stat, trainer):
    stat_train = {'mean_train_reward': 0}
    if self.args.episodic:
        episode = []
    # reset env
    state, global_state = trainer.env.reset()
    # init hidden states
    last_hid = self.policy_dicts[0].init_hidden()
    for t in range(self.args.max_steps):
        # current state, action, value
        state_ = prep_obs(state).to(self.device).contiguous().view(1, self.n_, self.obs_dim)
        action, action_pol, log_prob_a, _, hid = self.get_actions(state_, status='train', exploration=True,
                                                                  actions_avail=th.tensor(trainer.env.get_avail_actions()),
                                                                  target=False, last_hid=last_hid)
        value = self.value(state_, action_pol)
        _, actual = translate_action(self.args, action, trainer.env)
```
This triggers the warning: "Signature of method 'MATD3.get_actions()' does not match signature of base method in class 'Model'".
Please help me!
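For illustration, one common way to resolve such a warning, sketched here with hypothetical stand-ins for Model and MATD3 (not the repository's actual fix), is to keep the base parameter list in the override and append the extra parameter with a default:

```python
# Hypothetical stand-ins: the override preserves the base signature and adds
# `clip` with a default, so callers coded against Model still work.
class Model:
    def get_actions(self, obs, status, exploration, actions_avail,
                    target=False, last_hid=None):
        raise NotImplementedError

class MATD3(Model):
    def get_actions(self, obs, status, exploration, actions_avail,
                    target=False, last_hid=None, clip=False):
        # `clip` only changes behaviour when a caller passes it explicitly
        return obs, status, exploration, actions_avail, target, last_hid, clip

# Calls written against the base interface remain valid:
acts = MATD3().get_actions(None, status='train', exploration=True, actions_avail=[])
```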
Fixed. Please try it.
OK, thank you very much!
Thank you very much for updating the code! You have fixed matd3 and sqddpg, but not the model.py file.