You need to manually override the following functions: get_action(self), get_total_actions(self), get_avail_actions(self), get_avail_agent_actions(self, agent_id), and get_num_of_agents(self).
Yes, I have modified the functions you mentioned.
To give a better idea: I am trying to replicate your logic in a different environment for flexibility provision. In this case I have agents that represent buildings, and each agent controls several variables of its building.
I include some code to explain the logic:
self.action_space = ActionSpace(low=self.args.action_low, high=self.args.action_high)
self.n_agents = len(self.base_powergrid['buildings'])
self.n_actions = 4 # P_red, P_esc, P_esd, Q_pv for each building
self.agent_ids = self.base_powergrid['buildings'] # Agent IDs equal to bus IDs with buildings
def get_avail_actions(self):
    """Return available actions for all agents."""
    avail_actions = []
    for agent_id in range(self.n_agents):
        avail_actions.append(self.get_avail_agent_actions(agent_id))
    return np.expand_dims(np.array(avail_actions), axis=0)

def get_avail_agent_actions(self, agent_id):
    """Return the available actions for agent_id."""
    return [1] * self.n_actions

def get_total_actions(self):
    return self.n_actions

def get_num_of_agents(self):
    """Return the number of agents."""
    return self.n_agents
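As a quick sanity check on the shapes (a sketch only, assuming n_agents = 5 and n_actions = 4 as in my setup), the availability mask built by get_avail_actions comes out with a leading batch dimension:

```python
import numpy as np

# Hypothetical standalone check, assuming n_agents = 5 and n_actions = 4.
n_agents, n_actions = 5, 4
avail_agent = [1] * n_actions                   # what get_avail_agent_actions returns
avail = [avail_agent for _ in range(n_agents)]  # the get_avail_actions loop
mask = np.expand_dims(np.array(avail), axis=0)
print(mask.shape)  # (1, 5, 4)
```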
I am also attaching the step function.
def step(self, actions):
    num_buildings = len(self.base_powergrid['buildings'])

    # Initialize dictionaries to hold the parsed actions
    percentage_reduction = {}
    ess_charging = {}
    ess_discharging = {}
    q_pv = {}

    for i in range(num_buildings):
        # Each agent controls 4 actions
        percentage_reduction[self.base_powergrid['buildings'][i]] = self.args.max_power_reduction * actions[i * 4]
        ess_charging[self.base_powergrid['ESSs_at_buildings'][i]] = self.args.p_ch_max * actions[i * 4 + 1]
        ess_discharging[self.base_powergrid['ESSs_at_buildings'][i]] = self.args.p_dis_max * actions[i * 4 + 2]
        q_pv[self.base_powergrid['PVs_at_buildings'][i]] = self._scale_and_clip_q_pv(actions[i * 4 + 3], self.current_pv_power[self.base_powergrid['PVs_at_buildings'][i]])

    result = power_flow_solver(
        self.base_powergrid,
        self.current_active_demand,
        self.current_reactive_demand,
        percentage_reduction,
        self.current_pv_power,
        q_pv,
        ess_charging,
        ess_discharging,
        self.initial_ess_energy
    )

    voltages = result['Voltages']
    ess_energy = result['Next ESS Energy']
    self.current_voltage = voltages
    self.current_ess_energy = ess_energy

    reward, info = self.calculate_reward(percentage_reduction, ess_charging, ess_discharging, q_pv, voltages)
    self.cumulative_reward += reward

    self.set_demand_pv_prices()
    terminated = self.steps >= self.episode_limit
    self.steps += 1

    # Update initial_ess_energy for the next step
    self.initial_ess_energy = ess_energy

    # Ensure the reward is a scalar value
    total_reward = np.sum(reward)

    # Store the values as attributes for later access
    self.percentage_reduction = percentage_reduction
    self.ess_charging = ess_charging
    self.ess_discharging = ess_discharging
    self.q_pv = q_pv

    return total_reward, terminated, info
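Note that the indexing above assumes actions arrives as a flat vector of length n_agents * 4, laid out agent by agent. A rough sketch of the layout this expects (illustrative values only, not the trainer's actual output):

```python
import numpy as np

# Hypothetical flat action vector for 5 buildings with 4 actions each:
# [P_red_0, P_esc_0, P_esd_0, Q_pv_0, P_red_1, P_esc_1, P_esd_1, Q_pv_1, ...]
n_agents, n_actions = 5, 4
actions = np.random.uniform(0.0, 1.0, size=n_agents * n_actions)

for i in range(n_agents):
    p_red, p_esc, p_esd, q_pv = actions[i * 4: i * 4 + 4]
    # these four values feed percentage_reduction, ess_charging,
    # ess_discharging and q_pv for building i
```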
The issue appears when I run train.py:
GenericDict(gumbel_softmax=False, epsilon_softmax=False, softmax_eps=None, episodic=False, cuda=True, grad_clip_eps=1.0, save_model_freq=40, replay_warmup=0, policy_lrate=0.0001, value_lrate=0.0001, mixer_lrate=None, target=True, target_lr=0.1, entr=0.001, max_steps=240, batch_size=32, replay=True, replay_buffer_size=5000.0, agent_type='rnn', agent_id=True, shared_params=True, layernorm=True, mixer=False, gaussian_policy=False, LOG_STD_MIN=0.0, LOG_STD_MAX=0.5, fixed_policy_std=1.0, hid_activation='relu', init_type='normal', init_std=0.1, action_enforcebound=True, double_q=True, clip_c=1.0, gamma=0.99, hid_size=64, continuous=True, normalize_advantages=False, train_episodes_num=400, behaviour_update_freq=60, target_update_freq=120, policy_update_epochs=1, value_update_epochs=10, mixer_update_epochs=None, reward_normalisation=True, eval_freq=20, num_eval_episodes=10, action_low=0, action_high=1.0, action_bias=0.0, action_scale=1.0, agent_num=5, obs_size=6, state_size=110, action_dim=4)
Traceback (most recent call last):
File "/root/test/train.py", line 94, in <module>
train.run(stat, i)
File "/root/test/utils/trainer.py", line 109, in run
self.behaviour_net.train_process(stat, self)
File "/root/test/MADRL/models/model.py", line 214, in train_process
reward, done, info = trainer.env.step(actual)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/test/MADRL/environments/flex_provision/flexibility_provision_env.py", line 100, in step
percentage_reduction[self.base_powergrid['buildings'][i]] = self.args.max_power_reduction * actions[i * 4]
~~~~~~~^^^^^^^
IndexError: index 4 is out of bounds for axis 0 with size 4
Apparently the actions array has only 4 elements, and this causes the error. Could you propose possible fixes?
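One defensive option (only a sketch; it does not fix the root cause, but it turns the IndexError into a clearer message and accepts a per-agent action matrix as well as a flat vector) would be to normalize the shape at the top of step. The helper name below is hypothetical:

```python
import numpy as np

def _flatten_actions(actions, n_agents, n_actions):
    """Hypothetical helper: accept (n_agents * n_actions,), (n_agents, n_actions)
    or (1, n_agents, n_actions) inputs and return a flat 1-D vector."""
    actions = np.asarray(actions).reshape(-1)
    expected = n_agents * n_actions
    if actions.size != expected:
        raise ValueError(f"expected {expected} action values, got {actions.size}")
    return actions
```

With the [1, 1, 4] tensor reported above this would still fail, only with a more explicit error, so the real question is why only 4 values reach the environment in the first place.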
I was able to identify where the error came from. According to the error, the shape of the actions was [1, 1, 4], whereas I would expect [1, 5, 4]. In the function get_actions(self, state, status, exploration, actions_avail, target=False, last_hid=None) of maddpg.py, the variable means had the correct shape [1, 5, 4], but the problem was caused by the following part:
if means.size(-1) > 1:
    means_ = means.sum(dim=1, keepdim=True)
    log_stds_ = log_stds.sum(dim=1, keepdim=True)
else:
    means_ = means
    log_stds_ = log_stds
My question now is what is the purpose of this part?
In the distributed mode, each agent is responsible for controlling one generator. In your case, one agent has only one action.
If I want to have multiple actions per agent, what changes should I make?
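For reference, a minimal sketch of what the aggregation in get_actions does to the shapes, assuming means has shape [batch, n_agents, action_dim] as in the [1, 5, 4] case above:

```python
import torch

# Illustration only: means shaped [batch, n_agents, action_dim] = [1, 5, 4].
means = torch.randn(1, 5, 4)

if means.size(-1) > 1:
    # Summing over dim=1 collapses the agent axis: [1, 5, 4] -> [1, 1, 4]
    means_ = means.sum(dim=1, keepdim=True)
else:
    means_ = means

print(means_.shape)  # torch.Size([1, 1, 4])
```

With one action per agent (action_dim = 1), the else branch keeps the [1, n_agents, 1] shape intact, which is consistent with the distributed setup described above; with four actions per agent, the first branch collapses the per-agent means into a single 4-dimensional vector, which appears to be exactly the [1, 1, 4] that reaches step.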