Closed prinshul closed 10 months ago
Hi. Here is a script for plotting the HASAC learning curve in the MAMuJoCo environment.
def moving_average(a, window_size):
    """Smooth a 1-D sequence with a centred moving average.

    Element ``i`` of the result is the mean of ``a[i - w : i + w + 1]``,
    where ``w`` is the largest half-width that both fits inside the data and
    does not exceed ``(window_size - 1) // 2``. Near either end the window
    therefore shrinks symmetrically (1, 3, 5, ... samples) instead of being
    padded, so the output always has the same length as the input.

    Args:
        a: 1-D array-like of numbers.
        window_size: Maximum number of samples averaged per output element.
            A centred window needs an odd length, so an even value behaves
            like ``window_size - 1``.

    Returns:
        A float ``numpy.ndarray`` with ``len(a)`` elements.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    values = np.asarray(a, dtype=float)
    n = values.size
    if n == 0:
        return values.copy()

    # prefix[k] holds sum(values[:k]); any window sum is then one subtraction.
    prefix = np.concatenate(([0.0], np.cumsum(values)))

    idx = np.arange(n)
    # Per-element half-width: limited by the requested window and by the
    # distance to the nearer edge, which keeps every window symmetric.
    half = np.minimum((window_size - 1) // 2, np.minimum(idx, n - 1 - idx))

    return (prefix[idx + half + 1] - prefix[idx - half]) / (2 * half + 1)
def get_data(algo_path, task, algo, window):
    """Load every run under ``algo_path`` and aggregate the reward curves.

    Each sub-directory of ``algo_path`` that contains a ``progress.txt`` is
    treated as one run. The file is read as comma-delimited numbers; column 0
    is used as the step axis and column 1 as the reward.

    Args:
        algo_path: Directory whose sub-directories hold one run each.
        task: Unused; kept so existing callers keep working.
        algo: Unused; kept so existing callers keep working.
        window: Window size forwarded to ``moving_average``.

    Returns:
        A dict with
        ``'step'``        -- step axis of the first run (in sorted order),
        ``'reward'``      -- list with one reward array per run,
        ``'reward_mean'`` -- smoothed mean reward across runs,
        ``'reward_std'``  -- smoothed standard deviation across runs.
        All arrays are cut to the length of the shortest run.

    Raises:
        FileNotFoundError: If no run with a ``progress.txt`` is found.
    """
    steps = []
    rewards = []

    # Sorted so the run order, and therefore which run supplies the step
    # axis, does not depend on the file system's listing order.
    for entry in sorted(os.listdir(algo_path)):
        path = os.path.join(algo_path, entry)
        progress_file = os.path.join(path, "progress.txt")
        if not os.path.isfile(progress_file):
            # Stray files or unfinished runs: nothing to load.
            continue
        print(path)
        # ndmin=2 keeps the column indexing valid for single-row logs.
        data = np.loadtxt(progress_file, delimiter=",", ndmin=2)
        steps.append(data[:, 0])
        rewards.append(data[:, 1])

    if not rewards:
        raise FileNotFoundError(
            f"no run with a progress.txt found under {algo_path!r}")

    # Runs may have logged a different number of rows; the statistics across
    # runs are only defined on the part they all share.
    common = min(len(reward) for reward in rewards)
    rewards = [reward[:common] for reward in rewards]

    return {
        'step': steps[0][:common],
        'reward': rewards,
        'reward_mean': moving_average(np.mean(rewards, axis=0), window),
        'reward_std': moving_average(np.std(rewards, axis=0), window),
    }
def plot_mamujoco(task, window):
    """Plot the HASAC learning curve for one MAMuJoCo task and save it.

    Runs are read from ``mamujoco/<task>/hasac/<entry>/``, where ``<entry>``
    is the first name returned by ``os.listdir``. The smoothed mean reward is
    drawn as a line with a shaded band of one standard deviation on each
    side, and the figure is written to ``output/<task>/`` as both PDF and JPG.

    Args:
        task: Task name; used as directory name, plot title and file prefix.
        window: Moving-average window size passed on to ``get_data``.
    """
    env_path = os.path.join('mamujoco', task)
    algo_path = os.path.join(env_path, 'hasac')
    # NOTE: os.listdir order is arbitrary; with more than one entry here the
    # choice of experiment directory is not deterministic.
    algo_path = os.path.join(algo_path, os.listdir(algo_path)[0])

    data = get_data(algo_path, task, 'hasac', window)
    step = data["step"]
    reward_mean = data["reward_mean"]
    reward_std = data["reward_std"]

    # The band previously used color_list[algo], but neither name is defined
    # in this function; use the same colour as the curve itself.
    plt.fill_between(step, reward_mean - reward_std, reward_mean + reward_std,
                     color='purple', alpha=0.1)
    plt.plot(step, reward_mean, linewidth=1,
             color='purple', label='HASAC')

    plt.title(task)
    plt.grid(linestyle="-.")
    plt.ylabel("Episode Return")
    plt.xlabel("Step")
    plt.legend(loc='upper left')
    # plt.show()

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(f"output/{task}", exist_ok=True)
    plt.savefig(f'output/{task}/{task}_learning_curve.pdf')
    plt.savefig(f'output/{task}/{task}_learning_curve.jpg')
if __name__ == '__main__':
    import argparse

    # Command-line interface: each row is (flag, value type, default).
    cli = argparse.ArgumentParser()
    for flag, kind, fallback in (
        ('--task', str, 'Ant-v2_2x4'),
        ('--window-reward', int, 7),
        ('--fontsize', int, 12),
    ):
        cli.add_argument(flag, type=kind, default=fallback)
    options = cli.parse_args()

    # Set the global font size before any figure element is created.
    plt.rcParams['font.size'] = options.fontsize

    # --window-reward is handed to plot_mamujoco as its `window` argument.
    plot_mamujoco(options.task, options.window_reward)
For the moving average, over how many runs are the mean and std computed?
At least three runs usually.
At least three runs usually.
Are the runs done with different seeds? With the same seed, the rewards would be exactly the same at every timestep across runs, so we wouldn't get the shaded region around the curve.
Yes.
Hi
Is it possible for you to provide the visualization scripts for the plots of the different algorithms and environments reported in the paper? Even a single example on any environment with a few algorithms would be fine.
I also checked https://github.com/instadeepai/marl-eval, but I'm not sure how to use it with your code.
Thanks.