PKU-MARL / HARL

Official implementation of HARL algorithms based on PyTorch.
520 stars 64 forks source link

LAG tuned hyperparameters with MAPPO & HAPPO #26

Closed: prinshul closed this issue 10 months ago

prinshul commented 10 months ago

Hi

I ran LAG with MAPPO and HAPPO, but the evaluation plot looks odd. Could you please share the LAG hyperparameters for MAPPO and HAPPO? This is regarding Fig. 10(c) in the paper "Maximum Entropy Heterogeneous-Agent Reinforcement Learning".

Thanks.

guazimao commented 10 months ago

Hi. You can try the following hyperparameters. The first configuration is for MAPPO and the second is for HAPPO.

{
    "algo_args":    {
        "algo": {
            "action_aggregation":   "prod",
            "actor_num_mini_batch": 2,
            "clip_param":   0.05,
            "critic_epoch": 5,
            "critic_num_mini_batch":    2,
            "entropy_coef": 0,
            "fixed_order":  true,
            "gae_lambda":   0.95,
            "gamma":    0.99,
            "huber_delta":  10.0,
            "max_grad_norm":    10.0,
            "ppo_epoch":    5,
            "share_param":  true,
            "use_clipped_value_loss":   true,
            "use_gae":  true,
            "use_huber_loss":   true,
            "use_max_grad_norm":    true,
            "use_policy_active_masks":  true,
            "value_loss_coef":  1
        },
        "device":   {
            "cuda": true,
            "cuda_deterministic":   true,
            "torch_threads":    4
        },
        "eval": {
            "eval_episodes":    20,
            "n_eval_rollout_threads":   10,
            "use_eval": true
        },
        "logger":   {
            "log_dir":  "./results"
        },
        "model":    {
            "activation_func":  "relu",
            "critic_lr":    0.0005,
            "data_chunk_length":    10,
            "gain": 0.01,
            "hidden_sizes": [
                256,
                256
            ],
            "initialization_method":    "orthogonal_",
            "lr":   0.0005,
            "opti_eps": 1e-05,
            "recurrent_n":  1,
            "std_x_coef":   1,
            "std_y_coef":   0.5,
            "use_feature_normalization":    true,
            "use_naive_recurrent_policy":   false,
            "use_recurrent_policy": false,
            "weight_decay": 0
        },
        "render":   {
            "render_episodes":  10,
            "use_render":   false
        },
        "seed": {
            "seed": 0,
            "seed_specify": true
        },
        "train":    {
            "episode_length":   1000,
            "eval_interval":    25,
            "log_interval": 5,
            "model_dir":    null,
            "n_rollout_threads":    20,
            "num_env_steps":    10000000,
            "use_linear_lr_decay":  false,
            "use_proper_time_limits":   true,
            "use_valuenorm":    true
        }
    },
    "env_args": {
        "scenario": "MultipleCombat",
        "task": "2v2/NoWeapon/vsBaseline"
    },
    "main_args":    {
        "algo": "mappo",
        "env":  "lag",
        "exp_name": "test",
        "load_config":  ""
    }
}

{
    "algo_args":    {
        "algo": {
            "action_aggregation":   "prod",
            "actor_num_mini_batch": 2,
            "clip_param":   0.05,
            "critic_epoch": 5,
            "critic_num_mini_batch":    2,
            "entropy_coef": 0,
            "fixed_order":  false,
            "gae_lambda":   0.95,
            "gamma":    0.99,
            "huber_delta":  10.0,
            "max_grad_norm":    10.0,
            "ppo_epoch":    5,
            "share_param":  false,
            "use_clipped_value_loss":   true,
            "use_gae":  true,
            "use_huber_loss":   true,
            "use_max_grad_norm":    true,
            "use_policy_active_masks":  true,
            "value_loss_coef":  1
        },
        "device":   {
            "cuda": true,
            "cuda_deterministic":   true,
            "torch_threads":    4
        },
        "eval": {
            "eval_episodes":    20,
            "n_eval_rollout_threads":   10,
            "use_eval": true
        },
        "logger":   {
            "log_dir":  "./results"
        },
        "model":    {
            "activation_func":  "relu",
            "critic_lr":    0.0005,
            "data_chunk_length":    10,
            "gain": 0.01,
            "hidden_sizes": [
                256,
                256
            ],
            "initialization_method":    "orthogonal_",
            "lr":   0.0005,
            "opti_eps": 1e-05,
            "recurrent_n":  1,
            "std_x_coef":   1,
            "std_y_coef":   0.5,
            "use_feature_normalization":    true,
            "use_naive_recurrent_policy":   false,
            "use_recurrent_policy": false,
            "weight_decay": 0
        },
        "render":   {
            "render_episodes":  10,
            "use_render":   false
        },
        "seed": {
            "seed": 0,
            "seed_specify": true
        },
        "train":    {
            "episode_length":   1000,
            "eval_interval":    25,
            "log_interval": 5,
            "model_dir":    null,
            "n_rollout_threads":    20,
            "num_env_steps":    10000000,
            "use_linear_lr_decay":  false,
            "use_proper_time_limits":   true,
            "use_valuenorm":    true
        }
    },
    "env_args": {
        "scenario": "MultipleCombat",
        "task": "2v2/NoWeapon/vsBaseline"
    },
    "main_args":    {
        "algo": "happo",
        "env":  "lag",
        "exp_name": "test",
        "load_config":  ""
    }
}
prinshul commented 10 months ago

Thank you.

Will the hyperparameters for the 2v2 ShootMissile task be similar for MAPPO and HAPPO?

guazimao commented 10 months ago

Yep, they should be similar.

prinshul commented 10 months ago

Thank you.