Feature/dreamer multiple env

Summary

Describe the purpose of the pull request:

This PR fixes #63.

Type of Change

Please select the one relevant option below:

New feature (non-breaking change that adds functionality)

Checklist

Please confirm that the following tasks have been completed:

[x] I have tested my changes locally and they work as expected. (Please describe the tests you performed.)
[x] I have added unit tests for my changes, or updated existing tests if necessary.
[x] I have updated the documentation, if applicable.
[x] I have installed pre-commit and run locally for my code changes.

Screenshots or Visuals (Optional)

Run with the following hyperparameters:

{
    "exp_name": "default",
    "seed": 5,
    "dry_run": false,
    "torch_deterministic": false,
    "env_id": "MsPacmanNoFrameskip-v0",
    "num_envs": 8,
    "sync_env": false,
    "root_dir": null,
    "run_name": null,
    "action_repeat": 4,
    "memmap_buffer": true,
    "checkpoint_every": 2000,
    "checkpoint_path": null,
    "share_data": false,
    "per_rank_batch_size": 16,
    "per_rank_sequence_length": 64,
    "total_steps": 400000,
    "capture_video": true,
    "buffer_size": 2000000,
    "learning_starts": 5000,
    "pretrain_steps": 1,
    "gradient_steps": 1,
    "train_every": 32,
    "checkpoint_buffer": true,
    "buffer_type": "sequential",
    "prioritize_ends": false,
    "world_lr": 0.0002,
    "actor_lr": 4e-05,
    "critic_lr": 0.0001,
    "horizon": 15,
    "gamma": 0.999,
    "lmbda": 0.95,
    "use_continues": true,
    "stochastic_size": 32,
    "discrete_size": 32,
    "hidden_size": 512,
    "recurrent_state_size": 512,
    "kl_balancing_alpha": 0.8,
    "kl_free_nats": 0.0,
    "kl_free_avg": true,
    "kl_regularizer": 0.1,
    "continue_scale_factor": 5.0,
    "actor_ent_coef": 0.001,
    "actor_init_std": 0.0,
    "actor_min_std": 0.1,
    "actor_distribution": "auto",
    "clip_gradients": 100.0,
    "dense_units": 512,
    "mlp_layers": 2,
    "cnn_channels_multiplier": 32,
    "dense_act": "ELU",
    "cnn_act": "ELU",
    "critic_target_network_update_freq": 100,
    "layer_norm": true,
    "objective_mix": 1.0,
    "expl_amount": 0.0,
    "expl_decay": false,
    "expl_min": 0.0,
    "max_step_expl_decay": 0,
    "max_episode_steps": 108000,
    "atari_noop_max": 30,
    "clip_rewards": true,
    "grayscale_obs": false,
    "cnn_keys": [
        "rgb"
    ],
    "mlp_keys": null,
    "mine_min_pitch": -60,
    "mine_max_pitch": 60,
    "mine_start_position": null,
    "minerl_dense": false,
    "minerl_extreme": false,
    "mine_break_speed": 100,
    "mine_sticky_attack": 30,
    "mine_sticky_jump": 10
}

Thank you for your contribution! Once you have filled out this template, please ensure that you have assigned the appropriate reviewers and that all tests have passed.

Eclectic-Sheep / sheeprl

Feature/dreamer multiple env #64

Summary

Type of Change

Checklist

Screenshots or Visuals (Optional)