rlworkgroup / garage

A toolkit for reproducible reinforcement learning research.

Summary file with experiment parameters #1785

Open chelseas opened 4 years ago

chelseas commented 4 years ago

This is a feature request. I was playing around with rl_coach, and it automatically generates a JSON file containing all of an experiment's parameters. It's really helpful for repeatability and record keeping. Would love it if garage had something like this too. Here's an example of what it generates:

data_store: 
None
name: 
simple_rl_graph
environments: 
0: <rl_coach.environments.gym_environment.GymEnvironment object at 0x7f5cb7e30710>
level_managers: 
0: <rl_coach.level_manager.LevelManager object at 0x7f5cb7de0eb8>

_phase: 
RunPhase.UNDEFINED
reset_required: 
False
env_params: 
"GymVectorEnvironment" {
    "additional_simulator_parameters": {},
    "custom_reward_threshold": null,
    "default_input_filter": {
        "_observation_filters": {},
        "_reward_filters": {},
        "i_am_a_reference_filter": false,
        "name": "no_input_filter",
        "__class__": "NoInputFilter"
    },
    "default_output_filter": {
        "_action_filters": {},
        "i_am_a_reference_filter": false,
        "name": null,
        "__class__": "NoOutputFilter"
    },
    "experiment_path": "./experiments/bob213/21_07_2020-14_29",
    "frame_skip": 1,
    "human_control": false,
    "level": "CartPole-v0",
    "max_over_num_frames": 1,
    "observation_space_type": null,
    "random_initialization_steps": 0,
    "seed": null,
    "target_success_rate": 1.0
}

agent_params: 
"PolicyGradientsAgentParameters" {
    "algorithm": {
        "act_for_full_episodes": false,
        "apply_gradients_every_x_episodes": 5,
        "beta_entropy": 0,
        "discount": 0.99,
        "distributed_coach_synchronization_type": null,
        "heatup_using_network_decisions": false,
        "in_action_space": null,
        "load_memory_from_file_path": null,
        "n_step": -1,
        "num_consecutive_playing_steps": {
            "_num_steps": 1,
            "__class__": "EnvironmentSteps"
        },
        "num_consecutive_training_steps": 1,
        "num_steps_between_copying_online_weights_to_target": {
            "_num_steps": 0,
            "__class__": "TrainingSteps"
        },
        "num_steps_between_gradient_updates": 20000,
        "override_episode_rewards_with_the_last_transition_reward": false,
        "policy_gradient_rescaler": {
            "_value_": 3,
            "__objclass__": "<enum 'PolicyGradientRescaler'>",
            "_name_": "FUTURE_RETURN_NORMALIZED_BY_TIMESTEP"
        },
        "rate_for_copying_weights_to_target": 1.0,
        "share_statistics_between_workers": true,
        "store_transitions_only_when_episodes_are_terminated": false,
        "supports_parameter_noise": false,
        "update_pre_network_filters_state_on_inference": true,
        "update_pre_network_filters_state_on_train": false,
        "use_accumulated_reward_as_measurement": false,
        "__class__": "PolicyGradientAlgorithmParameters"
    },
    "current_episode": 0,
    "exploration": {
        "action_space": {
            "_high": "array([1.])",
            "_low": "array([0.])",
            "_shape": "array([1])",
            "default_action": 0,
            "descriptions": {},
            "num_dimensions": 1,
            "num_elements": 1,
            "__class__": "DiscreteActionSpace"
        },
        "__class__": "CategoricalParameters"
    },
    "full_name_id": "main_level/agent",
    "input_filter": {
        "_observation_filters": {},
        "_reward_filters": {
            "rescale": {
                "name": null,
                "rescale_factor": 0.005,
                "supports_batching": false,
                "__class__": "RewardRescaleFilter"
            }
        },
        "i_am_a_reference_filter": false,
        "name": "input_filter",
        "__class__": "InputFilter"
    },
    "is_a_highest_level_agent": true,
    "is_a_lowest_level_agent": true,
    "is_batch_rl_training": false,
    "memory": {
        "load_memory_from_file_path": null,
        "shared_memory": false,
        "__class__": "SingleEpisodeBufferParameters"
    },
    "name": "agent",
    "network_wrappers": {
        "main": {
            "adam_optimizer_beta1": 0.9,
            "adam_optimizer_beta2": 0.99,
            "async_training": true,
            "batch_size": 32,
            "clip_gradients": null,
            "create_target_network": false,
            "embedding_merger_type": {
                "_value_": 0,
                "__objclass__": "<enum 'EmbeddingMergerType'>",
                "_name_": "Concat"
            },
            "force_cpu": false,
            "framework": {
                "_value_": "TensorFlow",
                "__objclass__": "<enum 'Frameworks'>",
                "_name_": "tensorflow"
            },
            "gradients_clipping_method": {
                "_value_": 0,
                "__objclass__": "<enum 'GradientClippingMethod'>",
                "_name_": "ClipByGlobalNorm"
            },
            "heads_parameters": {
                "0": {
                    "activation_function": "tanh",
                    "dense_layer": null,
                    "is_training": false,
                    "loss_weight": 1.0,
                    "name": "policy_head_params",
                    "num_output_head_copies": 1,
                    "parameterized_class_name": "PolicyHead",
                    "rescale_gradient_from_head_by_factor": 1.0,
                    "__class__": "PolicyHeadParameters"
                }
            },
            "input_embedders_parameters": {
                "observation": {
                    "activation_function": "relu",
                    "batchnorm": false,
                    "dense_layer": null,
                    "dropout_rate": 0.0,
                    "input_clipping": null,
                    "input_offset": {
                        "image": 0.0,
                        "tensor": 0.0,
                        "vector": 0.0
                    },
                    "input_rescaling": {
                        "image": 255.0,
                        "tensor": 1.0,
                        "vector": 1.0
                    },
                    "is_training": false,
                    "name": "embedder",
                    "scheme": {
                        "_value_": "Medium",
                        "__objclass__": "<enum 'EmbedderScheme'>",
                        "_name_": "Medium"
                    },
                    "__class__": "InputEmbedderParameters"
                }
            },
            "l2_regularization": 0,
            "learning_rate": 0.0005,
            "learning_rate_decay_rate": 0,
            "learning_rate_decay_steps": 0,
            "middleware_parameters": {
                "activation_function": "relu",
                "batchnorm": false,
                "dense_layer": null,
                "dropout_rate": 0.0,
                "is_training": false,
                "name": "middleware_fc_embedder",
                "num_streams": 1,
                "parameterized_class_name": "FCMiddleware",
                "scheme": {
                    "_value_": "Medium",
                    "__objclass__": "<enum 'MiddlewareScheme'>",
                    "_name_": "Medium"
                },
                "__class__": "FCMiddlewareParameters"
            },
            "optimizer_epsilon": 0.0001,
            "optimizer_type": "Adam",
            "replace_mse_with_huber_loss": false,
            "rms_prop_optimizer_decay": 0.9,
            "scale_down_gradients_by_number_of_workers_for_sync_training": true,
            "sess": null,
            "shared_optimizer": true,
            "softmax_temperature": 1,
            "tensorflow_support": true,
            "use_separate_networks_per_head": false,
            "__class__": "PolicyGradientNetworkParameters"
        }
    },
    "output_filter": {
        "_action_filters": {},
        "i_am_a_reference_filter": false,
        "name": "output_filter",
        "__class__": "NoOutputFilter"
    },
    "pre_network_filter": {
        "_observation_filters": {},
        "_reward_filters": {},
        "i_am_a_reference_filter": false,
        "name": "pre_network_filter",
        "__class__": "NoInputFilter"
    },
    "task_parameters": {
        "apply_stop_condition": false,
        "checkpoint_restore_path": null,
        "checkpoint_save_dir": null,
        "checkpoint_save_secs": null,
        "evaluate_only": null,
        "experiment_path": "./experiments/bob213/21_07_2020-14_29",
        "export_onnx_graph": false,
        "framework_type": {
            "_value_": "TensorFlow",
            "__objclass__": "<enum 'Frameworks'>",
            "_name_": "tensorflow"
        },
        "num_gpu": 1,
        "seed": null,
        "task_index": 0,
        "use_cpu": false,
        "__class__": "TaskParameters"
    },
    "visualization": {
        "add_rendered_image_to_env_response": false,
        "dump_csv": true,
        "dump_gifs": false,
        "dump_in_episode_signals": false,
        "dump_mp4": false,
        "dump_parameters_documentation": true,
        "dump_signals_to_csv_every_x_episodes": 5,
        "max_fps_for_human_control": 10,
        "native_rendering": false,
        "print_networks_summary": false,
        "render": false,
        "tensorboard": false,
        "video_dump_filters": {
            "0": {
                "run_phases": {
                    "0": {
                        "_value_": "Testing",
                        "__objclass__": "<enum 'RunPhase'>",
                        "_name_": "TEST"
                    }
                },
                "__class__": "SelectedPhaseOnlyDumpFilter"
            },
            "1": {
                "max_reward_achieved": -Infinity,
                "__class__": "MaxDumpFilter"
            }
        },
        "__class__": "VisualizationParameters"
    }
}

graph_creation_time: 
1595366960.1503983
time_metric: 
TimeTypes.EpisodeNumber
visualization_parameters: 
"VisualizationParameters" {
    "add_rendered_image_to_env_response": false,
    "dump_csv": true,
    "dump_gifs": false,
    "dump_in_episode_signals": false,
    "dump_mp4": false,
    "dump_parameters_documentation": true,
    "dump_signals_to_csv_every_x_episodes": 5,
    "max_fps_for_human_control": 10,
    "native_rendering": false,
    "print_networks_summary": false,
    "render": false,
    "tensorboard": false,
    "video_dump_filters": {
        "0": {
            "run_phases": {
                "0": {
                    "_value_": "Testing",
                    "__objclass__": "<enum 'RunPhase'>",
                    "_name_": "TEST"
                }
            },
            "__class__": "SelectedPhaseOnlyDumpFilter"
        },
        "1": {
            "max_reward_achieved": -Infinity,
            "__class__": "MaxDumpFilter"
        }
    }
}

checkpoint_saver: 
<rl_coach.saver.SaverCollection object at 0x7f5cb7df30b8>
is_batch_rl: 
False
sess: 
<tensorflow.python.client.session.Session object at 0x7f5cb7e30160>
preset_validation_params: 
"PresetValidationParameters" {
    "max_episodes_to_achieve_reward": 550,
    "min_reward_threshold": 130,
    "num_workers": 1,
    "read_csv_tries": 200,
    "reward_test_level": null,
    "test": true,
    "test_using_a_trace_test": true,
    "trace_max_env_steps": 5000,
    "trace_test_levels": null
}

task_parameters: 
"TaskParameters" {
    "apply_stop_condition": false,
    "checkpoint_restore_path": null,
    "checkpoint_save_dir": null,
    "checkpoint_save_secs": null,
    "evaluate_only": null,
    "experiment_path": "./experiments/bob213/21_07_2020-14_29",
    "export_onnx_graph": false,
    "framework_type": {
        "_value_": "TensorFlow",
        "__objclass__": "<enum 'Frameworks'>",
        "_name_": "tensorflow"
    },
    "num_gpu": 1,
    "seed": null,
    "task_index": 0,
    "use_cpu": false
}

top_level_manager: 
<rl_coach.level_manager.LevelManager object at 0x7f5cb7de0eb8>
heatup_steps: 
<rl_coach.core_types.EnvironmentSteps object at 0x7f5cb7e1dfd0>
last_checkpoint_saving_time: 
1595366960.148359
checkpoint_id: 
0
checkpoint_state_updater: 
None
improve_steps: 
<rl_coach.core_types.TrainingSteps object at 0x7f5cb7e1df28>
evaluation_steps: 
<rl_coach.core_types.EnvironmentEpisodes object at 0x7f5cb7e1df98>
steps_between_evaluation_periods: 
<rl_coach.core_types.EnvironmentEpisodes object at 0x7f5cb7e1df60>
total_steps_counters: 
RunPhase.TRAIN: <rl_coach.core_types.TotalStepsCounter object at 0x7f5cb7e305c0>
RunPhase.HEATUP: <rl_coach.core_types.TotalStepsCounter object at 0x7f5cb7e30588>
RunPhase.TEST: <rl_coach.core_types.TotalStepsCounter object at 0x7f5cb7e305f8>

graph_logger: 
<rl_coach.logger.Logger object at 0x7f5cb7e30630>
krzentner commented 4 years ago

That's an interesting feature. I can certainly see why something like that would be convenient. Right now, there are basically two ways of getting similar information from garage: the variant.json file (which contains only the explicit hyper-parameters passed to the experiment function), and the snapshots (which need to be unpickled first, and are thus much more work to use for checking experiment details). In theory, the combination of metadata.json and launch_archive.tar.xz should also let one reconstruct this information, but that's even more work.
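
As an illustration of those two existing routes, here is a minimal sketch in plain Python (not a garage API); the log directory layout and the snapshot file name are assumptions for the example:

    # Minimal sketch: inspect experiment details from the two existing artifacts.
    # The directory path and the snapshot file name 'itr_0.pkl' are assumptions.
    import json
    import pickle

    log_dir = 'data/local/experiment/my_exp'  # hypothetical experiment directory

    # 1. Explicit hyper-parameters passed to the experiment function.
    with open(f'{log_dir}/variant.json') as f:
        variant = json.load(f)
    print(variant)

    # 2. Full experiment state, which has to be unpickled before it can be inspected.
    with open(f'{log_dir}/itr_0.pkl', 'rb') as f:
        snapshot = pickle.load(f)
    print(type(snapshot), sorted(getattr(snapshot, '__dict__', {})))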

Presumably, if we were to implement this, we would do it using the pickle protocol (which might also help us debug some weird problems we've had with pickling).

ryanjulian commented 4 years ago

@krzentner I don't think it would be especially difficult, do you? We just need to walk the object tree passed to the Snapshotter and output the tree of (type, *args, **kwargs, attributes).

We have always imagined these output formats as being only valuable if they can be deserialized, but as @chelseas points out, they have utility even if you can't deserialize the output back into an experiment.
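
To make the proposal concrete, a rough sketch of walking an object tree and emitting a JSON-friendly summary is shown below. It only captures (type, attributes) via each object's `__dict__`; recovering `*args`/`**kwargs` as well would additionally need the pickle protocol hooks. This is purely illustrative, not garage code, and the usage line is hypothetical:

    import json


    def summarize(obj, depth=0, max_depth=6):
        """Recursively describe an object as plain, JSON-friendly data."""
        if isinstance(obj, (bool, int, float, str, type(None))):
            return obj
        if depth >= max_depth:
            return repr(obj)
        if isinstance(obj, (list, tuple)):
            return [summarize(o, depth + 1, max_depth) for o in obj]
        if isinstance(obj, dict):
            return {str(k): summarize(v, depth + 1, max_depth) for k, v in obj.items()}
        # Fall back to the instance dict -- roughly the state pickle would capture.
        state = getattr(obj, '__dict__', None)
        if state is None:
            return repr(obj)
        return {'__class__': type(obj).__name__,
                **{k: summarize(v, depth + 1, max_depth) for k, v in state.items()}}


    # Hypothetical usage: dump whatever object tree gets handed to the Snapshotter.
    # with open('experiment_summary.json', 'w') as f:
    #     json.dump(summarize(algo), f, indent=2, default=repr)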