Unity-Technologies / ml-agents

The Unity Machine Learning Agents Toolkit (ML-Agents) is an open-source project that enables games and simulations to serve as environments for training intelligent agents using deep reinforcement learning and imitation learning.
https://unity.com/products/machine-learning-agents
Other
17.12k stars 4.15k forks source link

Postprocessing of .demo files #5837

Closed DavideSignori closed 1 year ago

DavideSignori commented 1 year ago

Is your feature request related to a problem? Please describe. Hi, I am encountering an issue with the demonstration recorder: I would like to be able to remove unnecessary information (actions or observations) from the .demo file after the recording has taken place. I have to do this because I can filter out the unnecessary information only after the demonstration is completed. Is there a way I can do this?

Describe alternatives you've considered It would also be helpful if there were a way to perform such processing before the file is saved with the .demo extension. Thanks!

cm107 commented 1 year ago

It's very tedious, but you could modify some of the methods in demo_loader.

I created some utilities for modifying the demo files in my environment. You would need to make a few changes in order to get it to work for your environment though. (model name, etc.)

demo_util.py ```python import random from mlagents.trainers.demo_loader import get_demo_files, load_demonstration, write_demo from mlagents_envs.communicator_objects.demonstration_meta_pb2 import DemonstrationMetaProto from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto from mlagents_envs.communicator_objects.space_type_pb2 import SpaceTypeProto from mlagents_envs.communicator_objects.brain_parameters_pb2 import ActionSpecProto from mlagents_envs.communicator_objects.observation_pb2 import ObservationProto, CompressionTypeProto, ObservationTypeProto from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import AgentInfoActionPairProto from google.protobuf.internal.decoder import _DecodeVarint32 # type: ignore from google.protobuf.internal.encoder import _EncodeVarint # type: ignore from mlagents_envs.rpc_utils import behavior_spec_from_proto, steps_from_proto INITIAL_POS = 33 SUPPORTED_DEMONSTRATION_VERSIONS = frozenset([0, 1]) def debug_read_demo(path: str): behavior_spec = None brain_param_proto = None info_action_pairs = [] total_expected = 0 with open(path, "rb") as fp: data = fp.read() next_pos, pos, obs_decoded = 0, 0, 0 while pos < len(data): next_pos, pos = _DecodeVarint32(data, pos) if obs_decoded == 0: meta_data_proto = DemonstrationMetaProto() meta_data_proto.ParseFromString(data[pos : pos + next_pos]) if ( meta_data_proto.api_version not in SUPPORTED_DEMONSTRATION_VERSIONS ): raise RuntimeError( f"Can't load Demonstration data from an unsupported version ({meta_data_proto.api_version})" ) total_expected += meta_data_proto.number_steps pos = INITIAL_POS if obs_decoded == 1: brain_param_proto = BrainParametersProto() brain_param_proto.ParseFromString(data[pos : pos + next_pos]) pos += next_pos if obs_decoded > 1: agent_info_action = 
AgentInfoActionPairProto() agent_info_action.ParseFromString(data[pos : pos + next_pos]) if behavior_spec is None: behavior_spec = behavior_spec_from_proto( brain_param_proto, agent_info_action.agent_info ) info_action_pairs.append(agent_info_action) if len(info_action_pairs) == total_expected: break pos += next_pos obs_decoded += 1 print(f"meta_data_proto: {meta_data_proto}") print(f"brain_param_proto: {brain_param_proto}") print('') print(brain_param_proto.action_spec.action_descriptions) def get_demonstration_meta_proto( api_version: int=1, demonstration_name: str="IKExcavator", number_steps: int=99999, number_episodes: int=99, mean_reward: float=12.345 ) -> DemonstrationMetaProto: demonstration_meta_proto = DemonstrationMetaProto() demonstration_meta_proto.api_version = api_version demonstration_meta_proto.demonstration_name = demonstration_name demonstration_meta_proto.number_steps = number_steps demonstration_meta_proto.number_episodes = number_episodes demonstration_meta_proto.mean_reward = mean_reward return demonstration_meta_proto def get_brain_parameters_proto( agent_name: str="IK_Excavator", team_id: int=0, is_training: bool=False, num_continuous_actions: int=0, num_discrete_actions: int=4, discrete_branch_sizes: int=[3,3,3,3], action_descriptions: list[str]=[] ) -> BrainParametersProto: brain_parameters_proto = BrainParametersProto() brain_parameters_proto.vector_action_size_deprecated.MergeFrom(discrete_branch_sizes) brain_parameters_proto.vector_action_descriptions_deprecated.MergeFrom(action_descriptions) brain_parameters_proto.vector_action_space_type_deprecated = SpaceTypeProto.continuous \ if num_continuous_actions > 0 else SpaceTypeProto.discrete brain_parameters_proto.brain_name = f"{agent_name}?team={team_id}" brain_parameters_proto.is_training = is_training # ? 
action_spec_proto = ActionSpecProto() action_spec_proto.num_continuous_actions = num_continuous_actions action_spec_proto.num_discrete_actions = num_discrete_actions action_spec_proto.discrete_branch_sizes.MergeFrom(discrete_branch_sizes) action_spec_proto.action_descriptions.MergeFrom(action_descriptions) brain_parameters_proto.action_spec.CopyFrom(action_spec_proto) return brain_parameters_proto def get_frame_pair_proto( gs_obs: list[float]=[1, 0], other_obs: list[float]=None, reward: float=0.0, done: bool=False, max_step_reached: bool=False, id: int=0, discrete_actions: list[int]=[1,1,1,1] ) -> AgentInfoActionPairProto: def get_gs_obs_proto(val: list[float]=[1, 0]) -> ObservationProto: observation_proto = ObservationProto() obs_float_proto = ObservationProto.FloatData() obs_count = len(val) obs_float_proto.data.extend(val) observation_proto.shape.MergeFrom([obs_count]) observation_proto.compression_type = CompressionTypeProto.NONE observation_proto.float_data.CopyFrom(obs_float_proto) observation_proto.compressed_channel_mapping.MergeFrom([]) observation_proto.dimension_properties.MergeFrom([1]) # ? observation_proto.observation_type = ObservationTypeProto.GOAL_SIGNAL observation_proto.name = 'GoalScoopSensor' return observation_proto def get_other_obs_proto(val: list[float]=None) -> ObservationProto: observation_proto = ObservationProto() obs_float_proto = ObservationProto.FloatData() if val is None: val = [] for j in range(4): for i in range(17): val.append(i + j) obs_float_proto.data.MergeFrom(val) observation_proto.shape.MergeFrom([len(val)]) observation_proto.compression_type = CompressionTypeProto.NONE observation_proto.float_data.CopyFrom(obs_float_proto) observation_proto.compressed_channel_mapping.MergeFrom([]) observation_proto.dimension_properties.MergeFrom([1]) # ? 
observation_proto.observation_type = ObservationTypeProto.DEFAULT observation_proto.name = 'StackingSensor_size4_VectorSensor_size17' return observation_proto agent_info_proto = AgentInfoProto() agent_info_proto.reward = reward agent_info_proto.done = done agent_info_proto.max_step_reached = max_step_reached agent_info_proto.id = id agent_info_proto.action_mask.MergeFrom([]) agent_info_proto.observations.append(get_gs_obs_proto(gs_obs)) agent_info_proto.observations.append(get_other_obs_proto(other_obs)) agent_info_proto.group_id = 0 agent_info_proto.group_reward = 0 agent_action_proto = AgentActionProto() agent_action_proto.vector_actions_deprecated.MergeFrom([]) agent_action_proto.value = 0.0 agent_action_proto.continuous_actions.MergeFrom([]) agent_action_proto.discrete_actions.MergeFrom(discrete_actions) pair_proto = AgentInfoActionPairProto() pair_proto.agent_info.CopyFrom(agent_info_proto) pair_proto.action_info.CopyFrom(agent_action_proto) return pair_proto from mlagents_envs.base_env import BehaviorSpec from mlagents_envs.timers import hierarchical_timer, timed @timed def load_demo( file_path: str, ) -> tuple[DemonstrationMetaProto, BrainParametersProto, list[AgentInfoActionPairProto]]: """ Loads and parses a demonstration file. :param file_path: Location of demonstration file (.demo). :return: BrainParameter and list of AgentInfoActionPairProto containing demonstration data. """ # First 32 bytes of file dedicated to meta-data. 
file_paths = get_demo_files(file_path) behavior_spec = None brain_param_proto = None info_action_pairs = [] total_expected = 0 for _file_path in file_paths: with open(_file_path, "rb") as fp: with hierarchical_timer("read_file"): data = fp.read() next_pos, pos, obs_decoded = 0, 0, 0 while pos < len(data): next_pos, pos = _DecodeVarint32(data, pos) if obs_decoded == 0: meta_data_proto = DemonstrationMetaProto() meta_data_proto.ParseFromString(data[pos : pos + next_pos]) if ( meta_data_proto.api_version not in SUPPORTED_DEMONSTRATION_VERSIONS ): raise RuntimeError( f"Can't load Demonstration data from an unsupported version ({meta_data_proto.api_version})" ) total_expected += meta_data_proto.number_steps pos = INITIAL_POS if obs_decoded == 1: brain_param_proto = BrainParametersProto() brain_param_proto.ParseFromString(data[pos : pos + next_pos]) pos += next_pos if obs_decoded > 1: agent_info_action = AgentInfoActionPairProto() agent_info_action.ParseFromString(data[pos : pos + next_pos]) if behavior_spec is None: behavior_spec = behavior_spec_from_proto( brain_param_proto, agent_info_action.agent_info ) info_action_pairs.append(agent_info_action) if len(info_action_pairs) == total_expected: break pos += next_pos obs_decoded += 1 if not behavior_spec: raise RuntimeError( f"No BrainParameters found in demonstration file at {file_path}." 
) return meta_data_proto, brain_param_proto, info_action_pairs def example_write_demo(path: str='test.demo'): brain_parameters_proto = get_brain_parameters_proto( agent_name='IK_Excavator', num_discrete_actions=4, discrete_branch_sizes=[3,3,3,3] ) print(f"brain_parameters_proto\n{brain_parameters_proto}") agent_info_protos = [] for i in range(100): agent_info_protos.append( get_frame_pair_proto( gs_obs=[1,0], other_obs=[random.random() for j in range(17*4)], reward=random.random()*10, done=random.random() < 0.1, discrete_actions=[random.choice(list(range(size))) for size in brain_parameters_proto.action_spec.discrete_branch_sizes] ) ) demonstration_meta_proto = get_demonstration_meta_proto( demonstration_name='test', number_steps=len(agent_info_protos), number_episodes=list.count([agent.agent_info.done for agent in agent_info_protos], True), mean_reward=sum([agent.agent_info.reward for agent in agent_info_protos]) / len(agent_info_protos) ) print(f"demonstration_meta_proto\n{demonstration_meta_proto}") write_demo( demo_path=path, meta_data_proto=demonstration_meta_proto, brain_param_proto=brain_parameters_proto, agent_info_protos=agent_info_protos ) print(load_demonstration(path)) ```
remove_rewards_from_demo.py ```python import os from mlagents.trainers.demo_loader import write_demo, \ get_demo_files from demo_util import load_demo src_folder = "/home/clayton/Unity/Projects/excav_simul_3d/Demo/GSSReduced0" dst_folder = "/home/clayton/Unity/Projects/excav_simul_3d/Demo/GSSReduced0_NoRewardDemo" src_paths = get_demo_files(src_folder) os.makedirs(dst_folder, exist_ok=True) for src_path in src_paths: src_filename = os.path.basename(src_path) dst_path = f"{dst_folder}/{src_filename}" meta_data_proto, brain_param_proto, info_action_pairs = load_demo(src_path) for i in range(len(info_action_pairs)): info_action_pairs[i].agent_info.reward = 0 meta_data_proto.mean_reward = 0 write_demo( demo_path=dst_path, meta_data_proto=meta_data_proto, brain_param_proto=brain_param_proto, agent_info_protos=info_action_pairs ) print(f"{src_path}\n->{dst_path}") ```

@miguelalonsojr I would also like mlagents to provide an easier way to modify the demo files after they are created. For example, suppose I spend 2 hours recording a demo for my agent, train, and then realize that one of the observations is causing problems. I don't want to have to re-record the demos again. It would be easier to just loop through all of the observations in the demo file and remove the problematic observation. I think that ml-agents should provide utilities that would make this possible.