ray-project / ray

Ray is an AI compute engine. Ray consists of a core distributed runtime and a set of AI Libraries for accelerating ML workloads.
https://ray.io
Apache License 2.0
33.48k stars 5.68k forks source link

No model is saved when specifying export_formats=[ExportFormat.H5] #17319

Closed strategynet123 closed 2 years ago

strategynet123 commented 3 years ago

What is the problem?

No model is saved when specifying export_formats=[ExportFormat.H5]. The output looks like this:

PPO_SimpleCorridor_9113a_00002_2_lr=1e-06_2021-07-25_14-16-48 ls -altr
total 360
-rw-r--r-- 1 dlf staff 253 25 Jul 14:16 params.json
-rw-r--r-- 1 dlf staff 2028 25 Jul 14:16 params.pkl
drwxr-xr-x 7 dlf staff 224 25 Jul 14:16 .
-rw-r--r-- 1 dlf staff 39926 25 Jul 14:19 progress.csv
-rw-r--r-- 1 dlf staff 64125 25 Jul 14:19 result.json
-rw-r--r-- 1 dlf staff 68316 25 Jul 14:19 events.out.tfevents.1627219008.velocity
drwxr-xr-x 37 dlf staff 1184 25 Jul 14:19 ..

Ray version and other system information (Python version, TensorFlow version, OS): Ray 1.4.1, TF 2.5.0, Mac OS X

Reproduction (REQUIRED)

import argparse import gym from gym.spaces import Discrete, Box import numpy as np import os import random

import ray from ray import tune from ray.tune import grid_search from ray.rllib.env.env_context import EnvContext from ray.rllib.models import ModelCatalog from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.models.tf.fcnet import FullyConnectedNetwork from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.utils.test_utils import check_learning_achieved from ray.tune.trial import ExportFormat

# Resolve the deep-learning framework modules through RLlib's helper
# functions; each call's result is unpacked into the module-level names below.
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()

# Command-line options for the reproduction script.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--run",
    type=str,
    default="PPO",
    help="The RLlib-registered algorithm to use.")
parser.add_argument(
    "--framework",
    choices=["tf", "tf2", "tfe", "torch"],
    default="tf2",
    help="The DL framework specifier.")
parser.add_argument(
    "--as-test",
    action="store_true",
    help="Whether this script should be run as a test: --stop-reward must "
    "be achieved within --stop-timesteps AND --stop-iters.")
parser.add_argument(
    "--stop-iters",
    type=int,
    default=50,
    help="Number of iterations to train.")
parser.add_argument(
    "--stop-timesteps",
    type=int,
    default=100000,
    help="Number of timesteps to train.")
parser.add_argument(
    "--stop-reward",
    type=float,
    default=0.1,
    help="Reward at which we stop training.")

class SimpleCorridor(gym.Env):
    """Example of a custom env in which you have to walk down a corridor.

    You can configure the length of the corridor via the env config.

    The agent starts at position 0 and chooses between two discrete actions:
    0 (step back, never below position 0) and 1 (step forward). The episode
    is done once the position reaches ``corridor_length``.
    """

    def __init__(self, config: EnvContext):
        """Build the action/observation spaces and seed the reward RNG.

        Args:
            config: Env config. ``config["corridor_length"]`` is the goal
                position; ``config.worker_index * config.num_workers`` is
                used as the random seed.
        """
        self.end_pos = config["corridor_length"]
        self.cur_pos = 0
        self.action_space = Discrete(2)
        self.observation_space = Box(
            0.0, self.end_pos, shape=(1, ), dtype=np.float32)
        # Set the seed. This is only used for the final (reach goal) reward.
        self.seed(config.worker_index * config.num_workers)

    def reset(self):
        """Move the agent back to position 0 and return the observation."""
        self.cur_pos = 0
        return [self.cur_pos]

    def step(self, action):
        """Apply one action and return ``(obs, reward, done, info)``.

        Args:
            action: 0 to step back (ignored at position 0) or 1 to step
                forward.

        Returns:
            A tuple of the one-element observation list, the reward (-0.1
            per step, or a random value in [0, 2) on reaching the goal), the
            done flag, and an empty info dict.
        """
        assert action in [0, 1], action
        if action == 0 and self.cur_pos > 0:
            self.cur_pos -= 1
        elif action == 1:
            self.cur_pos += 1
        done = self.cur_pos >= self.end_pos
        # Produce a random reward when we reach the goal.
        return [self.cur_pos], \
            random.random() * 2 if done else -0.1, done, {}

    def seed(self, seed=None):
        """Seed Python's global ``random`` module, which draws the goal reward."""
        random.seed(seed)

class CustomModel(TFModelV2):
    """Example of a keras custom model that just delegates to an fc-net."""

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Initialize the base model and the wrapped fully connected net.

        All five arguments are forwarded unchanged both to the parent
        constructor and to ``FullyConnectedNetwork``.
        """
        super(CustomModel, self).__init__(obs_space, action_space, num_outputs,
                                          model_config, name)
        self.model = FullyConnectedNetwork(obs_space, action_space,
                                           num_outputs, model_config, name)

    def forward(self, input_dict, state, seq_lens):
        """Return the wrapped network's ``forward`` result for these inputs."""
        return self.model.forward(input_dict, state, seq_lens)

    def value_function(self):
        """Return the wrapped network's ``value_function`` result."""
        return self.model.value_function()

# Script entry point: parse CLI args, register the custom model, run a Tune
# grid search over the learning rate, and request H5 model export.
if __name__ == "__main__":
    args = parser.parse_args()
    ray.init()

    # Can also register the env creator function explicitly with:
    # register_env("corridor", lambda config: SimpleCorridor(config))
    # NOTE(review): TorchCustomModel is not defined in the visible snippet, so
    # selecting --framework=torch would raise NameError here -- confirm.
    ModelCatalog.register_custom_model(
        "my_test_model", TorchCustomModel
        if args.framework == "torch" else CustomModel)

    config = {
        "env": SimpleCorridor,  # or "corridor" if registered above
        "env_config": {
            "corridor_length": 5,
        },
        # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
        "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
        "model": {
            "custom_model": "my_test_model",
            "vf_share_layers": True,
        },
        "lr": grid_search([1e-2, 1e-4, 1e-6]),  # try different lrs
        "num_workers": 1,  # parallelism
        "framework": args.framework,
    }

    # Stopping criteria passed to tune.run via `stop=`.
    stop = {
        "training_iteration": args.stop_iters,
        "timesteps_total": args.stop_timesteps,
        "episode_reward_mean": args.stop_reward,
    }

    # The export_formats argument is the setting this issue report is about.
    results = tune.run(
        args.run,
        config=config,
        export_formats=[ExportFormat.H5],
        stop=stop)

    if args.as_test:
        check_learning_achieved(results, args.stop_reward)

    ray.shutdown()
stale[bot] commented 2 years ago

Hi, I'm a bot from the Ray team :)

To help human contributors to focus on more relevant issues, I will automatically add the stale label to issues that have had no activity for more than 4 months.

If there is no further activity in the next 14 days, the issue will be closed!

You can always ask for help on our discussion forum or Ray's public slack channel.

stale[bot] commented 2 years ago

Hi again! The issue will be closed because there has been no more activity in the 14 days since the last message.

Please feel free to reopen or open a new issue if you'd still like it to be addressed.

Again, you can always ask for help on our discussion forum or Ray's public slack channel.

Thanks again for opening the issue!