Limiting TensorFlow GPU memory fails inside a Ray actor or remote function

What is your question?

Limiting TensorFlow GPU memory fails inside a Ray actor or remote function.

Ray version and other system information (Python version, TensorFlow version, OS): Ray 0.8.0, TensorFlow 2.0.0-dev20191002, Python 3.6, OS: Ubuntu 18.04.3 LTS.

I want to limit the GPU memory usage of TensorFlow inside a Ray actor. This is the script I am running:

import ray
import numpy as np
from tensorflow.keras import layers
# import tensorflow as tf

def create_keras_model(memory_limit_mb=None):
    """Build and compile a small three-layer Keras classifier.

    TensorFlow is imported inside the function rather than at module level
    (presumably so that each Ray worker process imports and configures
    TensorFlow itself -- confirm against the Ray version in use).

    Before the model is built, the first visible GPU (if any) is selected and
    its memory behaviour is configured:

    * ``memory_limit_mb is None`` (default): enable memory growth, so
      TensorFlow allocates GPU memory on demand instead of reserving it all.
    * ``memory_limit_mb`` given: create a single virtual device on that GPU
      capped at that many megabytes. Memory growth is not enabled in this
      case because TensorFlow does not accept both settings on one device.

    When no GPU is visible the device configuration is skipped and the model
    is built on whatever device TensorFlow picks (the original code raised
    ``IndexError`` here).

    Args:
        memory_limit_mb: Optional hard cap, in megabytes, on the GPU memory
            TensorFlow may allocate in this process.

    Returns:
        A compiled ``tf.keras.Sequential`` model taking 32 input features and
        producing a 10-way softmax output.
    """
    import tensorflow as tf

    gpus = tf.config.experimental.list_physical_devices('GPU')
    gpu_id = 0
    if gpus:
        gpu = gpus[gpu_id]
        tf.config.experimental.set_visible_devices(gpu, "GPU")
        if memory_limit_mb is None:
            tf.config.experimental.set_memory_growth(gpu, True)
        else:
            tf.config.experimental.set_virtual_device_configuration(
                gpu,
                [tf.config.experimental.VirtualDeviceConfiguration(
                    memory_limit=memory_limit_mb)])

    # Use the layers module of the locally imported TensorFlow so the function
    # does not depend on a module-level import.
    layers = tf.keras.layers

    model = tf.keras.Sequential()
    # Adds a densely-connected layer with 64 units to the model:
    model.add(layers.Dense(64, activation="relu", input_shape=(32, )))
    # Add another:
    model.add(layers.Dense(64, activation="relu"))
    # Add a softmax layer with 10 output units:
    model.add(layers.Dense(10, activation="softmax"))

    model.compile(
        optimizer=tf.keras.optimizers.RMSprop(0.01),
        loss=tf.keras.losses.categorical_crossentropy,
        metrics=[tf.keras.metrics.categorical_accuracy])
    return model

# Start Ray in this driver process before any actors are created below.
ray.init()

def random_one_hot_labels(shape):
    """Return a random one-hot label matrix.

    Args:
        shape: A pair ``(n, n_class)`` giving the number of rows and the
            number of classes.

    Returns:
        A float array of shape ``(n, n_class)`` in which every row contains
        a single 1 at a randomly drawn class index and 0 elsewhere.
    """
    num_rows, num_classes = shape
    picked = np.random.randint(0, num_classes, num_rows)
    # Row i of the identity matrix is the one-hot vector for class i, so
    # indexing it with the drawn class ids yields the label matrix directly.
    return np.eye(num_classes)[picked]

# Each actor requests half a GPU from Ray (rather than a whole one with
# num_gpus=1).
@ray.remote(num_gpus=0.5)
class Network:
    """Ray actor wrapping a Keras model and a fixed random training set."""

    def __init__(self):
        """Build the model and generate 1000 random samples with labels."""
        self.model = create_keras_model()
        self.dataset = np.random.random((1000, 32))
        self.labels = random_one_hot_labels((1000, 10))

    def train(self):
        """Fit the model on the stored data and return the metric history."""
        fit_result = self.model.fit(self.dataset, self.labels, verbose=1)
        return fit_result.history

    def get_weights(self):
        """Return the model's current weights."""
        return self.model.get_weights()

    def set_weights(self, weights):
        """Replace the model's weights with ``weights``.

        For simplicity the optimizer state is not handled.
        """
        self.model.set_weights(weights)

    def evaluate(self):
        """Evaluate on the stored data; return ``(test_error, test_acc)``."""
        test_error, test_acc = self.model.evaluate(
            self.dataset, self.labels, verbose=0)
        return test_error, test_acc
# Create the first actor and block until its training round has finished.
NetworkActor = Network.remote()
result_object_id = NetworkActor.train.remote()
ray.get(result_object_id)

# Create the second actor and submit its training round without blocking.
NetworkActor2 = Network.remote()
NetworkActor2.train.remote()

# Fetch the weights of both actors.
weights = ray.get(
    [NetworkActor.get_weights.remote(),
     NetworkActor2.get_weights.remote()])

# Average the two models layer by layer.
averaged_weights = [(layer1 + layer2) / 2
                    for layer1, layer2 in zip(weights[0], weights[1])]

# Store the averaged weights once in the object store and hand the same
# reference to both actors. Waiting on the returned futures makes sure the
# update has been applied (and surfaces any remote error) before the script
# continues; previously the futures were created and discarded.
weight_id = ray.put(averaged_weights)
ray.get([
    actor.set_weights.remote(weight_id)
    for actor in [NetworkActor, NetworkActor2]
])

When I run the Python script, I get:

2019-12-30 02:45:49,367 INFO resource_spec.py:216 -- Starting Ray with 37.01 GiB memory available for workers and up to 18.51 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
Traceback (most recent call last):
  File "testgpu.py", line 60, in <module>
    ray.get(result_object_id)
  File "/usr/local/lib/python3.6/dist-packages/ray/worker.py", line 1457, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(AttributeError): ray::Network.__init__() (pid=7743, ip=172.17.0.2)
  File "python/ray/_raylet.pyx", line 626, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 633, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 634, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 636, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 619, in ray._raylet.execute_task.function_executor
  File "testgpu.py", line 41, in __init__
    self.model = create_keras_model()
  File "testgpu.py", line 9, in create_keras_model
    gpus = tf.config.experimental.list_physical_devices('GPU')
AttributeError: module 'tensorflow' has no attribute 'config'

How can I limit TensorFlow's GPU memory usage so that I can run multiple processes on a single GPU?

ray-project / ray

limiting tensorflow memory failed in actor or function #6633

What is your question?