nbro opened this issue 4 years ago
I drilled down in the TensorFlow code. It's due to TensorFlow automatically creating a wrapper around your function: it casts and reshapes the model output (the distribution) to the type of the metric (which seems odd to me anyway). So, to prevent it, you should create your own wrapper that doesn't perform this cast. The code that does this is at: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/metrics.py#L583
Use that block of code as inspiration to make your own metric wrapper. This should be a feature of TFP.
@mcourteaux Thank you for this info (I was already suspecting this, btw). Have a look at the duplicate issue https://github.com/tensorflow/tensorflow/issues/36181. You should also provide this info there. Feel free to provide a (temporary) concrete solution to this problem.
For example, one can use this MeanMetricWrapper:

import six
from tensorflow import keras
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.utils.tf_utils import is_tensor_or_variable

class MeanMetricWrapper(keras.metrics.Mean):

    def __init__(self, fn, name=None, dtype=None, **kwargs):
        super(MeanMetricWrapper, self).__init__(name=name, dtype=dtype)
        self._fn = fn
        self._fn_kwargs = kwargs

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Call the wrapped function directly, without Keras' cast/reshape step.
        matches = self._fn(y_true, y_pred, **self._fn_kwargs)
        return super(MeanMetricWrapper, self).update_state(
            matches, sample_weight=sample_weight)

    def get_config(self):
        config = {}
        for k, v in six.iteritems(self._fn_kwargs):
            config[k] = K.eval(v) if is_tensor_or_variable(v) else v
        base_config = super(MeanMetricWrapper, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
When defining your metrics, use this wrapper and pass your original lambda as the fn argument of the constructor.
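For instance, a minimal usage sketch (the negloglik lambda and the model are illustrative; this assumes the model's final layer is a tfp.layers.DistributionLambda or similar, so y_pred is a distribution):

negloglik = lambda y_true, rv_y: -rv_y.log_prob(y_true)

model.compile(optimizer='adam',
              loss=negloglik,
              metrics=[MeanMetricWrapper(negloglik, name='nll')])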
@brianwa84, @jvdillon Can you please confirm (or not) that the solution provided by @mcourteaux is the most appropriate workaround that currently exists?
I've trained a Bayesian neural network by early stopping it when the negative log-likelihood does not improve for several epochs. To do that, I pass my negative log-likelihood loss function (similar to the one defined above) to MeanMetricWrapper, i.e. MeanMetricWrapper(neg_log_likelihood), and then pass it to the metrics parameter of the compile method. I am not very familiar with MeanMetricWrapper (and, in general, with the metric classes and how they work or are supposed to be used), and training of my Bayesian model early stops after 5-6 epochs, so I think I am doing something wrong: I wouldn't expect the Bayesian model to overfit so easily.
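Concretely, the setup looks something like this (a sketch with illustrative names, not the exact code):

model.compile(optimizer='adam',
              loss=neg_log_likelihood,
              metrics=[MeanMetricWrapper(neg_log_likelihood, name='nll')])

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_nll', mode='min', patience=5)
model.fit(x_train, y_train, validation_data=(x_val, y_val), callbacks=[early_stop])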
@nbro are you still having issues? This workaround does seem to be doing the job for me (in particular, it agrees with the loss function value up to some difference I can attribute to the kernel divergence).
I would still like to figure out how to extract the KL divergence separately, but that might be harder since it depends on the weight distributions.
@joaocaldeira Well, as I say in the comment above, I was trying to use the NLL computed with the workaround above to early stop my model, but the model early stops too quickly, after 5-6 epochs (although my datasets are relatively small, i.e. 8k training and 8k test instances), whereas the non-Bayesian model doesn't even early stop. I was actually expecting the Bayesian NN not to overfit so quickly. Bayesian models can also overfit, of course, but people usually say that they are more robust to overfitting, although I don't know precisely what they mean by that.
@joaocaldeira Would it be possible to see your source code? Have you also encountered an overfitting situation?
No obvious overfitting, or particularly quick stopping. I'm not particularly happy with the uncertainties I get from the model at the moment, but that's a completely separate issue. The relevant code snippet is
def mlp_flipout(hidden_dim=100, n_layers=3, n_inputs=13, dropout_rate=0, kernel='kl'):
    input_img = tfkl.Input(n_inputs)
    x = input_img
    # Pick the weight divergence used by the flipout layers.
    if kernel == 'kl':
        kernel_fn = scaled_kl_fn
    elif kernel == 'mmd':
        kernel_fn = mmd_from_dists
    else:
        raise ValueError(f'Kernel {kernel} not defined!')
    for _ in range(n_layers):
        x = tfpl.DenseFlipout(hidden_dim, activation='relu', kernel_divergence_fn=kernel_fn)(x)
        if dropout_rate > 0:
            x = tfkl.Dropout(dropout_rate)(x)
    # Two outputs parameterize a Normal: location and (softplus-transformed) scale.
    x = tfpl.DenseFlipout(2, kernel_divergence_fn=kernel_fn)(x)
    x = tfpl.DistributionLambda(lambda t: tfd.Normal(loc=t[..., :1],
                                                     scale=1e-3 + tf.math.softplus(t[..., 1:])))(x)
    model = tfk.Model(input_img, x)
    model.compile(optimizer=tf.optimizers.Adam(learning_rate=1e-4), loss=negloglik,
                  metrics=['mse', MeanMetricWrapper(negloglik_met, name='nll')])
    return model
using the MeanMetricWrapper defined above.
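For reference, negloglik and negloglik_met are not defined in the snippet; a typical definition (an assumption here, not necessarily the exact functions used above) would be:

negloglik = lambda y, rv_y: -rv_y.log_prob(y)  # hypothetical: rv_y is the output distribution
negloglik_met = negloglik                      # same function, used as the wrapped metric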
@joaocaldeira I think I did exactly the same thing. What's the size of your dataset and what problem are you trying to solve? I just want to understand if this is related to my problem or not. I guess my dataset is too small or my model is too complex.
Large, ~90k, and a simple problem (really one which I could solve without a neural network); I just wanted to test the uncertainties that come out of this. My network is fully-connected, as above; if yours is convolutional, I guess that's a pretty big difference.
I tried the solution proposed by @mcourteaux but I stumbled on a different error. Here is my code:
import six
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.utils.tf_utils import is_tensor_or_variable
import tensorflow as tf
import tensorflow_probability as tfp

# https://github.com/tensorflow/probability/issues/742
class MeanMetricWrapper(tf.keras.metrics.Mean):

    def __init__(self, fn, name=None, dtype=None, **kwargs):
        super(MeanMetricWrapper, self).__init__(name=name, dtype=dtype)
        self._fn = fn
        self._fn_kwargs = kwargs

    def update_state(self, y_true, y_pred, sample_weight=None):
        matches = self._fn(y_true, y_pred, **self._fn_kwargs)
        return super(MeanMetricWrapper, self).update_state(
            matches, sample_weight=sample_weight)

    def get_config(self):
        config = {}
        for k, v in six.iteritems(self._fn_kwargs):
            config[k] = K.eval(v) if is_tensor_or_variable(v) else v
        base_config = super(MeanMetricWrapper, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

negloglik = lambda p_y, y: -p_y.log_prob(y)
negloglik_w = MeanMetricWrapper(negloglik)

class BetaBinomial(tf.keras.layers.Layer):

    def __init__(self):
        super(BetaBinomial, self).__init__()

    def build(self, input_shape):
        self.alpha = self.add_weight(shape=(), trainable=True, initializer=tf.keras.initializers.Ones())
        self.beta = self.add_weight(shape=(), trainable=True, initializer=tf.keras.initializers.Ones())
        self.posterior = tfp.layers.DistributionLambda(lambda clicks: tfp.distributions.DirichletMultinomial(
            tf.cast(clicks, tf.float32), [self.alpha, self.beta], validate_args=False, allow_nan_stats=True,
            name='DirichletMultinomial'))

    def call(self, inputs):
        return self.posterior(inputs)

clicks = tf.keras.layers.Input(name='clicks', shape=(), dtype=tf.int64)
posterior = BetaBinomial()(clicks)
m = tf.keras.Model(inputs=[clicks], outputs=posterior)
m.summary()
m.compile(loss=negloglik_w, optimizer='adam')
yet I get the error
WARNING:tensorflow:AutoGraph could not transform <bound method BetaBinomial.call of <__main__.BetaBinomial object at 0x83e6a8ed0>> and will run it as-is.
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Unable to locate the source code of <bound method BetaBinomial.call of <__main__.BetaBinomial object at 0x83e6a8ed0>>. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
2020-04-16 12:23:51.199497: W tensorflow/python/util/util.cc:319] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
WARNING:tensorflow:
The following Variables were used a Lambda layer's call (distribution_lambda), but
are not present in its tracked objects:
<tf.Variable 'beta_binomial/Variable:0' shape=() dtype=float32>
<tf.Variable 'beta_binomial/Variable:0' shape=() dtype=float32>
It is possible that this is intended behavior, but it is more likely
an omission. This is a strong indication that this layer should be
formulated as a subclassed Layer rather than a Lambda layer.
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
clicks (InputLayer) [(None,)] 0
_________________________________________________________________
beta_binomial (BetaBinomial) (None, 2) 2
=================================================================
Total params: 2
Trainable params: 2
Non-trainable params: 0
_________________________________________________________________
Traceback (most recent call last):
File "<input>", line 47, in <module>
File "/Users/cdalmaso/opt/anaconda3/envs/tfp/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py", line 457, in _method_wrapper
result = method(self, *args, **kwargs)
File "/Users/cdalmaso/opt/anaconda3/envs/tfp/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 446, in compile
self._compile_weights_loss_and_weighted_metrics()
File "/Users/cdalmaso/opt/anaconda3/envs/tfp/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py", line 457, in _method_wrapper
result = method(self, *args, **kwargs)
File "/Users/cdalmaso/opt/anaconda3/envs/tfp/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 1592, in _compile_weights_loss_and_weighted_metrics
self.total_loss = self._prepare_total_loss(masks)
File "/Users/cdalmaso/opt/anaconda3/envs/tfp/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 1652, in _prepare_total_loss
per_sample_losses = loss_fn.call(y_true, y_pred)
TypeError: call() takes 2 positional arguments but 3 were given
This is running on tensorflow==2.1.0 and tensorflow-probability==0.9.0.
I am also confused: is the wrapper intended for the loss or for the metrics?
Thanks in advance
@mcourteaux's code does not work with TF 2, since the eval and is_tensor_or_variable functions are not available there. Any other workaround? And when is this going to be fixed?
@Strateus I used the solution described in the comment https://github.com/tensorflow/probability/issues/742#issuecomment-580433644 and it worked for me with TF 2. You need to import that function with from tensorflow.python.keras.utils.tf_utils import is_tensor_or_variable, and use tf.keras.backend.eval rather than just eval (or, equivalently, import eval from the Keras backend). Here's the full solution:
import six
import tensorflow as tf
from tensorflow.python.keras.utils.tf_utils import is_tensor_or_variable

class MetricWrapper(tf.keras.metrics.Mean):

    def __init__(self, fn, name="my_metric", dtype=None, **kwargs):
        super(MetricWrapper, self).__init__(name=name, dtype=dtype)
        self._fn = fn
        self._fn_kwargs = kwargs

    def update_state(self, y_true, y_pred, sample_weight=None):
        matches = self._fn(y_true, y_pred, **self._fn_kwargs)
        return super(MetricWrapper, self).update_state(matches, sample_weight=sample_weight)

    def get_config(self):
        config = {}
        for k, v in six.iteritems(self._fn_kwargs):
            config[k] = tf.keras.backend.eval(v) if is_tensor_or_variable(v) else v
        base_config = super(MetricWrapper, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
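Usage is as before (a sketch; model and negloglik stand for the model and loss function from earlier in the thread):

model.compile(optimizer='adam',
              loss=negloglik,
              metrics=[MetricWrapper(negloglik, name='nll')])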
@nbro Thanks, I used a different workaround: MultivariateNormalTriL instead of DenseFlipout.
Thank you for your hints. I tried it too, but it still fails:
171 loss={'loss_1': negloglik, 'loss_2': MetricWrapper(negloglik, name='nll')},
--> 172 loss_weights={'loss_1': 1., 'loss_2': 1.}
173 )
tensorflow_core\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
tensorflow_core\python\keras\engine\training.py in compile(self, optimizer, loss, metrics, loss_weights, sample_weight_mode, weighted_metrics, target_tensors, distribute, **kwargs)
444
445 # Creates the model loss and weighted metrics sub-graphs.
--> 446 self._compile_weights_loss_and_weighted_metrics()
447
448 # Functions for train, test and predict will
tensorflow_core\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
tensorflow_core\python\keras\engine\training.py in _compile_weights_loss_and_weighted_metrics(self, sample_weights)
1590 # loss_weight_2 * output_2_loss_fn(...) +
1591 # layer losses.
-> 1592 self.total_loss = self._prepare_total_loss(masks)
1593
1594 def _prepare_skip_target_masks(self):
tensorflow_core\python\keras\engine\training.py in _prepare_total_loss(self, masks)
1650
1651 if hasattr(loss_fn, 'reduction'):
-> 1652 per_sample_losses = loss_fn.call(y_true, y_pred)
1653 weighted_losses = losses_utils.compute_weighted_loss(
1654 per_sample_losses,
TypeError: call() takes 2 positional arguments but 3 were given
loss={'loss_1': negloglik, 'loss_2': MetricWrapper(negloglik, name='nll')}
To be honest, I didn't fully read your traceback, but this line seems to suggest that you're not using MetricWrapper for the first loss. Maybe do the following:
loss={'loss_1': MetricWrapper(negloglik, name='nll1'), 'loss_2': MetricWrapper(negloglik, name='nll2')}
Nope, my first loss does not need it, it works without this wrapper.
@Strateus But you're using negloglik in both cases (i.e. the same loss): you're passing negloglik to MetricWrapper and also using negloglik directly.
It is the same function, used for 2 different outputs, yes. And it works fine with MultivariateNormalTriL, but does not work with DenseFlipout.
@Strateus But I am suggesting that you use MetricWrapper for loss_1 too, to avoid the error you describe above. Otherwise, why would you need MetricWrapper in the first place if you can use negloglik directly?
I cannot use negloglik directly with DenseFlipout, because its output does not have log_prob somehow. So I either need to replace DenseFlipout with MultivariateNormalTriL, or use this wrapper (as I thought, but it does not work).
@Strateus My question is: why don't you use MetricWrapper for loss_1 too? That's what I've not yet understood.
Why would I need to do this if it works OK directly? Occam's razor: I don't use a wrapper if I don't need it.
@Strateus That's why I asked another question above: why do you use it for loss_2 then, if it works directly? I think there's a big misunderstanding here.
It works directly only with loss_1, because that head is a MultivariateNormalTriL layer. It does not work directly with loss_2, which is a DenseFlipout layer.
To rephrase: DenseFlipout has some bug that has not yet been fixed, which is why I am here asking this question. If there were no bug in TFP, I would not need this wrapper stuff, which is obviously a workaround.
@Strateus Ha, so you have something like a fork of layers in your model. But have you tried wrapping both losses with MetricWrapper and using dense layers in both cases?
The bug is not in the dense layer, I think. The problem is that Keras/TensorFlow was not programmed to deal with models that return a distribution.
@nbro @Strateus I used the same MetricWrapper that you shared, with tf==2.0 and tfp==0.9.0. The model architecture is:
prior = tfd.Independent(tfd.Normal(loc=tf.zeros(time_steps_output, dtype=tf.float32), scale=1.0),
                        reinterpreted_batch_ndims=1)

model = keras.Sequential()
model.add(keras.layers.Input(shape=X_shape[-2:]))
model.add(keras.layers.GRU(256, activation="relu", return_sequences=True))
model.add(keras.layers.GRU(128, activation="relu", return_sequences=True))
model.add(keras.layers.GRU(128, activation="relu", return_sequences=True))
model.add(keras.layers.GRU(64, activation="relu", recurrent_dropout=0.4))
model.add(keras.layers.Dense(16, activation="relu"))
model.add(keras.layers.Dense(tfp.layers.MultivariateNormalTriL.params_size(time_steps_output),
                             activation=None, name="distribution_weights"))
model.add(tfp.layers.MultivariateNormalTriL(time_steps_output,
                                            activity_regularizer=tfp.layers.KLDivergenceRegularizer(
                                                prior, weight=1 / n_batches),
                                            name="output"))
This is my compile step and neg_log_likelihood():

def neg_log_likelihood(y_true, y_pred):
    return -tf.reduce_mean(y_pred.log_prob(tf.cast(tf.argmax(y_true, axis=-1), tf.int32)))

def fit_model(model, data_train, data_val, n_epochs):
    parallel_model = multi_gpu_model(model, gpus=2)
    # Define early stopper
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
    parallel_model.compile(
        loss=MetricWrapper(neg_log_likelihood),
        optimizer=keras.optimizers.Adam(0.000001)
        # metrics=[MetricWrapper(neg_log_likelihood)]
    )
    history = parallel_model.fit(
        data_train,
        epochs=n_epochs,
        validation_data=data_val,
        verbose=0,
        shuffle=False,
        callbacks=[es]
    )
Then I also got this error:
Traceback (most recent call last):
File "event_mapping_model_training.py", line 318, in
@ShikhaSingh10 I don't know if this is the problem, but I am using the functional API (rather than the sequential one). I am also using DistributionLambda, to which I pass the output distribution.
@nbro These are the environment details:
tensorboard 2.2.2
tensorboard-plugin-wit 1.7.0
tensorflow 2.2.0
tensorflow-estimator 2.2.0
tensorflow-probability 0.10.0
With the same design, I am now facing this issue:
AttributeError                            Traceback (most recent call last)
I think I found a solution without using MetricWrapper: I convert the tensor into a distribution before calculating neg_log_likelihood. In the following code, I convert the y_pred tensor into the MultivariateNormalTriL distribution, which is the expected output distribution (by the time the loss is called, y_pred has already been converted to a tensor):

def neg_log_likelihood(y_true, y_pred):
    y_pred = tfp.distributions.MultivariateNormalTriL(y_pred)
    return -tf.reduce_mean(y_pred.log_prob(y_true))
I'm experiencing this problem actually after training, at model.save() time, with tf 2.3 and tfp 0.11:
tensorboard==2.3.0
tensorboard-plugin-wit==1.7.0
tensorflow==2.3.1
tensorflow-estimator==2.3.0
tensorflow-probability==0.11.1
Minimal example:
import tensorflow.keras as tfk
import tensorflow_probability as tfp
import tensorflow as tf

input_dim = 2
latent_dim = 1
prior = tfp.distributions.MultivariateNormalDiag(loc=tf.zeros(latent_dim))

encoder = tfk.Sequential([
    tfk.layers.InputLayer(input_shape=[input_dim]),
    tfk.layers.Dense(units=tfp.layers.MultivariateNormalTriL.params_size(latent_dim)),
    tfp.layers.MultivariateNormalTriL(
        event_size=latent_dim,
        activity_regularizer=tfp.layers.KLDivergenceRegularizer(prior),
    )
])

decoder = tfk.Sequential([
    tfk.layers.InputLayer(input_shape=[latent_dim]),
    tfk.layers.Dense(units=tfp.layers.MultivariateNormalTriL.params_size(input_dim)),
    tfp.layers.MultivariateNormalTriL(event_size=input_dim)
])

VAE = tfk.Model(inputs=encoder.inputs, outputs=decoder(encoder.outputs[0]))

def loss(x, x_rv):
    return -tf.reduce_sum(x_rv.log_prob(x))

VAE.compile(loss=loss)

data = tf.random.uniform(shape=(500, input_dim))
VAE.fit(x=data, y=data, epochs=2)
print("training done")
VAE.save(filepath="mymodel")  # Failure
Specifically, it seems the KLDivergenceRegularizer is being passed a Tensor instead of a distribution, and that is triggering the error. Even after wrapping my loss function with MeanMetricWrapper I get the same AttributeError :\ Am I missing some way to make the workaround work? Thanks in advance!
I managed to get save to work by making the following modifications to the file tensorflow_probability/python/layers/distribution_layer.py. I would really appreciate it if this bug were fixed, as storing and loading models is essential for me.

First, change the _make_kl_divergence_fn function so that it avoids computing the divergence when the input is a Tensor object, like this:

with tf.name_scope('kldivergence_loss'):
    if isinstance(distribution_a, tf.Tensor):
        return 0.0
    ...

Second, exchange the KLDivergenceRegularizer for a KLDivergenceAddLoss layer, or add a get_config method to the KLDivergenceRegularizer object:

def get_config(self):
    config = {'use_exact_kl': self._use_exact_kl,
              'test_points_reduce_axis': self._test_points_reduce_axis,
              'weight': self._weight}
    return dict(list(config.items()))
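For reference, the KLDivergenceAddLoss option would look roughly like this in the minimal VAE example above (a sketch, reusing prior, input_dim and latent_dim from that example):

encoder = tfk.Sequential([
    tfk.layers.InputLayer(input_shape=[input_dim]),
    tfk.layers.Dense(units=tfp.layers.MultivariateNormalTriL.params_size(latent_dim)),
    tfp.layers.MultivariateNormalTriL(event_size=latent_dim),
    # Adds the KL term as a layer loss instead of an activity regularizer.
    tfp.layers.KLDivergenceAddLoss(prior),
])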
Sadly, I found another bug when loading the model, even though I can save it. I was going to prepare a colab for that, but I don't know how to easily change already-installed TensorFlow Probability code on the cloud machine. In the meantime, here is a short example, where I was trying different commented lines to fix the problem, but it always fails.
This issue persists in TensorFlow Probability version 0.12.1; I have to update the code as mentioned here to make it work.
This bug has to be fixed. Not being able to save TF Probability models is a major issue, I think! And the fix seems easy enough, as per @cserpell's snippet.
Still an issue today using tfp 0.15.0 and TF 2.7.0
Same problem using tfp 0.16.0 and tf 2.9.1
Still an issue today using tfp 0.17.0 and tf 2.9.2
Still an issue using tfp 0.18.0
Change the model output: add the distribution parameters alongside the original predictions. Then, in the custom loss function, construct exactly the same tfd distribution, supply it with those parameters, and restore the log_prob part. Finally, trim off the unneeded parameters from model.predict's output to get the usual predictions (see the sketch after the snippet below).
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.layers import concatenate

tfd = tfp.distributions
DenseFlipout = tfp.layers.DenseFlipout
DistributionLambda = tfp.layers.DistributionLambda

class TPNormal_layer(tf.keras.layers.Layer):

    def __init__(self, n_outputs):
        super(TPNormal_layer, self).__init__()
        kernel_divergence_fn = lambda q, p, _: tfp.distributions.kl_divergence(q, p)
        bias_divergence_fn = kernel_divergence_fn
        # The flipout layer produces 3 parameters (loc, scale, skewness) per output.
        self.parameters = DenseFlipout(n_outputs * 3, kernel_divergence_fn=kernel_divergence_fn,
                                       bias_divergence_fn=bias_divergence_fn)
        make_distribution_fn = lambda t: tfd.TwoPieceNormal(
            loc=t[..., :n_outputs],
            scale=1e-3 + tf.math.softplus(0.05 * t[..., n_outputs:n_outputs * 2]),
            skewness=tf.math.softplus(0.05 * t[..., n_outputs * 2:n_outputs * 3]))
        self.samples = DistributionLambda(make_distribution_fn)

    def call(self, inputs):
        x = self.parameters(inputs)
        output = self.samples(x)
        # Return the sampled predictions together with the raw distribution parameters.
        return concatenate([output, x])

outputs = TPNormal_layer(n_outputs=3)(all_inputs)  # all_inputs defined elsewhere
model = tf.keras.Model(inputs=all_inputs, outputs=outputs)

def MyLoss():
    def loss(y_true, y_pred):
        n_outputs = 3
        # Split off the raw parameters appended to the model output.
        yparams = y_pred[..., n_outputs:]
        # Rebuild exactly the same distribution from those parameters.
        distfn = lambda t: tfd.TwoPieceNormal(
            loc=t[..., :n_outputs],
            scale=1e-3 + tf.math.softplus(0.05 * t[..., n_outputs:n_outputs * 2]),
            skewness=tf.math.softplus(0.05 * t[..., n_outputs * 2:n_outputs * 3]))
        ypdist = distfn(yparams)
        return -tf.reduce_mean(ypdist.log_prob(y_true))
    return loss

model.compile(loss=MyLoss(), optimizer=adam)  # adam defined elsewhere
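And the trimming step mentioned above (a sketch; with n_outputs=3 as in the snippet, the output has 3 sampled predictions followed by 9 raw parameters, and x stands for your input data):

preds = model.predict(x)   # shape (..., n_outputs * 4): samples + 3 * n_outputs parameters
y_hat = preds[..., :3]     # keep only the sampled predictions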
The problem still occurs with tensorflow==2.15 and tensorflow-probability==0.23. None of the approaches above works for me. Exporting for inference works, but the training signature throws with AttributeError: 'SymbolicTensor' object has no attribute 'log_prob'.
The following code produces the error with TF 2.1 and TFP 0.9. This error seems to be due to the fact that y_pred is a tensor when the loss is called, while it should be a distribution. Meanwhile, I found a question on Stack Overflow related to the third issue I mentioned above.
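To illustrate the mismatch (a sketch, not the code from the links above):

negloglik = lambda y_true, rv_y: -rv_y.log_prob(y_true)

# By the time Keras invokes this as a loss/metric, rv_y has already been
# converted to a plain tensor, so rv_y.log_prob fails with an AttributeError
# like the 'SymbolicTensor' one quoted earlier in the thread.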