Open miladtoutounchian opened 3 years ago
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
tfd = tfp.distributions
tfb = tfp.bijectors
# Design matrix: 5 observations, each with an intercept column (1.0)
# followed by two feature values.
X = np.array(
    [
        [1.0, 2.0, 5.0],
        [1.0, 1.0, 3.0],
        [1.0, 4.0, 2.0],
        [1.0, 5.0, 2.0],
        [1.0, 3.0, 1.0],
    ]
)
# Observed count targets (one per row of X), modeled as Poisson draws.
y = np.array([1.0, 0.0, 1.0, 1.0, 0.0])
# Trainable regression coefficients, shape (1, 3): one weight per column of X.
# Randomly initialized; updated in place by the optimizer below.
beta = tf.Variable(np.random.randn(1, 3), trainable=True)
# Adam with a fixed learning rate of 0.05.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.05)
# Show the initial (pre-training) coefficient values.
print(beta.numpy())
@tf.function
def get_loss_and_grads(X_t, y_t):
    """Run one training step of the Poisson regression model.

    Computes the negative mean log-likelihood of ``y_t`` under a Poisson
    distribution whose rate is ``exp(beta @ X_t.T)``, then applies one
    Adam update to the module-level ``beta`` variable.

    Args:
        X_t: design matrix; assumes shape (n, 3) to match beta's (1, 3) —
            TODO confirm against caller.
        y_t: observed counts, shape (n,).

    Returns:
        (loss, grads): the scalar NLL loss and the gradient list for [beta].
    """
    with tf.GradientTape() as tape:
        # log-link: rate = exp(beta @ X^T), one rate per observation.
        y_dist = tfd.Poisson(rate=tf.exp(tf.matmul(beta, tf.transpose(X_t))))
        # Fixed: original line had an unbalanced extra ')' here (SyntaxError).
        loss = -tf.reduce_mean(y_dist.log_prob(y_t))
    # NOTE(review): y_dist.trainable_variables is empty because the externally
    # created tf.Variable `beta` is not tracked by the distribution object, so
    # gradients are taken w.r.t. [beta] explicitly.
    grads = tape.gradient(loss, [beta])
    optimizer.apply_gradients(zip(grads, [beta]))
    return loss, grads
# Run the optimization for a fixed number of steps, recording the
# negative log-likelihood at each step.
epochs = 5000
nll_loss = []
for _ in range(epochs):
    step_loss, _grads = get_loss_and_grads(X, y)
    nll_loss.append(step_loss)

# Fitted coefficients and the first few loss values for inspection.
print(beta.numpy())
print(nll_loss[:10])
In the code example above, `y_dist.trainable_variables` is empty. But if I declare the training parameter as a list (the `beta` parameter here), it works.