PGM-Lab / InferPy

InferPy: Deep Probabilistic Modeling with Tensorflow Made Easy
https://inferpy-docs.readthedocs.io/en/stable/index.html
Apache License 2.0

broadcasting error at posterior of VAE MNIST #178

Closed: rcabanasdepaz closed 5 years ago

rcabanasdepaz commented 5 years ago

When running the following code:

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import inferpy as inf

# number of components
k = 2
# size of the hidden layer in the NN
d0 = 100
# dimensionality of the data
dx = 28 * 28
# number of observations (dataset size)
N = 1000
# batch size
M = 100
# digits considered
DIG = [0, 1, 2]
# minimum scale
scale_epsilon = 0.01
# inference parameters
num_epochs = 1000
learning_rate = 0.01

# reset tensorflow
tf.reset_default_graph()
tf.set_random_seed(1234)

from inferpy.data import mnist

# load the data
(x_train, y_train), _ = mnist.load_data(num_instances=N, digits=DIG)

mnist.plot_digits(x_train, grid=[5,5])

############## Inferpy ##############

@inf.probmodel
def vae(k, d0, dx, decoder):
    with inf.datamodel():
        z = inf.Normal(tf.ones(k) * 0.5, 1, name="z")  # shape = [N,k]
        output = decoder(z, d0, dx)
        x_loc = output[:, :dx]
        x_scale = tf.nn.softmax(output[:, dx:]) + scale_epsilon
        x = inf.Normal(x_loc, x_scale, name="x")  # shape = [N,d]

# Neural networks for decoding and encoding

def decoder(z, d0, dx):  # k -> d0 -> 2*dx
    h0 = tf.layers.dense(z, d0, tf.nn.relu)
    return tf.layers.dense(h0, 2 * dx)

def encoder(x, d0, k):  # dx -> d0 -> 2*k
    h0 = tf.layers.dense(x, d0, tf.nn.relu)
    return tf.layers.dense(h0, 2 * k)

# Q model for making inference

@inf.probmodel
def qmodel(k, d0, dx, encoder):
    with inf.datamodel():
        x = inf.Normal(tf.ones(dx) * 0.5, 1, name="x")
        output = encoder(x, d0, k)
        qz_loc = output[:, :k]
        qz_scale = tf.nn.softmax(output[:, k:]) + scale_epsilon
        qz = inf.Normal(qz_loc, qz_scale, name="z")

# Inference
############################

m = vae(k, d0, dx, decoder)
q = qmodel(k, d0, dx, encoder)

# set the inference algorithm
SVI = inf.inference.SVI(q, epochs=100, batch_size=M)

############################
# learn the parameters
m.fit({"x": x_train}, SVI)

# Usage of the model with the inferred parameters
####################################################

sess = inf.get_session()
postz = m.posterior("z", data={"x": x_train}).sample()

I get the following broadcasting error:

Traceback (most recent call last):
  File "[...]/venv/InferPy/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-6430e7f5d37c>", line 96, in <module>
    postz = m.posterior("z", data={"x": x_train}).sample()
  File "[...]/InferPy/inferpy/queries/query.py", line 12, in wrapper
    result = f(*args, **kwargs)
  File "[...]/InferPy/inferpy/util/runtime.py", line 86, in wrapper
    return f(*args, **kwargs)
  File "[...]/InferPy/inferpy/queries/query.py", line 54, in sample
    with contextmanager.observe(self.observed_variables, self.data):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/contextlib.py", line 81, in __enter__
    return next(self.gen)
  File "[...]/InferPy/inferpy/contextmanager/evidence.py", line 30, in observe
    np.broadcast_to(v, variables[k].observed_value.shape.as_list()), session=sess)
  File "[...]/venv/InferPy/lib/python3.6/site-packages/numpy/lib/stride_tricks.py", line 182, in broadcast_to
    return _broadcast_to(array, shape, subok=subok, readonly=True)
  File "[...]/venv/InferPy/lib/python3.6/site-packages/numpy/lib/stride_tricks.py", line 129, in _broadcast_to
    op_flags=[op_flag], itershape=shape, order='C')
ValueError: operands could not be broadcast together with remapped shapes [original->remapped]: (1000,784) and requested shape (100,784)
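
The shapes in the error match the model setup: the posterior query creates its observed variable for "x" with the batch shape (M, dx) = (100, 784), while the full dataset x_train has shape (N, dx) = (1000, 784), so the evidence context cannot broadcast one onto the other. A minimal NumPy sketch of the clash (outside InferPy, shapes taken from the traceback above):

import numpy as np

N, M, dx = 1000, 100, 28 * 28      # dataset size, batch size, data dimensionality
x_full = np.zeros((N, dx))         # stands in for x_train

try:
    # the evidence context broadcasts the supplied data onto the observed
    # variable, whose leading dimension is the batch size M, not N
    np.broadcast_to(x_full, (M, dx))
except ValueError as err:
    print(err)  # operands could not be broadcast together ...
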
rcabanasdepaz commented 5 years ago

The data passed to the posterior query must have the same size as the batch:

postz = np.concatenate([
    m.posterior("z", data={"x": x_train[i:i+M,:]}).sample()
    for i in range(0,N,M)])
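
With M dividing N, the loop covers the whole dataset and the concatenated postz should have shape (N, k) = (1000, 2). As a quick visual check (a sketch, not part of the fix above; it reuses plt, y_train and postz from the snippets in this thread), the 2-D latent codes can be scattered and coloured by digit label:

# postz: (N, k) posterior samples of z, one row per training image
plt.scatter(postz[:, 0], postz[:, 1], c=y_train, cmap="tab10", s=5)
plt.colorbar(label="digit")
plt.show()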