Error passing total_size in pm.gp.GP

junpenglao commented 7 years ago

# data setup
n = 100
X = np.sort(3*np.random.rand(n))[:,None]

with pm.Model() as model:
    # f(x)
    l_true = 0.3
    s2_f_true = 1.0
    cov = s2_f_true * pm.gp.cov.ExpQuad(1, l_true)

    # noise, epsilon
    s2_n_true = 0.1
    K_noise = s2_n_true**2 * tt.eye(n)
    K = cov(X) + K_noise

# evaluate the covariance with the given hyperparameters
K = theano.function([], cov(X) + K_noise)()

# generate fake data from GP with white noise (with variance sigma2)
ymu = np.random.multivariate_normal(np.zeros(n), K)

with pm.Model() as model:
    # priors on the covariance function hyperparameters
    #l = pm.Uniform('l', 0, 10)
    l = pm.Gamma('l',1.,1.,testval=l_true)

    # uninformative prior on the function variance
    #log_s2_f = pm.Uniform('log_s2_f', lower=-10, upper=5)
    #s2_f = pm.Deterministic('s2_f', tt.exp(log_s2_f))
    s2_f = pm.Gamma('s2_f', 1.,1.,testval=s2_f_true)

    # uninformative prior on the noise variance
    #log_s2_n = pm.Uniform('log_s2_n', lower=-10, upper=5)
    #s2_n = pm.Deterministic('s2_n', tt.exp(log_s2_n))
    s2_n = pm.Gamma('s2_n', 1.,1.,testval=s2_n_true)

    # covariance functions for the function f and the noise
    f_cov = s2_f * pm.gp.cov.ExpQuad(1, l)

    intercept = pm.Cauchy('b',alpha=0,beta=10)
    meanfuc = pm.gp.mean.Constant(intercept)

    y_obs = pm.gp.GP('y_obs', 
                     mean_func=meanfuc, 
                     cov_func=f_cov, 
                     sigma=s2_n,
                     observed={'X':X, 'Y':ymu},
                     total_size=n)

returns the following error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-20-be2f47f890d0> in <module>()
     27                      sigma=s2_n,
     28                      observed={'X':minibatch_x, 'Y':minibatch_x},
---> 29                      total_size=n)

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
     35             total_size = kwargs.pop('total_size', None)
     36             dist = cls.dist(*args, **kwargs)
---> 37             return model.Var(name, dist, data, total_size)
     38         else:
     39             raise TypeError("Name needs to be a string but got: {}".format(name))

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pymc3/model.py in Var(self, name, dist, data, total_size)
    522         elif isinstance(data, dict):
    523             var = MultiObservedRV(name=name, data=data, distribution=dist,
--> 524                                   total_size=total_size, model=self)
    525             self.observed_RVs.append(var)
    526             if var.missing_values:

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pymc3/model.py in __init__(self, name, data, distribution, total_size, model)
    927         self.missing_values = [datum.missing_values for datum in self.data.values()
    928                                if datum.missing_values is not None]
--> 929         self.logp_elemwiset = distribution.logp(**self.data)
    930         self.total_size = total_size
    931         self.model = model

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pymc3/gp/gp.py in logp(self, Y, X)
     74         Sigma = self.K(X) + tt.eye(X.shape[0])*self.sigma**2
     75 
---> 76         return MvNormal.dist(mu, Sigma).logp(Y)
     77 
     78 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pymc3/distributions/multivariate.py in logp(self, value)
    130         else:
    131             result -= logdet(tau)
--> 132         result += (tt.dot(delta, tau) * delta).sum(axis=delta.ndim - 1)
    133         return -1 / 2. * result
    134 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/theano/tensor/basic.py in dot(a, b)
   5616         return tensordot(a, b, [[a.ndim - 1], [numpy.maximum(0, b.ndim - 2)]])
   5617     else:
-> 5618         return _dot(a, b)
   5619 
   5620 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
    666                 thunk.outputs = [storage_map[v] for v in node.outputs]
    667 
--> 668                 required = thunk()
    669                 assert not required  # We provided all inputs
    670 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/theano/gof/op.py in rval(p, i, o, n)
    910             # default arguments are stored in the closure of `rval`
    911             def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 912                 r = p(n, [x[0] for x in i], o)
    913                 for o in node.outputs:
    914                     compute_map[o][0] = True

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/theano/tensor/basic.py in perform(self, node, inp, out)
   5434         # gives a numpy float object but we need to return a 0d
   5435         # ndarray
-> 5436         z[0] = numpy.asarray(numpy.dot(x, y))
   5437 
   5438     def grad(self, inp, grads):

ValueError: shapes (25,1) and (25,25) not aligned: 1 (dim 1) != 25 (dim 0)

junpenglao commented 7 years ago

@ferrine following our discussion on Gitter

ferrine commented 7 years ago

Thanks, I'll look at it closer when I close my deadlines

bwengals commented 7 years ago

if I copy paste I don't get the error, is that the right example?

twiecki commented 7 years ago

@junpenglao Are you up-to-date on theano?

junpenglao commented 7 years ago

@twiecki Maybe not - i am on '0.8.2'. Upgrading now...

junpenglao commented 7 years ago

The error seems to appear when passing minibatch input now, I am using the following code (returns a different error):

# data setup
n = 100
X = np.sort(3*np.random.rand(n))[:,None]

with pm.Model() as model:
    # f(x)
    l_true = 0.3
    s2_f_true = 1.0
    cov = s2_f_true * pm.gp.cov.ExpQuad(1, l_true)

    # noise, epsilon
    s2_n_true = 0.1
    K_noise = s2_n_true**2 * tt.eye(n)
    K = cov(X) + K_noise

# evaluate the covariance with the given hyperparameters
K = theano.function([], cov(X) + K_noise)()

# generate fake data from GP with white noise (with variance sigma2)
ymu = np.random.multivariate_normal(np.zeros(n), K)

# Generator that returns mini-batches in each iteration
def create_minibatch(data, batchsize = 20):
    rng = np.random.RandomState(0)

    while True:
        # Return random data samples of set size 100 each iteration
        ixs = rng.randint(len(data), size=batchsize)
        yield data[ixs]

batchsize = 20
minibatch_x = pm.generator(create_minibatch(X, batchsize = batchsize))
minibatch_y = pm.generator(create_minibatch(ymu, batchsize = batchsize))

with pm.Model() as model:
    # priors on the covariance function hyperparameters
    #l = pm.Uniform('l', 0, 10)
    l = pm.Gamma('l',1.,1.,testval=l_true)

    # uninformative prior on the function variance
    #log_s2_f = pm.Uniform('log_s2_f', lower=-10, upper=5)
    #s2_f = pm.Deterministic('s2_f', tt.exp(log_s2_f))
    s2_f = pm.Gamma('s2_f', 1.,1.,testval=s2_f_true)

    # uninformative prior on the noise variance
    #log_s2_n = pm.Uniform('log_s2_n', lower=-10, upper=5)
    #s2_n = pm.Deterministic('s2_n', tt.exp(log_s2_n))
    s2_n = pm.Gamma('s2_n', 1.,1.,testval=s2_n_true)

    # covariance functions for the function f and the noise
    f_cov = s2_f * pm.gp.cov.ExpQuad(1, l)

    intercept = pm.Cauchy('b',alpha=0,beta=10)
    meanfuc = pm.gp.mean.Constant(intercept)

    y_obs = pm.gp.GP('y_obs', 
                     mean_func=meanfuc, 
                     cov_func=f_cov, 
                     sigma=s2_n,
                     observed={'X':minibatch_x, 'Y':minibatch_y},
                     total_size=n)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-9-cdeb6a3454b7> in <module>()
     59                      sigma=s2_n,
     60                      observed={'X':minibatch_x, 'Y':minibatch_y},
---> 61                      total_size=n)

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
     35             total_size = kwargs.pop('total_size', None)
     36             dist = cls.dist(*args, **kwargs)
---> 37             return model.Var(name, dist, data, total_size)
     38         else:
     39             raise TypeError("Name needs to be a string but got: {}".format(name))

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pymc3/model.py in Var(self, name, dist, data, total_size)
    522         elif isinstance(data, dict):
    523             var = MultiObservedRV(name=name, data=data, distribution=dist,
--> 524                                   total_size=total_size, model=self)
    525             self.observed_RVs.append(var)
    526             if var.missing_values:

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pymc3/model.py in __init__(self, name, data, distribution, total_size, model)
    927         self.missing_values = [datum.missing_values for datum in self.data.values()
    928                                if datum.missing_values is not None]
--> 929         self.logp_elemwiset = distribution.logp(**self.data)
    930         self.total_size = total_size
    931         self.model = model

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pymc3/gp/gp.py in logp(self, Y, X)
     74         Sigma = self.K(X) + tt.eye(X.shape[0])*self.sigma**2
     75 
---> 76         return MvNormal.dist(mu, Sigma).logp(Y)
     77 
     78 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pymc3/distributions/multivariate.py in logp(self, value)
    122         tau = self.tau
    123 
--> 124         delta = value - mu
    125         k = tau.shape[0]
    126 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/theano/tensor/var.py in __sub__(self, other)
    145         # and the return value in that case
    146         try:
--> 147             return theano.tensor.basic.sub(self, other)
    148         except (NotImplementedError, AsTensorError):
    149             return NotImplemented

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
    672                 thunk.outputs = [storage_map[v] for v in node.outputs]
    673 
--> 674                 required = thunk()
    675                 assert not required  # We provided all inputs
    676 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/theano/gof/op.py in rval()
    841 
    842         def rval():
--> 843             fill_storage()
    844             for o in node.outputs:
    845                 compute_map[o][0] = True

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/theano/gof/cc.py in __call__(self)
   1696                 print(self.error_storage, file=sys.stderr)
   1697                 raise
-> 1698             reraise(exc_type, exc_value, exc_trace)
   1699 
   1700 

/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/six-1.10.0-py3.5.egg/six.py in reraise(tp, value, tb)
    684         if value.__traceback__ is not tb:
    685             raise value.with_traceback(tb)
--> 686         raise value
    687 
    688 else:

ValueError: Input dimension mis-match. (input[0].shape[1] = 20, input[1].shape[1] = 1)

junpenglao commented 7 years ago

I think it is because of the setup in pm.gp.GP

twiecki commented 7 years ago

Oh, you're passing in a generator to GP. Not sure that's tested or even expected to work. You could only use mini-batch ADVI to begin with. Not sure how well that works with GPs.

twiecki commented 7 years ago

CC @ferrine.

junpenglao commented 7 years ago

Oh, you're passing in a generator to GP. Not sure that's tested or even expected to work. You could only use mini-batch ADVI to begin with. Not sure how well that works with GPs.

Yeah I am currently casting the variable directly into a MultiNormal instead - seems to work fine in Gaussian case but still returning bias estimation in eg Poisson likelihood. I am still trying to figure it out.

ferrine commented 7 years ago

Tests on my macbook pass. Minibatch ADVI is OK for GPs according to gitter discussion. Generators are the same tensors and behave like tensors, only theoretical constraints matter.

ferrine commented 7 years ago

BTW what is your theano version? I use 0 9.0rc4

junpenglao commented 7 years ago

My theano version is '0.9.0.dev-d704a600dfb029eed39957730a42f50262df004f'. And I got the same error both on my macbook and linux.

ferrine commented 7 years ago

That is really strange.

junpenglao commented 7 years ago

my mac is on pymc3-3.1rc2 theano-0.9.0rc4

fonnesbeck commented 7 years ago

Is the usage of total_size anywhere in the docs (or even a docstring)? I don't see it anywhere.

twiecki commented 7 years ago

@ferrine ^ Should definitely add that.

pymc-devs / pymc

Error passing total_size in pm.gp.GP #1917