pymc-devs / pymc

Bayesian Modeling and Probabilistic Programming in Python
https://docs.pymc.io/

Use of pm.Data affects shape interpretation #4765

Open rpgoldman opened 3 years ago

rpgoldman commented 3 years ago

Description of your problem

I have a model that compiles correctly when the priors' parameters are given as numpy arrays, but raises a shape error when those same arrays are wrapped in pm.Data. That seems wrong -- shouldn't the two behave identically?

Example code

import numpy as np
import pymc3 as pm
import theano.tensor as t

# df, D (number of documents), K (number of topics), and V (vocabulary size)
# are presumably defined earlier in the full script (see attachment).
num_words = df.shape[0]

with pm.Model() as model:
    # alpha = pm.Data("α", np.ones((1, K)))       # this variant fails
    alpha = np.ones((1, K))                        # this variant works
    # beta_prior = pm.Data("β", np.ones((1, V)))  # this variant fails
    beta_prior = np.ones((1, V))                   # this variant works
    doc_num = pm.Data('i', df['Document'])
    theta = pm.Dirichlet("θ", a=alpha, shape=(D, K))  # per-document topic probabilities

    # z = pm.Categorical("z_i", p=theta[doc_num], shape=df.shape[0])  # topic for word
    beta = pm.Dirichlet("beta", a=beta_prior, shape=(K, V))  # per-topic word probabilities
    # ws = pm.Deterministic("z * w", t.dot(theta[doc_num], beta))
    w = pm.Categorical("w",
                       p=t.dot(theta[doc_num], beta),
                       # p=ws,
                       shape=df.shape[0],
                       observed=df['Word'])

(full code will be supplied as attachment -- but this may have an obvious answer.)

When the above piece of code is executed as written, it completes successfully.

However, if I replace the definitions of alpha and beta_prior with the commented-out lines that use pm.Data, I get shape errors. The traceback follows:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-11-5542319f899e> in <module>
      7     #beta_prior = np.ones((1, V))
      8     doc_num = pm.Data('i', df['Document'])
----> 9     theta = pm.Dirichlet("θ", a=alpha, shape=(D, K)) # topic probabilities
     10 
     11     # z = pm.Categorical("z_i", p=theta[doc_num], shape=df.shape[0]) # topic for word

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
    120         else:
    121             dist = cls.dist(*args, **kwargs)
--> 122         return model.Var(name, dist, data, total_size, dims=dims)
    123 
    124     def __getnewargs__(self):

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/model.py in Var(self, name, dist, data, total_size, dims)
   1140             else:
   1141                 with self:
-> 1142                     var = TransformedRV(
   1143                         name=name,
   1144                         distribution=dist,

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/model.py in __init__(self, type, owner, index, name, distribution, model, transform, total_size)
   2011 
   2012             self.transformed = model.Var(
-> 2013                 transformed_name, transform.apply(distribution), total_size=total_size
   2014             )
   2015 

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/distributions/transforms.py in apply(self, dist)
    124     def apply(self, dist):
    125         # avoid circular import
--> 126         return TransformedDistribution.dist(dist, self)
    127 
    128     def __str__(self):

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/distributions/distribution.py in dist(cls, *args, **kwargs)
    128     def dist(cls, *args, **kwargs):
    129         dist = object.__new__(cls)
--> 130         dist.__init__(*args, **kwargs)
    131         return dist
    132 

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/distributions/transforms.py in __init__(self, dist, transform, *args, **kwargs)
    152         self.dist = dist
    153         self.transform_used = transform
--> 154         v = forward(FreeRV(name="v", distribution=dist))
    155         self.type = v.type
    156 

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/model.py in __init__(self, type, owner, index, name, distribution, total_size, model)
   1669                 np.ones(distribution.shape, distribution.dtype) * distribution.default()
   1670             )
-> 1671             self.logp_elemwiset = distribution.logp(self)
   1672             # The logp might need scaling in minibatches.
   1673             # This is done in `Factor`.

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/distributions/multivariate.py in logp(self, value)
    520         # only defined for sum(value) == 1
    521         return bound(
--> 522             tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(tt.sum(a, axis=-1)),
    523             tt.all(value >= 0),
    524             tt.all(value <= 1),

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/pymc3/distributions/dist_math.py in logpow(x, m)
    106     """
    107     # return m * log(x)
--> 108     return tt.switch(tt.eq(x, 0), tt.switch(tt.eq(m, 0), 0.0, -np.inf), m * tt.log(x))
    109 
    110 

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/theano/tensor/var.py in __mul__(self, other)
    126         # and the return value in that case
    127         try:
--> 128             return theano.tensor.mul(self, other)
    129         except (NotImplementedError, TypeError):
    130             return NotImplemented

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/theano/graph/op.py in __call__(self, *inputs, **kwargs)
    251 
    252         if config.compute_test_value != "off":
--> 253             compute_test_value(node)
    254 
    255         if self.default_output is not None:

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/theano/graph/op.py in compute_test_value(node)
    128     thunk.outputs = [storage_map[v] for v in node.outputs]
    129 
--> 130     required = thunk()
    131     assert not required  # We provided all inputs
    132 

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/theano/graph/op.py in rval()
    604 
    605         def rval():
--> 606             thunk()
    607             for o in node.outputs:
    608                 compute_map[o][0] = True

~/opt/anaconda3/envs/topics-pymc3/lib/python3.9/site-packages/theano/link/c/basic.py in __call__(self)
   1769                 print(self.error_storage, file=sys.stderr)
   1770                 raise
-> 1771             raise exc_value.with_traceback(exc_trace)
   1772 
   1773 

ValueError: Input dimension mis-match. (input[0].shape[0] = 1, input[1].shape[0] = 2000)
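
For reference, a pared-down sketch of the two variants (K and D are placeholder sizes and the variable names are made up, not taken from the real data) that should isolate the difference; only the pm.Data variant would be expected to hit the dimension mis-match above:

import numpy as np
import pymc3 as pm

K, D = 3, 5  # placeholder sizes

# Variant 1: plain numpy array as the Dirichlet concentration -- works.
with pm.Model():
    theta_np = pm.Dirichlet("theta_np", a=np.ones((1, K)), shape=(D, K))

# Variant 2: the same array wrapped in pm.Data -- expected to raise the
# ValueError shown above.
with pm.Model():
    alpha = pm.Data("alpha", np.ones((1, K)))
    theta_data = pm.Dirichlet("theta_data", a=alpha, shape=(D, K))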

Versions and main components

rpgoldman commented 3 years ago

Sorry: my previous comment was wrong -- the use of pm.Data actually fails for mixture models as well. I'm deleting that comment but leaving this one here as an explanation.

MarcoGorelli commented 3 years ago

(full code will be supplied as attachment -- but this may have an obvious answer.)

Did you forget to attach the code?

michaelosthege commented 3 years ago

I'm not too familiar with the 3.11.2 implementation, but when working on the shape handling for v4 I ran into problems with broadcasting of column vectors. The difference between the numpy array and pm.Data could be a different order of applying Ops such as as_tensor_variable, make_vector, etc. In the end this can lead to differences in the .broadcastable attribute of the TensorVariables, making them (in)compatible with broadcasting operations.

If you do a theano.printing.debugprint(theta) with and without pm.Data for alpha, you will probably see differences in the graph that could explain the dimension problems.
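
Something along these lines (a sketch; K is a placeholder size and the variable names are made up) should show whether the broadcast patterns of the two alpha variants actually differ:

import numpy as np
import pymc3 as pm
import theano
import theano.tensor as tt

K = 3  # placeholder size

# A constant built from a (1, K) numpy array: size-1 dims become broadcastable.
plain = tt.as_tensor_variable(np.ones((1, K)))
print(plain.broadcastable)    # likely (True, False)

# pm.Data wraps the array in a shared variable, which by default is not
# marked broadcastable in any dimension.
with pm.Model():
    shared = pm.Data("alpha", np.ones((1, K)))
print(shared.broadcastable)   # likely (False, False)

# To compare the full graphs of theta in the original model:
# theano.printing.debugprint(theta)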

michaelosthege commented 2 years ago

@rpgoldman can you check your model again with a recent version? I suspect that the issue has been resolved in the meantime.
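
For example (a sketch of such a check under PyMC v4/v5; K and D are placeholder sizes, and pm.draw is only used to force the shapes through):

import numpy as np
import pymc as pm  # v4 or later

K, D = 3, 5  # placeholder sizes

with pm.Model():
    alpha = pm.Data("alpha", np.ones((1, K)))
    theta = pm.Dirichlet("theta", a=alpha, shape=(D, K))
    print(pm.draw(theta).shape)  # expect (5, 3) if the shape issue is resolved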