aesara-devs / aesara

Aesara is a Python library for defining, optimizing, and efficiently evaluating mathematical expressions involving multi-dimensional arrays.
https://aesara.readthedocs.io

Evaluation of etuplized RandomVariables fails for some distributions #1002

Open rlouf opened 2 years ago

rlouf commented 2 years ago

eval_if_etuple does not work when naively evaluating etuplized random variables. Let us consider the following model:

import aesara.tensor as at
from etuples import etuple, etuplize

srng = at.random.RandomStream(0)
Y_rv = srng.normal(1, 1)

Y_et = etuplize(Y_rv)
Y_et
# e(e(<class 'aesara.tensor.random.basic.NormalRV'>, normal, 0, (0, 0), floatX, False), RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7F25FFC56420>), TensorConstant{[]}, TensorConstant{11}, TensorConstant{1}, TensorConstant{1})
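
For later reference (the goals below index into this structure), the head of Y_et is the etuplized Op and Y_et[1:] holds the node's inputs, in the order (rng, size, dtype, mu, sigma):

Y_et[0]
# e(<class 'aesara.tensor.random.basic.NormalRV'>, normal, 0, (0, 0), floatX, False)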

The original object is stored in the ExpressionTuple instance, so the following works:

Y_et.evaled_obj
# normal_rv{0, (0, 0), floatX, False}.out

We can force the ExpressionTuple to re-evaluate instead of returning the saved object; in this case evaluation fails:

Y_et._evaled_obj = Y_et.null
Y_et.evaled_obj
# TypeError: Too many positional arguments

The issue was discussed in https://github.com/aesara-devs/aemcmc/pull/29 and prevents us from writing miniKanren goals that represent two-way relations between expressions that contain random variables:

import aesara.tensor as at

from etuples import etuplize, etuple
from kanren import eq, lall, run, var

def normaleo(in_expr, out_expr):
    mu_lv, std_lv = var(), var()
    rng_lv, size_lv, dtype_lv = var(), var(), var()

    norm_in_lv = etuple(etuplize(at.random.normal), rng_lv, size_lv, dtype_lv, mu_lv, std_lv)
    norm_out_lv = etuple(etuplize(at.random.normal), rng_lv, size_lv, dtype_lv, mu_lv, std_lv)

    return lall(eq(in_expr, norm_in_lv), eq(out_expr, norm_out_lv))

srng = at.random.RandomStream(0)
Y_rv = srng.normal(1, 1)

y_lv = var()
(y_expr,) = run(1, y_lv, normaleo(Y_rv, y_lv))
y_expr.evaled_obj
# TypeError: Too many positional arguments

The reason, explained in this comment, is that evaled_obj calls op.__call__ when evaluating. However, __call__ does not systematically dispatch to make_node for RandomVariables, hence the error.
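
This is easy to check directly (a minimal sketch, assuming RandomVariable.__call__ takes the distribution parameters positionally and rng/size/dtype only as keyword arguments):

op = Y_rv.owner.op
# Rebuilding the node via make_node with the full input list works
op.make_node(*Y_rv.owner.inputs)
# Calling the Op with the same positional inputs is what evaled_obj does
op(*Y_rv.owner.inputs)
# TypeError: Too many positional arguments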

Instead of dispatching op.__call__ to op.make_node for every random variable (which would be restrictive according to @brandonwillard), we can wrap RandomVariable Ops with a MakeNodeOp Op that dispatches __call__ to make_node:

from dataclasses import dataclass
from aesara.graph import Op, Variable
import aesara.tensor as at
from cons.core import ConsError, _car, _cdr
from etuples import etuple, etuplize

@dataclass
class MakeNodeOp:
    op: Op

    def __call__(self, *args):
        return self.op.make_node(*args)

def car_Variable(x):
    # The car of a Variable is its owner's Op, wrapped so that
    # evaluation goes through make_node instead of __call__
    if x.owner:
        return MakeNodeOp(x.owner.op)
    else:
        raise ConsError("Not a cons pair.")

_car.add((Variable,), car_Variable)

def car_MakeNodeOp(x):
    # The car of a MakeNodeOp instance is the MakeNodeOp class
    return type(x)

_car.add((MakeNodeOp,), car_MakeNodeOp)

def cdr_MakeNodeOp(x):
    # The cdr of a MakeNodeOp instance is the wrapped Op
    x_e = etuple(_car(x), x.op, evaled_obj=x)
    return x_e[1:]

_cdr.add((MakeNodeOp,), cdr_MakeNodeOp)

srng = at.random.RandomStream(0)
Y_rv = srng.normal(1, 1)
Y_et = etuplize(Y_rv)
Y_et
# e(e(<class '__main__.MakeNodeOp'>, e(<class 'aesara.tensor.random.basic.NormalRV'>, normal, 0, (0, 0), floatX, False)), RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7FEB800BE260>), TensorConstant{[]}, TensorConstant{11}, TensorConstant{1}, TensorConstant{1})

Y_et._evaled_obj = Y_et.null
Y_et.evaled_obj
# normal_rv{0, (0, 0), floatX, False}(RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7FEB800BE260>), TensorConstant{[]}, TensorConstant{11}, TensorConstant{1}, TensorConstant{1})

The downside to this approach is that defining kanren goals becomes more cumbersome: we need to wrap random variables with MakeNodeOp ourselves, since building an etuple directly from etuplize(at.random.normal) still fails (a working wrapped version is sketched after the failing example):

norm_et = etuple(etuplize(at.random.normal), *Y_et[1:])
norm_et.evaled_obj
# TypeError: Too many positional arguments
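
With the explicit wrapping, evaluation should go through make_node (an untested sketch, reusing MakeNodeOp and Y_et from above):

norm_et = etuple(etuple(MakeNodeOp, etuplize(at.random.normal)), *Y_et[1:])
norm_et.evaled_obj  # now dispatches to make_node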

I suggest instead dispatching etuplize on RandomVariable, which handles the etuplization of both random variables and RandomVariable Ops:

from dataclasses import dataclass

from aesara.graph import Op, Variable
import aesara.tensor as at
from aesara.tensor.random.basic import RandomVariable

from etuples import etuplize, etuple

@dataclass
class MakeRVNodeOp:
    op: Op

    def __call__(self, *args):
        return self.op.make_node(*args)

@etuplize.register(RandomVariable)
def etuplize_random(*args, **kwargs):
    return etuple(MakeRVNodeOp, etuplize.funcs[(object,)](*args, **kwargs))

srng = at.random.RandomStream(0)
Y_rv = srng.normal(1, 1)
Y_et = etuplize(Y_rv)
Y_et
# e(e(<class 'aesara.tensor.random.basic.NormalRV'>, normal, 0, (0, 0), floatX, False), RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7FF5DDA0E340>), TensorConstant{[]}, TensorConstant{11}, TensorConstant{1}, TensorConstant{1})

norm_et = etuple(etuplize(at.random.normal), *Y_et[1:])
norm_et.evaled_obj
# normal_rv{0, (0, 0), floatX, False}(RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7FF5DDA0E340>), TensorConstant{[]}, TensorConstant{11}, TensorConstant{1}, TensorConstant{1})

And now the above kanren goal works both ways:

from kanren import run, lall, eq, var

def normaleo(in_expr, out_expr):
    mu_lv, std_lv = var(), var()
    rng_lv, size_lv, dtype_lv = var(), var(), var()

    norm_in_lv = etuple(etuplize(at.random.normal), rng_lv, size_lv, dtype_lv, mu_lv, std_lv)
    norm_out_lv = etuple(etuplize(at.random.normal), rng_lv, size_lv, dtype_lv, mu_lv, std_lv)

    return lall(eq(in_expr, norm_in_lv), eq(out_expr, norm_out_lv))

y_lv = var()
(y_expr,) = run(1, y_lv, normaleo(Y_rv, y_lv))
y_expr
# normal_rv{0, (0, 0), floatX, False}(RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7FF5DDA0E340>), TensorConstant{[]}, TensorConstant{11}, TensorConstant{1}, TensorConstant{1})

y_lv = var()
(y_expr,) = run(1, y_lv, normaleo(y_lv, Y_rv))
y_expr
y_expr.evaled_obj
# normal_rv{0, (0, 0), floatX, False}(RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7FF5DDA0E340>), TensorConstant{[]}, TensorConstant{11}, TensorConstant{1}, TensorConstant{1})

Note that the following code will still fail, however:

Y_et._evaled_obj = Y_et.null
Y_et.evaled_obj
# TypeError: Too many positional arguments
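
Indeed, consistent with the printed output above, the head of Y_et is still the bare NormalRV etuple rather than a MakeRVNodeOp wrapper, so evaluation goes through Op.__call__:

Y_et[0]
# e(<class 'aesara.tensor.random.basic.NormalRV'>, normal, 0, (0, 0), floatX, False)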

So we will need a mix of the two approaches. I will open a PR soon.

brandonwillard commented 2 years ago

Is your etuplize_random working as expected? I don't see a MakeRVNodeOp in the printed string output.

rlouf commented 2 years ago

Yes, if you print norm_et defined above you get:

norm_et
# e(e(<class '__main__.MakeRVNodeOp'>, e(<class 'aesara.tensor.random.basic.NormalRV'>, normal, 0, (0, 0), floatX, False)), RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7F65BBBE9A80>), TensorConstant{[]}, TensorConstant{11}, TensorConstant{1}, TensorConstant{1})

i.e. etuplize(at.random.normal) does wrap the RandomVariable Op with MakeRVNodeOp. The problem is that MakeRVNodeOp does not appear in Y_et.

My understanding is that while we dispatch etuplize, it is the inner function etuplize_step that is applied recursively (see here), and that one is not dispatched. Is that expected?

The only way to get the wrapping is to dispatch _car the way you did above. Still, it would be nice if the code above with etuplize_random just worked.
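
To make the asymmetry concrete (a sketch, using the definitions above):

etuplize(at.random.normal)
# e(<class '__main__.MakeRVNodeOp'>, e(<class 'aesara.tensor.random.basic.NormalRV'>, ...))
etuplize(Y_rv)
# the Op is etuplized recursively via etuplize_step, so no MakeRVNodeOp appears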

rlouf commented 2 years ago

The following works:

from dataclasses import dataclass

from aesara.graph import Op, Variable
import aesara.tensor as at
from aesara.tensor.random.basic import RandomVariable

from cons.core import ConsError, _car, _cdr
from etuples import etuplize, etuple

@dataclass
class MakeRVNodeOp:
    op: Op

    def __call__(self, *args):
        return self.op.make_node(*args)

@etuplize.register(RandomVariable)
def etuplize_random(*args, **kwargs):
    return etuple(MakeRVNodeOp, etuplize.funcs[(object,)](*args, **kwargs))

def car_RVariable(x):
    if x.owner:
        return MakeRVNodeOp(x.owner.op)
    else:
        raise ConsError("Not a cons pair.")

_car.add((Variable,), car_RVariable)

def car_MakeNodeOp(x):
    return type(x)

_car.add((MakeRVNodeOp,), car_MakeNodeOp)

def cdr_MakeNodeOp(x):
    x_e = etuple(_car(x), x.op, evaled_obj=x)
    return x_e[1:]

_cdr.add((MakeRVNodeOp,), cdr_MakeNodeOp)

srng = at.random.RandomStream(0)
Y_rv = srng.normal(1, 1)
Y_et = etuplize(Y_rv)
print(Y_et)
# e(e(<class '__main__.MakeRVNodeOp'>, e(<class 'aesara.tensor.random.basic.NormalRV'>, normal, 0, (0, 0), floatX, False)), RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7F07F31FD2A0>), TensorConstant{[]}, TensorConstant{11}, TensorConstant{1}, TensorConstant{1})

norm_et = etuple(etuplize(at.random.normal), *Y_et[1:])
print(norm_et)
# e(e(<class '__main__.MakeRVNodeOp'>, e(<class 'aesara.tensor.random.basic.NormalRV'>, normal, 0, (0, 0), floatX, False)), RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7F07F31FD2A0>), TensorConstant{[]}, TensorConstant{11}, TensorConstant{1}, TensorConstant{1})

which does feel a bit hacky; ideally, dispatching etuplize would be enough to get the expected behavior. Without knowing the internals of etuplize, the current behavior is actually confusing.

rlouf commented 2 years ago

Finally, just making sure this solves the motivating issue (https://github.com/aesara-devs/aemcmc/pull/29) by running the kanren goal forward and backward:

from kanren import eq, lall, run, var

def normaleo(in_expr, out_expr):
    mu_lv, std_lv = var(), var()
    rng_lv, size_lv, dtype_lv = var(), var(), var()

    norm_in_lv = etuple(etuplize(at.random.normal), rng_lv, size_lv, dtype_lv, mu_lv, std_lv)
    norm_out_lv = etuple(etuplize(at.random.normal), rng_lv, size_lv, dtype_lv, mu_lv, std_lv)

    return lall(eq(in_expr, norm_in_lv), eq(out_expr, norm_out_lv))

srng = at.random.RandomStream(0)
Y_rv = srng.normal(1, 1)
Y_et = etuplize(Y_rv)

y_lv = var()
(y_expr,) = run(1, y_lv, normaleo(Y_et, y_lv))
y_expr.evaled_obj
# normal_rv{0, (0, 0), floatX, False}(RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7FC8E9B784A0>), TensorConstant{[]}, TensorConstant{11}, TensorConstant{1}, TensorConstant{1})

y_lv_rev = var()
(y_expr_rev,) = run(1, y_lv_rev, normaleo(y_lv_rev, Y_et))
y_expr_rev.evaled_obj
# normal_rv{0, (0, 0), floatX, False}(RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7FC8E9B784A0>), TensorConstant{[]}, TensorConstant{11}, TensorConstant{1}, TensorConstant{1})