larray-project / larray

N-dimensional labelled arrays in Python
https://larray.readthedocs.io/
GNU General Public License v3.0
8 stars 6 forks source link

implement Array.query #1058

Open gdementen opened 1 year ago

gdementen commented 1 year ago

Here is a bit of code I wrote for Geert:

def eval_expr_on_array(arr, expr):
    """Evaluate the Python expression ``expr`` using items of ``arr`` as variables.

    Every name referenced by the compiled expression is looked up as
    ``arr[name]``; the resulting mapping is used as the globals of the
    evaluation, and the value of the expression is returned.

    NOTE(review): ``expr`` is executed through ``eval``, so it must come from
    a trusted source.
    """
    code = compile(expr, '<expr>', 'eval')
    namespace = {}
    for name in code.co_names:
        # each name used in the expression is resolved against the array
        namespace[name] = arr[name]
    return eval(code, namespace)

>>> arr = ndtest("axis=a,b,c,d")
>>> expr = "a + b - (c * d)"
>>> eval_expr_on_array(arr, expr)
-5

But it does not support ambiguous labels. Then, for the fun of it, I thought it wouldn't be too hard to implement that, so I wrote the following code:

class ArrayLabel:
    """Deferred reference to the part of ``array`` selected by ``name``.

    The selection is only performed when :meth:`eval` is called, which the
    operator methods below do just before delegating to the corresponding
    ``Array`` operator.
    """

    def __init__(self, array, name):
        self.array = array
        self.name = name

    def eval(self):
        """Return ``self.array[self.name]``."""
        return self.array[self.name]

    def _binop(opname):
        # Class-body helper (not a method): builds the ``__<opname>__`` method
        # which forwards to the method of the same name defined on Array.
        fullname = f'__{opname}__'
        super_method = getattr(Array, fullname)

        def opmethod(self, other) -> 'Array':
            # Only ArrayLabel operands need to be resolved. Anything else
            # (an already evaluated sub-expression, a scalar, ...) is handed
            # to the Array operator unchanged instead of being rejected.
            # NOTE: when the *left* operand is not an ArrayLabel, Python
            # dispatches to that operand's method first, so that case cannot
            # be handled from here.
            if isinstance(other, ArrayLabel):
                other = other.eval()
            return super_method(self.eval(), other)
        opmethod.__name__ = fullname
        return opmethod

    __lt__ = _binop('lt')
    __le__ = _binop('le')
    __eq__ = _binop('eq')
    __ne__ = _binop('ne')
    __gt__ = _binop('gt')
    __ge__ = _binop('ge')
    __add__ = _binop('add')
    __radd__ = _binop('radd')
    __sub__ = _binop('sub')
    __rsub__ = _binop('rsub')
    __mul__ = _binop('mul')
    __rmul__ = _binop('rmul')
    # div and rdiv are no longer used on Python3+
    __truediv__ = _binop('truediv')
    __rtruediv__ = _binop('rtruediv')
    __floordiv__ = _binop('floordiv')
    __rfloordiv__ = _binop('rfloordiv')
    __mod__ = _binop('mod')
    __rmod__ = _binop('rmod')
    __divmod__ = _binop('divmod')
    __rdivmod__ = _binop('rdivmod')
    __pow__ = _binop('pow')
    __rpow__ = _binop('rpow')
    __lshift__ = _binop('lshift')
    __rlshift__ = _binop('rlshift')
    __rshift__ = _binop('rshift')
    __rrshift__ = _binop('rrshift')
    __and__ = _binop('and')
    __rand__ = _binop('rand')
    __xor__ = _binop('xor')
    __rxor__ = _binop('rxor')
    __or__ = _binop('or')
    __ror__ = _binop('ror')

class AxisModifier:
    """Pairs ``array`` with one of its axes so a label can be qualified by it.

    Indexing an instance with an ``ArrayLabel`` returns a new ``ArrayLabel``
    on the same array whose name is ``axis[label.name]``.
    """

    def __init__(self, array, axis):
        self.array, self.axis = array, axis

    def __getitem__(self, subset):
        assert isinstance(subset, ArrayLabel)
        # look the label up on the stored axis, then wrap the result again
        qualified_key = self.axis[subset.name]
        return ArrayLabel(self.array, qualified_key)

def query_array(array, expr):
    """Evaluate the Python expression ``expr`` against ``array``.

    Each name referenced by the compiled expression becomes a variable of the
    evaluation: names found in ``array.axes`` are bound to an ``AxisModifier``
    for that axis, all other names are bound to an ``ArrayLabel`` for that
    name. The value of the expression is returned.

    NOTE(review): ``expr`` is executed through ``eval``, so it must come from
    a trusted source.
    """
    code = compile(expr, '<expr>', 'eval')
    array_axes = array.axes
    namespace = {}
    for name in code.co_names:
        if name in array_axes:
            namespace[name] = AxisModifier(array, array_axes[name])
        else:
            namespace[name] = ArrayLabel(array, name)
    return eval(code, namespace)

>>> arr = ndtest("axis1=a,b,c,d;axis2=b,e,f")
>>> expr = "a + axis1[b] - (c * d)"
>>> query_array(arr, expr)
axis2    b    e    f
       -51  -65  -81

... but it's buggy. The result depends on the evaluation order of the operands: at some point we end up with binary operations between an Array and an ArrayLabel, which return False (courtesy of Array._binop, which does so for unknown types -- btw, this behavior was intended for == and is really surprising for other operators).