DeepLcom / sql-mock

A Python library to test your SQL models using mocked input data
MIT License
33 stars 5 forks source link

v0.6.1 Can't parse Clickhouse `argMaxIf` function #53

Closed smoothml closed 5 months ago

smoothml commented 5 months ago

Since v0.6.1 SQLMock is unable to parse Clickhouse's argMaxIf function.

Consider the following example:

from datetime import datetime

from sql_mock.clickhouse import column_mocks as col
from sql_mock.clickhouse.table_mocks import ClickHouseTableMock
from sql_mock.table_mocks import table_meta

query = """SELECT
    user_id,
    count() AS num_sessions,
    countIf(valid = 1) AS num_valid_sessions,
    argMaxIf(in_trial, dt, isNotNull(in_trial)) AS in_trial
FROM sessions
GROUP BY user_id
"""

@table_meta(table_ref="sessions")
class SessionsMock(ClickHouseTableMock):
    dt = col.Datetime(default=datetime(2024, 1, 1 ,10, 30))
    valid = col.Boolean(default=True)
    user_id = col.String(default="foo")
    in_trial = col.Int(default=0, nullable=True)

@table_meta(query=query)
class ResultMock(ClickHouseTableMock):
    user_id = col.String(default="foo")
    num_sessions = col.Int(default=0)
    num_valid_sessions = col.Int(default=0)
    in_trial = col.Int(default=0)

def test_example() -> None:
    sessions_mock = SessionsMock.from_dicts(
        [
            dict(
                dt=datetime(2024, 1, 1, 10),
                valid=1,
                user_id="foo",
                in_trial=1,
            ),
            dict(
                dt=datetime(2024, 1, 2, 10),
                valid=0,
                user_id="foo",
                in_trial=1,
            ),
            dict(
                dt=datetime(2024, 1, 3, 10),
                valid=1,
                user_id="foo",
                in_trial=1,
            ),
            dict(
                dt=datetime(2024, 1, 4, 10),
                valid=1,
                user_id="foo",
                in_trial=None,
            ),
            dict(
                dt=datetime(2024, 1, 5, 10),
                valid=1,
                user_id="foo",
                in_trial=0,
            ),
        ]
    )

    result = ResultMock.from_mocks(input_data=[sessions_mock])

    expected = [
        dict(
            user_id="foo",
            num_sessions=5,
            num_valid_sessions=4,
            in_trial=0,
        )
    ]

    result.assert_equal(expected)

Running test_example results in the following exception:

self = <sqlglot.generator.Generator object at 0x107920d60>
expression = ('argMax', 'If'), key = None, comment = True

    def sql(
        self,
        expression: t.Optional[str | exp.Expression],
        key: t.Optional[str] = None,
        comment: bool = True,
    ) -> str:
        if not expression:
            return ""

        if isinstance(expression, str):
            return expression

        if key:
            value = expression.args.get(key)
            if value:
                return self.sql(value)
            return ""

        transform = self.TRANSFORMS.get(expression.__class__)

        if callable(transform):
            sql = transform(self, expression)
        elif transform:
            sql = transform
        elif isinstance(expression, exp.Expression):
            exp_handler_name = f"{expression.key}_sql"

            if hasattr(self, exp_handler_name):
                sql = getattr(self, exp_handler_name)(expression)
            elif isinstance(expression, exp.Func):
                sql = self.function_fallback_sql(expression)
            elif isinstance(expression, exp.Property):
                sql = self.property_sql(expression)
            else:
                raise ValueError(f"Unsupported expression type {expression.__class__.__name__}")
        else:
>           raise ValueError(f"Expected an Expression. Received {type(expression)}: {expression}")
E           ValueError: Expected an Expression. Received <class 'tuple'>: ('argMax', 'If')

We do not see this error for other xIf functions (e.g. the countIf in the example query) or for functions without it If (e.g. plain argMax). My initial thought was a change in sqlglot, but the version specified in poetry.lock has not changed in this release.

Somtom commented 5 months ago

@smoothml Just to confirm: With previous versions it was working?

My first thought was that we might have forgotten to add a dialect to some sqlglot call somewhere - but I cannot spot anything (apart from that we probably want to add the dialect to those test parsings as well)

What I found though is that we forgot to remove a print statement.

smoothml commented 5 months ago

Sorry @Somtom , just getting back to this.

Turns out it was indeed the print statement 🤯