Open sarchila opened 3 weeks ago
Chiming in here to note that I found a strange workaround that ends up giving the correct result, and that is if I replace measure(order_count) AS value
with measure(order_count) + 0 AS value
🤔
So, all told, here are the query results for the two almost-identical queries:
=> WITH pre_aggregation AS
(
SELECT
status,
date_trunc('month', created_at) AS created_at__month,
measure(order_count) AS value -- Only difference in the queries is here
FROM orders_cube
WHERE status IN ('processing', 'completed', 'shipped')
GROUP BY 1, 2
)
SELECT *
FROM pre_aggregation
WHERE (status = 'completed') AND (created_at__month = CAST('2019-01-01 00:00:00' AS TIMESTAMP));
status | created_at__month | value
--------+-------------------+-------
(0 rows)
---------------------------------------------------------------------------------------------
=> WITH pre_aggregation AS
(
SELECT
status,
date_trunc('month', created_at) AS created_at__month,
measure(order_count) + 0 AS value -- Only difference in the queries is here
FROM orders_cube
WHERE status IN ('processing', 'completed', 'shipped')
GROUP BY 1, 2
)
SELECT *
FROM pre_aggregation
WHERE (status = 'completed') AND (created_at__month = CAST('2019-01-01 00:00:00' AS TIMESTAMP));
status | created_at__month | value
-----------+----------------------------+-------
completed | 2019-01-01 00:00:00.000000 | 48
(1 row)
and here are the EXPLAIN results:
=> EXPLAIN WITH pre_aggregation AS
(
SELECT
status,
date_trunc('month', created_at) AS created_at__month,
measure(order_count) AS value -- Only difference in the queries is here
FROM orders_cube
WHERE status IN ('processing', 'completed', 'shipped')
GROUP BY 1, 2
)
SELECT *
FROM pre_aggregation
WHERE (status = 'completed') AND (created_at__month = CAST('2019-01-01 00:00:00' AS TIMESTAMP));
plan_type | plan
---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
logical_plan | CubeScan: request={ +
| "measures": [ +
| "orders_cube.order_count" +
| ], +
| "dimensions": [ +
| "orders_cube.status" +
| ], +
| "segments": [], +
| "timeDimensions": [ +
| { +
| "dimension": "orders_cube.created_at", +
| "granularity": "month", +
| "dateRange": [ +
| "2019-01-01T00:00:00.000Z", +
| "2019-01-01T00:00:00.000Z" +
| ] +
| } +
| ], +
| "filters": [ +
| { +
| "member": "orders_cube.status", +
| "operator": "equals", +
| "values": [ +
| "processing", +
| "completed", +
| "shipped" +
| ] +
| }, +
| { +
| "member": "orders_cube.status", +
| "operator": "equals", +
| "values": [ +
| "completed" +
| ] +
| } +
| ] +
| }
physical_plan | CubeScanExecutionPlan, Request: +
| {"measures":["orders_cube.order_count"],"dimensions":["orders_cube.status"],"segments":[],"timeDimensions":[{"dimension":"orders_cube.created_at","granularity":"month","dateRange":["2019-01-01T00:00:00.000Z","2019-01-01T00:00:00.000Z"]}],"filters":[{"member":"orders_cube.status","operator":"equals","values":["processing","completed","shipped"]},{"member":"orders_cube.status","operator":"equals","values":["completed"]}]}+
---------------------------------------------------------------------------------------------
=> EXPLAIN WITH pre_aggregation AS
(
SELECT
status,
date_trunc('month', created_at) AS created_at__month,
measure(order_count) + 0 AS value -- Only difference in the queries is here
FROM orders_cube
WHERE status IN ('processing', 'completed', 'shipped')
GROUP BY 1, 2
)
SELECT *
FROM pre_aggregation
WHERE (status = 'completed') AND (created_at__month = CAST('2019-01-01 00:00:00' AS TIMESTAMP));
plan_type | plan
---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
logical_plan | Projection: #pre_aggregation.status, #pre_aggregation.created_at__month, #pre_aggregation.value +
| Filter: #pre_aggregation.status = Utf8("completed") AND #pre_aggregation.created_at__month = TimestampNanosecond(1546300800000000000, None) AS CAST(Utf8("2019-01-01 00:00:00") AS Timestamp(Nanosecond, None)) +
| Projection: #orders_cube.status, #datetrunc(Utf8("month"),orders_cube.created_at) AS created_at__month, #measure(orders_cube.order_count) + Int64(0) AS value, alias=pre_aggregation +
| CubeScan: request={ +
| "measures": [ +
| "orders_cube.order_count" +
| ], +
| "dimensions": [ +
| "orders_cube.status" +
| ], +
| "segments": [], +
| "timeDimensions": [ +
| { +
| "dimension": "orders_cube.created_at", +
| "granularity": "month" +
| } +
| ], +
| "filters": [ +
| { +
| "member": "orders_cube.status", +
| "operator": "equals", +
| "values": [ +
| "processing", +
| "completed", +
| "shipped" +
| ] +
| } +
| ] +
| }
physical_plan | ProjectionExec: expr=[status@0 as status, created_at__month@1 as created_at__month, value@2 as value] +
| CoalesceBatchesExec: target_batch_size=4096 +
| FilterExec: status@0 = completed AND created_at__month@1 = 1546300800000000000 +
| ProjectionExec: expr=[status@0 as status, datetrunc(Utf8("month"),orders_cube.created_at)@1 as created_at__month, measure(orders_cube.order_count)@2 + 0 as value] +
| CubeScanExecutionPlan, Request: +
| {"measures":["orders_cube.order_count"],"dimensions":["orders_cube.status"],"segments":[],"timeDimensions":[{"dimension":"orders_cube.created_at","granularity":"month"}],"filters":[{"member":"orders_cube.status","operator":"equals","values":["processing","completed","shipped"]}]}+
|
Failed SQL The SQL does not fail, but it does return the incorrect results. In the EXPLAIN example below, it should have found a corresponding row, but instead returned no results.
Logical Plan Same results when using
CUBESQL_SQL_PUSH_DOWN
= true or false:Version: 0.35.47
Additional context The problem here seems to be in the timeDimensions.dateRange, which is producing an inaccurate range.