apache / druid

Apache Druid: a high performance real-time analytics database.
https://druid.apache.org/
Apache License 2.0
13.53k stars 3.71k forks source link

RequireTimeCondition does not handle complex joins #17407

Open abhishek-chouhan opened 1 month ago

abhishek-chouhan commented 1 month ago

Enabling druid.sql.planner.requireTimeCondition results in valid join queries being blocked. Example -

SELECT distinct T1.dim1, T2.dim2 FROM (SELECT dim1 from druid.foo WHERE dim1 <> '' AND time >= '2000-01-01') AS T1, (SELECT dim2 from druid.foo WHERE dim1 <> '' AND time >= '2000-02-01') AS T2 WHERE T1.dim1=T2.dim2

Both the nested queries have a time filter for the above inner join, however the requireTimeCondition logic is looking for a time filter on the top level query.

JRobTS commented 4 days ago

Here's another example:

SELECT a.Id, a.__time AS a_time, b.__time AS b_time
FROM (
  SELECT Id, MAX(__time) AS __time
  FROM ds1
  WHERE __time >= CURRENT_TIMESTAMP - INTERVAL 7 DAY
  GROUP BY 1
) a LEFT JOIN (
  SELECT Id, __time
  FROM ds2
  WHERE __time >= CURRENT_TIMESTAMP - INTERVAL 10 DAY
) b ON a.Id = b.Id

Fails for reason:

requireTimeCondition is enabled, all queries must include a filter condition on the __time column

The query plan for the above:

{
  "queryType": "scan",
  "dataSource": {
    "type": "join",
    "left": {
      "type": "query",
      "query": {
        "queryType": "groupBy",
        "dataSource": {
          "type": "table",
          "name": "ds1"
        },
        "intervals": {
          "type": "intervals",
          "intervals": [
            "2024-11-22T14:51:41.940Z/146140482-04-24T15:36:27.903Z"
          ]
        },
        "granularity": {
          "type": "all"
        },
        "dimensions": [
          {
            "type": "default",
            "dimension": "Id",
            "outputName": "d0",
            "outputType": "STRING"
          }
        ],
        "aggregations": [
          {
            "type": "longMax",
            "name": "a0",
            "fieldName": "__time"
          }
        ],
        "limitSpec": {
          "type": "NoopLimitSpec"
        },
        "context": {
          "queryId": "a09bf6c8-7c77-44e0-b680-51a057b8727e",
          "sqlOuterLimit": 1001,
          "sqlQueryId": "a09bf6c8-7c77-44e0-b680-51a057b8727e",
          "sqlStringifyArrays": false,
          "useNativeQueryExplain": true
        }
      }
    },
    "right": {
      "type": "query",
      "query": {
        "queryType": "scan",
        "dataSource": {
          "type": "table",
          "name": "ds2"
        },
        "intervals": {
          "type": "intervals",
          "intervals": [
            "2024-11-19T14:51:41.940Z/146140482-04-24T15:36:27.903Z"
          ]
        },
        "resultFormat": "compactedList",
        "filter": {
          "type": "equals",
          "column": "Recipe",
          "matchValueType": "STRING",
          "matchValue": "foo"
        },
        "columns": [
          "Id",
          "__time"
        ],
        "legacy": false,
        "context": {
          "queryId": "a09bf6c8-7c77-44e0-b680-51a057b8727e",
          "sqlOuterLimit": 1001,
          "sqlQueryId": "a09bf6c8-7c77-44e0-b680-51a057b8727e",
          "sqlStringifyArrays": false,
          "useNativeQueryExplain": true
        },
        "granularity": {
          "type": "all"
        }
      }
    },
    "rightPrefix": "j0.",
    "condition": "(\"d0\" == \"j0.Id\")",
    "joinType": "LEFT"
  },
  "intervals": {
    "type": "intervals",
    "intervals": [
      "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
    ]
  },
  "resultFormat": "compactedList",
  "limit": 1001,
  "filter": {
    "type": "and",
    "fields": [
      {
        "type": "range",
        "column": "a0",
        "matchValueType": "LONG",
        "lower": 1732287101940
      },
      {
        "type": "range",
        "column": "j0.__time",
        "matchValueType": "LONG",
        "lower": 1732027901940
      }
    ]
  },
  "columns": [
    "a0",
    "d0",
    "j0.__time"
  ],
  "legacy": false,
  "context": {
    "queryId": "a09bf6c8-7c77-44e0-b680-51a057b8727e",
    "sqlOuterLimit": 1001,
    "sqlQueryId": "a09bf6c8-7c77-44e0-b680-51a057b8727e",
    "sqlStringifyArrays": false,
    "useNativeQueryExplain": true
  },
  "granularity": {
    "type": "all"
  }
}
JRobTS commented 4 days ago

This is a regression in Version 29.0.1 Such queries worked in a previous version of Druid.