metriql / metriql

The metrics layer for your data. Join us at https://metriql.com/slack
https://metriql.com
Apache License 2.0
295 stars 23 forks source link

Error Date-DateTime column in ClickHouse-Metriql #85

Open KrisAnTis-Group opened 2 years ago

KrisAnTis-Group commented 2 years ago

@buremba There is a problem with using Date-DateTime fields that have null values.

We use Metriql to bundle dbt, ClickHouse, Metriql, Google Data Studio technologies. after some update, errors began to appear in the GDS of the form:

image

Errors occur when the column is of type Date or DateTime and contains null values

To demonstrate an example, we can use the following query:

with t1 as (
    select 
        [toDate(null), toDate('2022-06-05')] as date_field_arr,
        [1,2] as num_field_arr
  ),
  t2 as (
    select 
        arrayJoin(date_field_arr) as date_field,
        arrayJoin(num_field_arr) as num_field
    from t1
  )
  select 
    CAST(date_field, 'Nullable(date)') as date_field_null,
    num_field
  from t2

image

I am using curl to send a request to our metriql:

curl --location --request POST 'https://metriql.***.io/api/v0/query' \
--header 'Authorization: Basic ***REPLACE***=' \
--header 'Content-Type: application/json' \
--data-raw '{
    "type": "sql",
    "report": {
        "query": "  with t1 as (
                        select 
                            [toDate(null), toDate('\''2022-06-05'\'')] as date_field_arr,
                            [1,2] as num_field_arr
                    ),
                    t2 as (
                        select 
                            arrayJoin(date_field_arr) as date_field,
                            arrayJoin(num_field_arr) as num_field
                        from t1
                    )
                    select 
                        CAST(date_field, '\''Nullable(date)'\'') as date_field_null,
                        num_field
                    from t2
                    "
    }
}'

And I get the following response:

{
    "id": "***",
    "startedAt": "2022-06-17T12:24:53.783122Z",
    "duration": 0.132326000,
    "user": "***",
    "source": null,
    "status": "failed",
    "update": {
        "state": "FINISHED",
        "info": {
            "reportType": "sql",
            "query": {
                "query": "  with t1 as (\n select \n     [toDate(null), toDate('2022-06-05')] as date_field_arr,\n     [1,2] as num_field_arr\n                    ),\n                    t2 as (\n select \n     arrayJoin(date_field_arr) as date_field,\n     arrayJoin(num_field_arr) as num_field\n from t1\n                    )\n                    select \n CAST(date_field, 'Nullable(date)') as date_field_null,\n num_field\n                    from t2\n                    ",
                "queryOptions": null,
                "variables": null,
                "reportOptions": null
            },
            "compiledQuery": "  with t1 as (\n select \n     [toDate(null), toDate('2022-06-05')] as date_field_arr,\n     [1,2] as num_field_arr\n                    ),\n                    t2 as (\n select \n     arrayJoin(date_field_arr) as date_field,\n     arrayJoin(num_field_arr) as num_field\n from t1\n                    )\n                    select \n CAST(date_field, 'Nullable(date)') as date_field_null,\n num_field\n                    from t2\n                    "
        },
        "nodes": 1,
        "percentage": null,
        "elapsedTimeMillis": null,
        "totalBytes": null,
        "processedBytes": null
    },
    "result": {
        "metadata": null,
        "result": null,
        "error": {
            "message": "Error while fetching column `date_field_null [DATE]: null`",
            "sqlState": null,
            "errorCode": null,
            "errorLine": null,
            "charPositionInLine": null
        },
        "properties": {
            "limit": 1000,
            "query": "  with t1 as (\n select \n     [toDate(null), toDate('2022-06-05')] as date_field_arr,\n     [1,2] as num_field_arr\n                    ),\n                    t2 as (\n select \n     arrayJoin(date_field_arr) as date_field,\n     arrayJoin(num_field_arr) as num_field\n from t1\n                    )\n                    select \n CAST(date_field, 'Nullable(date)') as date_field_null,\n num_field\n                    from t2\n                    "
        },
        "responseHeaders": null
    }
}

As you can see metriql itself returns an error:

Error while fetching column `date_field_null [DATE]: null`

At the same time, if there were no NULL values in the table, then we would get the following:

 with t1 as (
    select 
        [toDate('2022-06-05'), toDate('2022-06-05')] as date_field_arr,
        [1,2] as num_field_arr
  ),
  t2 as (
    select 
        arrayJoin(date_field_arr) as date_field,
        arrayJoin(num_field_arr) as num_field
    from t1
  )
  select 
    CAST(date_field, 'Nullable(date)') as date_field_null,
    num_field
  from t2
curl --location --request POST 'https://metriql.***.io/api/v0/query' \
--header 'Authorization: Basic ***REPLACE***=' \
--header 'Content-Type: application/json' \
--data-raw '{
    "type": "sql",
    "report": {
        "query": "  with t1 as (
                        select 
                            [toDate('\''2022-06-06'\''), toDate('\''2022-06-05'\'')] as date_field_arr,
                            [1,2] as num_field_arr
                    ),
                    t2 as (
                        select 
                            arrayJoin(date_field_arr) as date_field,
                            arrayJoin(num_field_arr) as num_field
                        from t1
                    )
                    select 
                        CAST(date_field, '\''Nullable(date)'\'') as date_field_null,
                        num_field
                    from t2
                    "
    }
}'
{
    "id": "***",
    "startedAt": "2022-06-17T12:35:33.012323Z",
    "duration": 0.194213000,
    "user": "***",
    "source": null,
    "status": "finished",
    "update": {
        "state": "FINISHED",
        "info": {
            "reportType": "sql",
            "query": {
                "query": "  with t1 as (\n                        select \n                            [toDate('2022-06-06'), toDate('2022-06-05')] as date_field_arr,\n                            [1,2] as num_field_arr\n                    ),\n                    t2 as (\n                        select \n                            arrayJoin(date_field_arr) as date_field,\n                            arrayJoin(num_field_arr) as num_field\n                        from t1\n                    )\n                    select \n                        CAST(date_field, 'Nullable(date)') as date_field_null,\n                        num_field\n                    from t2\n                    ",
                "queryOptions": null,
                "variables": null,
                "reportOptions": null
            },
            "compiledQuery": "  with t1 as (\n                        select \n                            [toDate('2022-06-06'), toDate('2022-06-05')] as date_field_arr,\n                            [1,2] as num_field_arr\n                    ),\n                    t2 as (\n                        select \n                            arrayJoin(date_field_arr) as date_field,\n                            arrayJoin(num_field_arr) as num_field\n                        from t1\n                    )\n                    select \n                        CAST(date_field, 'Nullable(date)') as date_field_null,\n                        num_field\n                    from t2\n                    "
        },
        "nodes": 1,
        "percentage": null,
        "elapsedTimeMillis": null,
        "totalBytes": null,
        "processedBytes": null
    },
    "result": {
        "metadata": [
            {
                "name": "date_field_null",
                "position": 0,
                "type": "date",
                "dbType": "Nullable(Date)"
            },
            {
                "name": "num_field",
                "position": 1,
                "type": "integer",
                "dbType": "UInt8"
            }
        ],
        "result": [
            [
                "2022-06-06",
                1
            ],
            [
                "2022-06-06",
                2
            ],
            [
                "2022-06-05",
                1
            ],
            [
                "2022-06-05",
                2
            ]
        ],
        "properties": {
            "limit": 1000,
            "query": "  with t1 as (\n                        select \n                            [toDate('2022-06-06'), toDate('2022-06-05')] as date_field_arr,\n                            [1,2] as num_field_arr\n                    ),\n                    t2 as (\n                        select \n                            arrayJoin(date_field_arr) as date_field,\n                            arrayJoin(num_field_arr) as num_field\n                        from t1\n                    )\n                    select \n                        CAST(date_field, 'Nullable(date)') as date_field_null,\n                        num_field\n                    from t2\n                    "
        },
        "responseHeaders": null
    }
}

Here we see that everything works correctly and the values are returned