opendistro-for-elasticsearch / sql

🔍 Open Distro SQL Plugin
https://opendistro.github.io/for-elasticsearch/features/SQL%20Support.html
Apache License 2.0
621 stars 186 forks source link

Why sql with format jdbc return different date field value in opendistro sql 1.10 ? #845

Open ZhiXingHeYiApple opened 3 years ago

ZhiXingHeYiApple commented 3 years ago

index mapping

{
  "nyc-taxis-2020-11-23-1" : {
    "mappings" : {
      "dynamic" : "strict",
      "properties" : {
        "cab_color" : {
          "type" : "keyword"
        },
        "dropoff_datetime" : {
          "type" : "date",
          "format" : "date_optional_time||yyyy-MM-dd HH:mm:ss||epoch_millis"
        },
        "dropoff_location" : {
          "type" : "geo_point"
        },
        "ehail_fee" : {
          "type" : "scaled_float",
          "scaling_factor" : 100.0
        },
        "extra" : {
          "type" : "scaled_float",
          "scaling_factor" : 100.0
        },
        "fare_amount" : {
          "type" : "scaled_float",
          "scaling_factor" : 100.0
        },
        "improvement_surcharge" : {
          "type" : "scaled_float",
          "scaling_factor" : 100.0
        },
        "mta_tax" : {
          "type" : "scaled_float",
          "scaling_factor" : 100.0
        },
        "passenger_count" : {
          "type" : "integer"
        },
        "payment_type" : {
          "type" : "keyword"
        },
        "pickup_datetime" : {
          "type" : "date",
          "format" : "date_optional_time||yyyy-MM-dd HH:mm:ss||epoch_millis"
        },
        "pickup_location" : {
          "type" : "geo_point"
        },
        "rate_code_id" : {
          "type" : "keyword"
        },
        "store_and_fwd_flag" : {
          "type" : "keyword"
        },
        "surcharge" : {
          "type" : "scaled_float",
          "scaling_factor" : 100.0
        },
        "tip_amount" : {
          "type" : "scaled_float",
          "scaling_factor" : 100.0
        },
        "tolls_amount" : {
          "type" : "scaled_float",
          "scaling_factor" : 100.0
        },
        "total_amount" : {
          "type" : "scaled_float",
          "scaling_factor" : 100.0
        },
        "trip_distance" : {
          "type" : "scaled_float",
          "scaling_factor" : 100.0
        },
        "trip_type" : {
          "type" : "keyword"
        },
        "vendor_id" : {
          "type" : "keyword"
        },
        "vendor_name" : {
          "type" : "text"
        }
      }
    }
  }
}

dropoff_datetime and pickup_datetime has the same date configuration. But When execute the below sql in opendistro1.0 and opendistro1.10.1, the result is different.

POST _opendistro/_sql?format=jdbc
{
  "query": "select pickup_datetime,dropoff_datetime from nyc-taxis-2020-11-23-1 where cab_color = 'red' limit 10"
}

opendistro1.0 (ES6.8) result:

{
  "schema": [
    {
      "name": "pickup_datetime",
      "type": "date"
    },
    {
      "name": "dropoff_datetime",
      "type": "date"
    }
  ],
  "total": 1,
  "datarows": [[
    "2014-12-31T16:34:44.000Z",
    "2014-12-31T16:38:15.000Z"
  ]],
  "size": 1,
  "status": 200
}

opendistro1.10.1 (ES6.9) result:

{
  "schema": [
    {
      "name": "pickup_datetime",
      "type": "date"
    },
    {
      "name": "dropoff_datetime",
      "type": "date"
    }
  ],
  "total": 1,
  "datarows": [[
    "2014-12-31 16:34:44.000",
    "2014-12-31T16:38:15.000Z"
  ]],
  "size": 1,
  "status": 200
}

It's my expect behavior in old version opendistro1.10. Why there is different return date field value in more higher version.

penghuo commented 3 years ago

Thanks for reporting the issue. Could you share the raw docs for debuing the issue?

  1. Using the explain api to get the Elasticsearch DSL.
    POST _opendistro/_sql/_explain
    {
    "query": "select pickup_datetime,dropoff_datetime from nyc-taxis-2020-11-23-1 where cab_color = 'red' limit 10"
    }
  2. Run Elasticsearch DSL to pull the raw docs
ZhiXingHeYiApple commented 3 years ago

@penghuo Using the explain api to get the Elasticsearch DSL.

POST _opendistro/_sql/_explain
{
  "query": "select pickup_datetime,dropoff_datetime from nyc-taxis-2020-11-23-1 where cab_color = 'red' limit 10"
}

// DSL
{
  "from" : 0,
  "size" : 10,
  "query" : {
    "bool" : {
      "filter" : [
        {
          "bool" : {
            "must" : [
              {
                "term" : {
                  "cab_color" : {
                    "value" : "red",
                    "boost" : 1.0
                  }
                }
              }
            ],
            "adjust_pure_negative" : true,
            "boost" : 1.0
          }
        }
      ],
      "adjust_pure_negative" : true,
      "boost" : 1.0
    }
  },
  "_source" : {
    "includes" : [
      "pickup_datetime",
      "dropoff_datetime"
    ],
    "excludes" : [ ]
  }
}

use the DSL to pull the raw docs

{
  "took" : 4,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 0.0,
    "hits" : [
      {
        "_index" : "nyc-taxis-2020-11-23-1",
        "_type" : "_doc",
        "_id" : "test-document-id",
        "_score" : 0.0,
        "_source" : {
          "pickup_datetime" : "2014-12-31T16:34:44.000Z",
          "dropoff_datetime" : "2014-12-31T16:38:15.000Z"
        }
      }
    ]
  }
}

The full raw doc is:

 {
          "cab_color" : "red",
          "improvement_surcharge" : 0.0,
          "vendor_id" : "1",
          "dropoff_location" : [
            -73.8465805053711,
            40.71156692504883
          ],
          "trip_distance" : 0.8999999761581421,
          "vendor_name" : null,
          "tolls_amount" : 0.0,
          "trip_type" : "1",
          "tip_amount" : 1.7999999523162842,
          "payment_type" : "1",
          "fare_amount" : 5.0,
          "passenger_count" : 1,
          "store_and_fwd_flag" : "N",
          "extra" : 0.5,
          "dropoff_datetime" : "2014-12-31T16:38:15.000Z",
          "pickup_location" : [
            -73.84300994873047,
            40.71905517578125
          ],
          "ehail_fee" : 0.0,
          "rate_code_id" : "1",
          "total_amount" : 7.800000190734863,
          "pickup_datetime" : "2014-12-31T16:34:44.000Z",
          "mta_tax" : 0.5,
          "surcharge" : 0.0
}
penghuo commented 3 years ago

Thanks for the info, looking at the issue now.

penghuo commented 3 years ago

In ODFE 1.11.0, we add the DateFieldFormatter to format the result as JDBC required yyyy-MM-dd HH:mm:ss.SSS. But unfortunatelly, there is a bug which only format the first raw and exist. The expected result should be

{
  "schema": [
    {
      "name": "pickup_datetime",
      "type": "date"
    },
    {
      "name": "dropoff_datetime",
      "type": "date"
    }
  ],
  "total": 1,
  "datarows": [[
    "2014-12-31 16:34:44.000",
    "2014-12-31 16:38:15.000"
  ]],
  "size": 1,
  "status": 200
}