opensearch-project / opensearch-spark

Spark Accelerator framework; it enables secondary indices to remote data stores.
Apache License 2.0
14 stars 23 forks source link

[BUG] stats function doesn't work in PPL direct query #720

Open A-Gray-Cat opened 1 day ago

A-Gray-Cat commented 1 day ago

What is the bug? When using stats count() in a PPL query, the returned count is 0 even though there are more than 0 matching documents available.

How can one reproduce the bug? Steps to reproduce the behavior:

  1. Run the following query
    source = amazon_security_lake_glue_db_us_east_1.amazon_security_lake_table_us_east_1_cloud_trail_mgmt_2_0 | where time_dt >= '2024-09-29' | stats count() as total | fields total

    and it will return 0 as the count:

    image

What is the expected behavior? It should return the correct number of documents counted. Note: I haven't tested the other stats functions yet, and we should go through the rest of them to make sure they work as expected.

What is your host/environment?

Do you have any screenshots? If applicable, add screenshots to help explain your problem.

Do you have any additional context? Add any other context about the problem.

A-Gray-Cat commented 1 day ago

Update: This problem seems to happen only with certain tables, not all of them.

A-Gray-Cat commented 1 day ago

Sample data

{
  "metadata": "{\"uid\":\"acc63c4c-16fa-344e-aea4-e5ed833b6a84\",\"product\":{\"feature\":{\"name\":\"Management\"},\"name\":\"CloudTrail\",\"vendor_name\":\"AWS\",\"version\":\"1.08\"},\"event_code\":\"AwsApiCall\",\"profiles\":[\"cloud\",\"datetime\"],\"version\":\"1.1.0\"}",
  "time": 1727669676000,
  "time_dt": "2024-09-30T04:14:36.000Z",
  "cloud": "{\"provider\":\"AWS\",\"region\":\"ap-southeast-2\"}",
  "api": "{\"request\":{\"uid\":\"5801b90b-7c6c-4cc2-a9d0-6162bdc1f5ce\",\"data\":\"{\\\"roleArn\\\":\\\"arn:aws:iam::REDACTED:role/service-role/AmazonSecurityLakeMetaStoreManager\\\",\\\"roleSessionName\\\":\\\"awslambda_249_20240930041436835\\\"}\"},\"response\":{\"data\":\"{\\\"credentials\\\":{\\\"accessKeyId\\\":\\\"REDACTED\\\",\\\"sessionToken\\\":\\\"REDACTED\\\",\\\"expiration\\\":\\\"Sep 30, 2024, 4:24:36 PM\\\"}}\"},\"service\":{\"name\":\"sts.amazonaws.com\"},\"operation\":\"AssumeRole\"}",
  "dst_endpoint": "null",
  "actor": "{\"invoked_by\":\"lambda.amazonaws.com\",\"user\":{\"type\":\"AWSService\"}}",
  "http_request": "{\"user_agent\":\"lambda.amazonaws.com\"}",
  "src_endpoint": "{\"domain\":\"lambda.amazonaws.com\"}",
  "session": "{\"credential_uid\":\"REDACTED\"}",
  "policy": "null",
  "resources": "null",
  "class_name": "Authentication",
  "class_uid": 3002,
  "category_name": "Identity & Access Management",
  "category_uid": 3,
  "severity_id": 1,
  "severity": "Informational",
  "user": "{\"uid\":\"arn:aws:iam::REDACTED:role/service-role/REDACTED\",\"name\":\"REDACTED\"}",
  "activity_name": "Logon",
  "activity_id": 1,
  "type_uid": 300201,
  "type_name": "Authentication: Logon",
  "status": "null",
  "is_mfa": "null",
  "unmapped": "{\"resources[].type\":\"AWS::IAM::Role\",\"sharedEventID\":\"3225f114-d22e-49f5-96c9-c2f8576771fe\",\"resources[].accountId\":\"REDACTED\",\"recipientAccountId\":\"REDACTED\",\"readOnly\":\"true\",\"managementEvent\":\"true\",\"resources[].ARN\":\"arn:aws:iam::REDACTED:role/service-role/AmazonSecurityLakeMetaStoreManager\"}",
  "accountid": "REDACTED",
  "region": "ap-southeast-2",
  "asl_version": "2_0",
  "observables": "[{\"type_id\":4,\"name\":\"user.name\",\"type\":\"User Name\",\"value\":\"REDACTED\"},{\"type_id\":1,\"name\":\"src_endpoint.domain\",\"type\":\"Hostname\",\"value\":\"lambda.amazonaws.com\"}]"
}

Table schema

[
  {
    "Name": "metadata",
    "Type": "struct<product:struct<version:string,name:string,vendor_name:string,feature:struct<name:string>>,event_code:string,uid:string,profiles:array<string>,version:string>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "1",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "time",
    "Type": "bigint",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "2",
      "iceberg.field.optional": "false"
    }
  },
  {
    "Name": "time_dt",
    "Type": "timestamp",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "3",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "cloud",
    "Type": "struct<region:string,provider:string>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "4",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "api",
    "Type": "struct<response:struct<error:string,message:string,data:string>,operation:string,version:string,service:struct<name:string>,request:struct<data:string,uid:string>>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "5",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "dst_endpoint",
    "Type": "struct<svc_name:string>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "6",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "actor",
    "Type": "struct<user:struct<type:string,name:string,uid_alt:string,uid:string,account:struct<uid:string>,credential_uid:string>,session:struct<created_time_dt:timestamp,is_mfa:boolean,issuer:string>,invoked_by:string,idp:struct<name:string>>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "7",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "http_request",
    "Type": "struct<user_agent:string>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "8",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "src_endpoint",
    "Type": "struct<uid:string,ip:string,domain:string>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "9",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "session",
    "Type": "struct<uid:string,uid_alt:string,credential_uid:string,issuer:string>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "10",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "policy",
    "Type": "struct<uid:string>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "11",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "resources",
    "Type": "array<struct<uid:string,owner:struct<account:struct<uid:string>>,type:string>>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "12",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "class_name",
    "Type": "string",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "13",
      "iceberg.field.optional": "false"
    }
  },
  {
    "Name": "class_uid",
    "Type": "int",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "14",
      "iceberg.field.optional": "false"
    }
  },
  {
    "Name": "category_name",
    "Type": "string",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "15",
      "iceberg.field.optional": "false"
    }
  },
  {
    "Name": "category_uid",
    "Type": "int",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "16",
      "iceberg.field.optional": "false"
    }
  },
  {
    "Name": "severity_id",
    "Type": "int",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "17",
      "iceberg.field.optional": "false"
    }
  },
  {
    "Name": "severity",
    "Type": "string",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "18",
      "iceberg.field.optional": "false"
    }
  },
  {
    "Name": "user",
    "Type": "struct<uid_alt:string,uid:string,name:string>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "19",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "activity_name",
    "Type": "string",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "20",
      "iceberg.field.optional": "false"
    }
  },
  {
    "Name": "activity_id",
    "Type": "int",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "21",
      "iceberg.field.optional": "false"
    }
  },
  {
    "Name": "type_uid",
    "Type": "bigint",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "22",
      "iceberg.field.optional": "false"
    }
  },
  {
    "Name": "type_name",
    "Type": "string",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "23",
      "iceberg.field.optional": "false"
    }
  },
  {
    "Name": "status",
    "Type": "string",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "24",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "is_mfa",
    "Type": "boolean",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "25",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "unmapped",
    "Type": "map<string,string>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "26",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "accountid",
    "Type": "string",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "27",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "region",
    "Type": "string",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "28",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "asl_version",
    "Type": "string",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "29",
      "iceberg.field.optional": "true"
    }
  },
  {
    "Name": "observables",
    "Type": "array<struct<name:string,value:string,type:string,type_id:int>>",
    "Parameters": {
      "iceberg.field.current": "true",
      "iceberg.field.id": "30",
      "iceberg.field.optional": "true"
    }
  }
]