taozhi8833998 / node-sql-parser

Parse simple SQL statements into an abstract syntax tree (AST) with the visited tableList and convert it back to SQL
https://taozhi8833998.github.io/node-sql-parser/
Apache License 2.0
780 stars 174 forks source link

Athena query with UNNEST and SEQUENCE is not parsed #1997

Closed LaizaAngrest closed 1 month ago

LaizaAngrest commented 1 month ago

Describe the bug Athena query with UNNEST and SEQUENCE is not parsed

Database Engine Athena

To Reproduce

it('should parse a query UNNEST and sequence', function () {
    const parser = new SqlParserV2()
    const query = `
    with org_mapping AS (
     select * from (
     select trim(lower(name)) as name, organization_name,
     ROW_NUMBER() OVER (PARTITION BY trim(lower(name))) as rn
     from bronze_sales_prod.drive_organization_mapping)
     where rn = 1
     )
,orgs as (
SELECT trim(COALESCE(om.organization_name, custom_attributes.Account)) AS organization,
*
FROM "bronze_sales_prod"."intercom_all_conversations"
JOIN org_mapping om ON trim(lower(custom_attributes.Account)) = trim(lower(om.name))
)
, orgs_dates_metrics as (
select organization,
cast(from_unixtime(created_at) as date) AS conversation_ctreated_date,
array_agg(custom_attributes) custom_attributes_array,
approx_percentile(case when state = 'closed' then date_diff('minute', from_unixtime(created_at), from_unixtime(updated_at)) else 0 end,0.5) as median_resolution_time_minutes,
sum(case when state IN ('open', 'snoozed') then 1 else 0 end) open_conversations_count,
count(*) overall_conversations_count
from orgs
where organization is not null
group by 1,2
)
,last_year_org_dates as
(
SELECT distinct om.organization_name as organization,
  date_add('day', -sequence, current_date) AS date
FROM UNNEST(sequence(1, 365)) AS t(sequence)
  join org_mapping om on 1=1
WHERE
  date_add('day', -sequence, current_date) >= date_add('year', -1, current_date)
), counted_tags as (
SELECT organization
,cast(from_unixtime(created_at) as date) AS conversation_ctreated_date
,tag.name as tag
, json_extract_scalar(cast(source AS json), '$.author.email') AS author_email
,COUNT(*) AS count
FROM orgs
CROSS JOIN UNNEST(tags.tags) AS t(tag)
group by 1,2,3,4
),  counted_tags_map as (
SELECT organization, conversation_ctreated_date, author_email,
    MAP (
        ARRAY_AGG(tag),
        ARRAY_AGG(count)
    ) AS tags_counts
FROM
    counted_tags
group by 1,2,3
)
select od.organization as organization_name
, od.date
, t.author_email
, coalesce(t.tags_counts, MAP()) as tags_counts
, coalesce(o.custom_attributes_array, ARRAY[]) as custom_attributes_array
, COALESCE(o.median_resolution_time_minutes,0) as median_resolution_time_minutes
, COALESCE(o.open_conversations_count,0) as open_conversations_count
, COALESCE(o.overall_conversations_count,0) as overall_conversations_count
, cast(current_timestamp as timestamp(6)) as dbt_insert_time
from last_year_org_dates od
left join orgs_dates_metrics o on od.organization = o.organization and od.date = o.conversation_ctreated_date
left join counted_tags_map t on od.organization = t.organization and od.date = t.conversation_ctreated_date`
    const parsedSql = parser.parse(query, WarehouseType.Athena)
    expect(parsedSql).toBeDefined()
  })