xnuinside / simple-ddl-parser

Simple DDL Parser to parse SQL (HQL, TSQL, AWS Redshift, BigQuery, Snowflake and other dialects) ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc. & table properties, types, domains, etc.
MIT License
179 stars 40 forks source link

Add Snowflake External Table with Virtual Column support a kind of Computed Column as T-SQL #218

Closed dmaresma closed 8 months ago

dmaresma commented 8 months ago

I would like to support CREATE EXTERNAL TABLE statement support as the following test here the reference in Snowflake documentation https://docs.snowflake.com/en/sql-reference/sql/create-external-table

def test_virtual_column_table():
    ddl = """
    create or replace external table if not exists TABLE_DATA_SRC.EXT_PAYLOAD_MANIFEST_WEB(
        "type" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 1), '=', 2)),
        "year" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 2), '=', 2)),
        "month" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 3), '=', 2)),
        "day" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 4), '=', 2)),
        "path" VARCHAR(255) AS (METADATA$FILENAME))
    partition by ("type", "year", "month", "day", "path")
    location=@ADL_Azure_Storage_Account_Container_Name/
    auto_refresh=false
    file_format=(TYPE=JSON NULL_IF=())
    ;
    """
    result_ext_table = DDLParser(ddl, normalize_names=True, debug=True).run(
        output_mode="snowflake"
    )

       expected_ext_table = [
        {
            "alter": {},
            "checks": [],
            "clone": None,
            "columns": [
                {
                    "name": "type",
                    "type": "VARCHAR",
                    "size": 255,
                    "comment": "",
                    "references": None,
                    "unique": False,
                    "nullable": True,
                    "default": None,
                    "check": None,
                   "generated" : {"as"  : "SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 1), '=', 2)" , "stored": False, "always" : True}
                },
                {
                    "name": "year",
                    "type": "VARCHAR",
                    "size": 255,
                    "comment": "",
                    "references": None,
                    "unique": False,
                    "nullable": True,
                    "default": None,
                    "check": None,
                   "generated" : {"as"  : "SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 2), '=', 2)" , "stored": False, "always" : True}
                },
                {
                    "name": "month",
                    "type": "VARCHAR",
                    "size": 255,
                    "comment": "",
                    "references": None,
                    "unique": False,
                    "nullable": True,
                    "default": None,
                    "check": None,
                   "generated" : {"as" : "SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 3), '=', 2)", "stored": False, "always" : True}
                },
                {
                    "name": "day",
                    "type": "VARCHAR",
                    "size": 255,
                    "comment": "",
                    "references": None,
                    "unique": False,
                    "nullable": True,
                    "default": None,
                    "check": None,
                   "generated" : {"as" : "SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 4), '=', 2)", "stored": False, "always" : True}
                },
                {
                    "name": "path",
                    "type": "VARCHAR",
                    "size": 255,
                    "comment": "",
                    "references": None,
                    "unique": False,
                    "nullable": True,
                    "default": None,
                    "check": None,
                   "generated" : {"as" : "METADATA$FILENAME)", "stored": False, "always" : True}
                }
            ],
            "constraints": {
                "primary_keys": []
            },
            "index": [],
            "partitioned_by": []
            "partition_by" : ["type", "year", "month", "day", "path"],
            "primary_key": [],
            "primary_key_enforced": None,
            "location" : "@ADL_Azure_Storage_Account_Container_Name/",
            "auto_refresh" : False,
            "schema": "TABLE_DATA_SRC",
            "table_name": "EXT_PAYLOAD_MANIFEST_WEB",
        }
    ]

    assert result_ext_table == expected_ext_table

Implementing Virutal Column "type" VARCHAR(255) AS (SPLIT_PART(SPLIT_PART(METADATA$FILENAME, '/', 1), '=', 2))

improve generated as columns attribute, location , file_format and auto_refresh as table attributes

check partitioned_by still supported

Additional context ICEBERG Table is comming soon 👍