[x] Make sure you specify the "read" dialect eg. parse_one(sql, read="spark")
[x] Make sure you specify the "write" dialect eg. ast.sql(dialect="duckdb")
[x] Check if the issue still exists on main
Fully reproducible code snippet
I'm writing a transformer that remap duckdb named parameters into a valid type based on the user's inputs.
def resolve_user_params(query: str, params: dict[str, Any], dialect='duckdb') -> str:
errors: set[str] = set()
def _transform(node: sqlglot.expressions.Expression) -> sqlglot.expressions.Expression:
if isinstance(node, sqlglot.expressions.Placeholder):
key = node.name
if key not in params:
errors.add(f'Variable ${key} is an unknown user_param')
return node
value = params[key]
match value:
case bool():
return sqlglot.expressions.Boolean(this=value)
case int():
return sqlglot.expressions.Literal(this=value, is_string=False)
case float():
return sqlglot.expressions.Literal(this=value, is_string=False)
case str():
return sqlglot.expressions.Literal(this=value, is_string=True)
case _:
errors.add(f'Variable ${key} is not a valid type, got {type(value)}')
return node
return node
parsed_query = sqlglot.parse_one(query, read=dialect)
# print(repr(parsed_query))
transformed_query = parsed_query.transform(_transform)
final_query = transformed_query.sql(dialect=dialect, comments=False)
print('query', query)
print(' - params', params)
print(' - errors', errors)
print(' - final_query', final_query)
return final_query
The function works with a basic query like:
resolve_user_params(
query="SELECT * FROM taxi_zones WHERE LocationID > $location_id_min AND LocationID IS NOT NULL",
params={
"location_id_min": 124,
},
)
# query SELECT * FROM taxi_zones WHERE LocationID > $location_id_min AND LocationID IS NOT NULL
# - params {'location_id_min': 124}
# - errors set()
# - final_query SELECT * FROM taxi_zones WHERE LocationID > 124 AND NOT LocationID IS NULL
but it fails when I pass multiple named parameters:
resolve_user_params(
query="SELECT * FROM taxi_zones WHERE LocationID > $location_id_min AND Borough = $borough",
params={
"location_id_min": 124,
"borough": 12,
},
)
Traceback (most recent call last):
File ".venv/lib/python3.12/site-packages/sqlglot/tokens.py", line 970, in tokenize
self._scan()
File ".venv/lib/python3.12/site-packages/sqlglot/tokens.py", line 1003, in _scan
self._scan_keywords()
File ".venv/lib/python3.12/site-packages/sqlglot/tokens.py", line 1134, in _scan_keywords
if self._scan_string(word):
^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/sqlglot/tokens.py", line 1306, in _scan_string
text = self._extract_string(end, unescape_sequences=token_type != TokenType.RAW_STRING)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/sqlglot/tokens.py", line 1385, in _extract_string
raise TokenError(f"Missing {delimiter} from {self._line}:{self._start}")
sqlglot.errors.TokenError: Missing $location_id_min AND Borough = $ from 1:44
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File ".venv/lib/python3.12/site-packages/marimo/_runtime/runner/cell_runner.py", line 302, in run
return_value = execute_cell(cell, self.glbls)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/marimo/_ast/cell.py", line 460, in execute_cell
return eval(cell.last_expr, glbls)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/var/folders/pg/0dm9b2p54yn_wqd89jd7j_wm0000gn/T/marimo_4911/__marimo__cell_lEQa__output.py", line 31, in <module>
File "/var/folders/pg/0dm9b2p54yn_wqd89jd7j_wm0000gn/T/marimo_4911/__marimo__cell_bkHC_.py", line 26, in resolve_user_params
parsed_query = sqlglot.parse_one(query, read='duckdb')
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/sqlglot/__init__.py", line 136, in parse_one
result = dialect.parse(sql, **opts)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/sqlglot/dialects/dialect.py", line 518, in parse
return self.parser(**opts).parse(self.tokenize(sql), sql)
^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/sqlglot/dialects/dialect.py", line 535, in tokenize
return self.tokenizer.tokenize(sql)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/sqlglot/tokens.py", line 975, in tokenize
raise TokenError(f"Error tokenizing '{context}'") from e
sqlglot.errors.TokenError: Error tokenizing 'cationID > $location_id_min AND Borough = $boroug'
Before you file an issue
parse_one(sql, read="spark")
ast.sql(dialect="duckdb")
Fully reproducible code snippet I'm writing a transformer that remap duckdb named parameters into a valid type based on the user's inputs.
The function works with a basic query like:
but it fails when I pass multiple named parameters:
Official Documentation