Open drizk1 opened 2 months ago
Switching to make the macro logic a function and then interpolating into that has been relatively successful (barring breaking tidy selection which should be fixable.) Source code for @select
and @filter
are below for convenience.
Altho my preference for mutate, filter and summarize is to stay as a macro.
so !!
interpolation works in
@group_by
@select
@arrange
@distinct
@count
function parse_interpolation2(var_expr)
var_expr = MacroTools.postwalk(var_expr) do x
if @capture(x, !!variable_Symbol)
return esc(variable)
elseif @capture(x, !!expr_)
return "expr"
elseif isa(x, Vector{String})
return join(["\"" * str * "\"" for str in x], ", ")
elseif isa(x, Vector{Symbol})
return join(string.(x), ", ")
else
return x
end
end
return var_expr
end
col_names = [:mpg, :vs, :carb]
@chain db_table(con, :mtcars) TidierDB.@select(!!col_names) @aside @show_query(_)
## debug print messages
b4 (:($(Expr(:escape, :col_names))),)
expr in qoute b4 interp: (:($(Expr(:escape, :col_names))),)
exprsstring after interp: [[:mpg, :vs, :carb]]
fxn expr ([:mpg, :vs, :carb],)
## end debug print messages
SELECT mpg, vs, carb
FROM mtcars
The challenge is now getting it work with @filter
@summarize
and @mutate
. While the above example works, the expression is evaluated before it is passed to the function with the logic so it receives which throws an error. or mixing in column names creates an issue. where ERROR: MethodError: no method matching isless(::Int64, ::Symbol)
conditions = 4
@chain db_table(con, :mtcars) begin
TidierDB.@filter(!!conditions > 4)
end
## debug print messages
conditions : (:($(Expr(:escape, :conditions)) > 4),)
conditions_expr: Bool[0]
## end debug print messages
once we can get a fix for this, the rest of interpolation should be smooth sailing ( minus some fixable bugs/breaks)
macro select(sqlquery, exprs...)
exprs = parse_blocks(exprs...)
exprs = parse_interpolation2.(exprs)
println("b4 ", exprs)
return quote
println("expre: ", $exprs)
exprs_str = $(Expr(:vect, map(x -> isa(x, Symbol) ? string(x) : x, exprs)...))
println("exprsstring: " ,exprs_str)
$(esc(sqlquery)) = select_function($(esc(sqlquery)), exprs_str...)
end
end
function select_function(sqlquery, exprs...)
columns = parse_tidy_db(exprs, sqlquery.metadata)
columns_str = join(["SELECT ", join([string(column) for column in columns], ", ")])
sqlquery.select = columns_str
sqlquery.metadata.current_selxn .= 0
for col in columns
if occursin(".", col)
table_col_split = split(col, ".")
table_name, col_name = table_col_split[1], table_col_split[2]
for idx in eachindex(sqlquery.metadata.current_selxn)
if sqlquery.metadata.table_name[idx] == table_name &&
sqlquery.metadata.name[idx] == col_name
sqlquery.metadata.current_selxn[idx] = 2
end
end
else
matching_indices = findall(sqlquery.metadata.name .== col)
sqlquery.metadata.current_selxn[matching_indices] .= 1
end
end
return sqlquery
end
macro filter(sqlquery, conditions...)
# Process the conditions
conditions = parse_blocks(conditions...)
conditions = parse_interpolation2.(conditions)
println("conditions : ", conditions)
# Construct the final expression to be returned by the macro
return quote
sq = $(esc(sqlquery))
# Ensure the conditions are treated as expressions and then convert to strings
conditions_expr = map(x -> string(x), $(Expr(:vect, conditions...)))
println("conditions_expr: ", conditions_expr)
# Call the filter function with the SQL query and the stringified conditions
filter_function(sq, conditions_expr...)
end
end
function filter_function(sqlquery, conditions...)
if isa(sqlquery, SQLQuery)
if !sqlquery.is_aggregated
cte_name = "cte_" * string(sqlquery.cte_count + 1)
combined_conditions = String[]
for condition in conditions
condition_str = string(expr_to_sql(condition, sqlquery))
condition_str = replace(condition_str, "'\"" => "'", "'\"" => "'", "\"'" => "'", "[" => "(", "]" => ")")
push!(combined_conditions, condition_str)
end
combined_condition_str = join(combined_conditions, " AND ")
new_cte = CTE(name=cte_name, select="*", from=(isempty(sqlquery.ctes) ? sqlquery.from : last(sqlquery.ctes).name), where=combined_condition_str)
push!(sqlquery.ctes, new_cte)
sqlquery.from = cte_name
sqlquery.cte_count += 1
else
aggregated_columns = Set{String}()
if !isempty(sqlquery.select)
for part in split(sqlquery.select, ", ")
if occursin(" AS ", part)
aggregated_column = strip(split(part, " AS ")[2])
push!(aggregated_columns, aggregated_column)
end
end
end
non_aggregated_conditions = String[]
groupby_columns = split(replace(sqlquery.groupBy, "GROUP BY " => ""), ", ")
groupby_columns = strip.(groupby_columns)
for condition in conditions
condition_str = string(condition, sqlquery)
condition_str = replace(condition_str, "'\"" => "'", "\"'" => "'", "[" => "(", "]" => ")")
condition_involves_aggregated_column = any(col -> occursin(Regex("\\b$col\\b"), condition_str), aggregated_columns)
sqlquery.where = ""
if !condition_involves_aggregated_column && any(col -> occursin(Regex("\\b$col\\b"), condition_str), groupby_columns)
main_query_having = !isempty(sqlquery.having) ? sqlquery.having * " AND " * condition_str : "HAVING " * condition_str
sqlquery.having = main_query_having
sqlquery.where = "" # Clearing sqlquery.where to prevent carrying over conditions
else
push!(non_aggregated_conditions, condition_str)
end
end
if !isempty(non_aggregated_conditions)
combined_conditions = join(non_aggregated_conditions, " AND ")
cte_name = "cte_" * string(sqlquery.cte_count + 1)
new_cte = CTE(name=cte_name, select=sqlquery.select, from=(isempty(sqlquery.ctes) ? sqlquery.from : last(sqlquery.ctes).name), groupBy=sqlquery.groupBy, having=sqlquery.having)
push!(sqlquery.ctes, new_cte)
sqlquery.select = "*"
sqlquery.groupBy = ""
sqlquery.having = ""
sqlquery.where = "WHERE " * join(non_aggregated_conditions, " AND ")
sqlquery.from = cte_name
sqlquery.cte_count += 1
end
end
else
error("Expected sqlquery to be an instance of SQLQuery")
end
return sqlquery
end
Current Obstacle/Impasse:
how to partially evaluate an expression like :(Expr(:escape, :conditions) > 4)
so that it only replaces conditions with :mpg, but does not evaluate and throw an error. It needs to not evaluate mpg > 4
so that it can become part of the sql query as a string. This is the challenge for @summarize
, @mutate
, and @filter
.
conditions = :mpg #for ex, but could be string etc
macro test2()
quote $(Expr(:escape, (:conditions) > 4 )) end
end
@test2
#ERROR: LoadError: MethodError: no method matching isless(::Int64, ::Symbol)
I have tried
$$
with nested quotes - no change in issue
Currently to use
!!
interpolation, a user needs to write the belowHowever, ideally, TidierDB would replicate TidierData.jl's parsing (no
add_interp_param!
for ex)Initially, I thought I had this working but using
eval
causes scoping issues when it is exported as part of a package so it doesnt work (!!var_name
It will say that the var_name is no assigned.) the line fromSo to created a temp fix using a struct and context as below to enable
!!var_name
to work when used withadd_interp_param!
The problem now comes up in function writing issue where you want to interpolate functions. #20 where macros must be used instead (but that has limits as well ie macro will only put what it sees and can use a variable defined elsewhere.)
I have tried retrofitting the TidierData.jl one with no avail either, but I suspect there is a way.
I plan to continue experimenting (with limited expectation for success) and I am very open for help, as fixing this would really smooth out TidierDBs ability to be part of a pipeline