Closed anayrat closed 3 years ago
Agreed, nice catch!
Ok, so I think I am close to the final rewrite:
-- For every (srvid, qualid, queryid, dbid, userid) group, build ordered arrays
-- holding at most 20 distinct constvalues for each kind of statistic:
--   mu  : most used        (highest summed occurences)
--   mf  : most filtering   (highest nbfiltered / execution_count ratio)
--   lf  : least filtering  (lowest nbfiltered / execution_count ratio)
--   me  : most executed    (highest summed execution_count)
--   mer : highest average mean_err_estimate_ratio
--   men : highest average mean_err_estimate_num
-- The unnamed sixth column is the closed timestamp range covered by the group.
SELECT
    srvid,
    qualid,
    queryid,
    dbid,
    userid,
    tstzrange(min(min_constvalues_ts), max(max_constvalues_ts), '[]'),
    array_agg(qual_value ORDER BY occurences_rank ASC)
        FILTER (WHERE occurences_rank <= 20) AS mu,
    array_agg(qual_value ORDER BY filtered_rank ASC)
        FILTER (WHERE filtered_rank <= 20) AS mf,
    -- Least filtering: reuse filtered_rank and keep its LAST 20 positions.
    -- Ranks run from 1 to nb_lines, so the last 20 are the ranks strictly
    -- greater than nb_lines - 20 (">=" would keep 21 rows).
    array_agg(qual_value ORDER BY filtered_rank DESC)
        FILTER (WHERE filtered_rank > nb_lines - 20) AS lf,
    array_agg(qual_value ORDER BY execution_rank ASC)
        FILTER (WHERE execution_rank <= 20) AS me,
    array_agg(qual_value ORDER BY err_estimate_ratio_rank ASC)
        FILTER (WHERE err_estimate_ratio_rank <= 20) AS mer,
    array_agg(qual_value ORDER BY err_estimate_num_rank ASC)
        FILTER (WHERE err_estimate_num_rank <= 20) AS men
FROM (
    -- Rank each distinct constvalue inside its group, once per statistic.
    SELECT
        srvid,
        qualid,
        queryid,
        dbid,
        userid,
        min(mints) OVER (W) AS min_constvalues_ts,
        max(maxts) OVER (W) AS max_constvalues_ts,
        -- Build the qual_values row once here instead of once per aggregate.
        ROW(
            constvalues,
            sum_occurences,
            sum_execution_count,
            sum_nbfiltered,
            avg_mean_err_estimate_ratio,
            avg_mean_err_estimate_num
        )::qual_values AS qual_value,
        row_number() OVER (W ORDER BY sum_occurences DESC) AS occurences_rank,
        -- Filtering ratio; a constvalue that was never executed counts as 0
        -- rather than raising a division-by-zero error.
        row_number() OVER (
            W ORDER BY
                CASE
                    WHEN sum_execution_count = 0 THEN 0
                    ELSE sum_nbfiltered / sum_execution_count::numeric
                END DESC
        ) AS filtered_rank,
        row_number() OVER (W ORDER BY sum_execution_count DESC) AS execution_rank,
        -- avg() yields NULL when every input is NULL, and PostgreSQL sorts
        -- NULLs first under DESC; NULLS LAST keeps such rows out of the top 20.
        row_number() OVER (W ORDER BY avg_mean_err_estimate_ratio DESC NULLS LAST) AS err_estimate_ratio_rank,
        row_number() OVER (W ORDER BY avg_mean_err_estimate_num DESC NULLS LAST) AS err_estimate_num_rank,
        -- Number of distinct constvalues in the group (used for the "lf" cut).
        count(*) OVER (W) AS nb_lines
    FROM (
        -- Collapse the raw samples to one row per distinct constvalues,
        -- summing the counters and averaging the estimation errors.
        SELECT
            srvid,
            qualid,
            queryid,
            dbid,
            userid,
            constvalues,
            min(ts) AS mints,
            max(ts) AS maxts,
            sum(occurences) AS sum_occurences,
            sum(nbfiltered) AS sum_nbfiltered,
            sum(execution_count) AS sum_execution_count,
            avg(mean_err_estimate_ratio) AS avg_mean_err_estimate_ratio,
            avg(mean_err_estimate_num) AS avg_mean_err_estimate_num
        FROM powa_qualstats_constvalues_history_current
        WHERE srvid = 1
        GROUP BY srvid, qualid, queryid, dbid, userid, constvalues
    ) AS distinct_constvalues
    WINDOW W AS (PARTITION BY srvid, qualid, queryid, dbid, userid)
) AS ranked_constvalues
GROUP BY srvid, qualid, queryid, dbid, userid
;
It needs more testing and review, but this query is much faster than the previous one: it went from 20 minutes (maybe more) down to 3 seconds.
Just to not forget, I spotted several bugs in $title
Several columns have the same name:
The `order by` should be `ASC`.
I will try to rewrite this query with window functions, and I'll address all of these issues.