IITDBGroup / gprom

GProM is a middleware that adds support for provenance to database backends.
http://www.cs.iit.edu/%7edbgroup/research/gprom.php
Apache License 2.0
9 stars 6 forks source link

Datalog queries lineage on postgres #94

Open lordpretzel opened 2 years ago

lordpretzel commented 2 years ago

Queries that work without optimization

Queries that work with optimization

Queries have results

Q2

HINT:  No operator matches the given name and argument types. You might need to add explicit type casts.
RUN /home/perm/semantic_opt_gprom/umflint/tpcq02/p_supplier_opt.sql SQL FOR TIMING p_supplier_opt STORE IN /home/perm/semantic_opt_gprom/umflint/tpcq02/results/p_supplier_opt.csv
RUN /home/perm/semantic_opt_gprom/umflint/tpcq02/p_supplier_unopt.sql SQL FOR TIMING p_supplier_unopt STORE IN /home/perm/semantic_opt_gprom/umflint/tpcq02/results/p_supplier_unopt.csv
psql:/home/perm/semantic_opt_gprom/umflint/tpcq02/p_supplier_unopt.sql:81: ERROR:  operator does not exist: bigint = character varying
LINE 71: FROM "supplier" AS F0) F12 ON ((((((((F5."V9" = F12."V9") AN...

Q8

       q2(date_part('YEAR', o_odate),(l_extend * (1 - l_discount)),n_name) :- supplier(s_key,x1,x2,s_nkey,x3,x4,x5),nation(s_nkey,n_name,x6,x7)\
,customer(c_ckey,x8,x9,c_nkey,x10,x11,x12,x13),nation(c_nkey,x14,n_rkey,x15),region(n_rkey,'AMERICA',x16),orders(o_okey,x17,x18,x19,o_odate,x20,\
x21,x22,x23),lineitem(o_okey,l_pkey,s_key,x24,x25,l_extend,l_discount,x26,x27,x28,x29,x30,x31,x32,x33,x34),part(l_pkey,x35,x36,x37,'ECONOMY ANOD\
IZED STEEL',x38,x39,x40,x41),((o_odate >= '1995-01-01')),((o_odate < '1996-12-31')).
        q1(year,sum((CASE  WHEN (nation = 'BRAZIL') THEN volume ELSE 0 END)),sum(volume)) :- q2(year,volume,nation).
        q(year,(x / y)) :- q1(year,x,y).
        rp(x1,x2) :- rtpcq08(x1,x2).
        prov_q2(year,volume,nation) :- q2(year,volume,nation),prov_q1(year,V2,V3).
        prov_q1(year,x,y) :- prov_q_pre_2(year,x,y,year,V0),rp(year,V0).
        prov_q_pre_2(year,x,y,year,(x / y)) :- q1(year,x,y).
        prov_customer(c_ckey,x8,x9,c_nkey,x10,x11,x12,x13) :- prov_q2_pre_0(c_ckey,x8,x9,c_nkey,x10,x11,x12,x13,V0,V1,n_name),prov_q2(V0,V1,n_na\
me).
        prov_q2_pre_0(c_ckey,x8,x9,c_nkey,x10,x11,x12,x13,date_part('YEAR', o_odate),(l_extend * (1 - l_discount)),n_name) :- supplier(s_key,x1,\
x2,s_nkey,x3,x4,x5),nation(s_nkey,n_name,x6,x7),customer(c_ckey,x8,x9,c_nkey,x10,x11,x12,x13),nation(c_nkey,x14,n_rkey,x15),region(n_rkey,'AMERI\
CA',x16),orders(o_okey,x17,x18,x19,o_odate,x20,x21,x22,x23),lineitem(o_okey,l_pkey,s_key,x24,x25,l_extend,l_discount,x26,x27,x28,x29,x30,x31,x32\
,x33,x34),part(l_pkey,x35,x36,x37,'ECONOMY ANODIZED STEEL',x38,x39,x40,x41),((o_odate >= '1995-01-01')),((o_odate < '1996-12-31')).

q2(date_part('YEAR', o_odate),(l_extend * (1 - l_discount)),n_name) :- supplier(s_key,x1,x2,s_nkey,x3,x4,x5),nation(s_nkey,n_name,x6,x7)\
,customer(c_ckey,x8,x9,c_nkey,x10,x11,x12,x13),nation(c_nkey,x14,n_rkey,x15),region(n_rkey,'AMERICA',x16),orders(o_okey,x17,x18,x19,o_odate,x20,\
x21,x22,x23),lineitem(o_okey,l_pkey,s_key,x24,x25,l_extend,l_discount,x26,x27,x28,x29,x30,x31,x32,x33,x34),part(l_pkey,x35,x36,x37,'ECONOMY ANOD\
IZED STEEL',x38,x39,x40,x41),((o_odate >= '1995-01-01')),((o_odate < '1996-12-31')).
        q1(year,sum((CASE  WHEN (nation = 'BRAZIL') THEN volume ELSE 0 END)),sum(volume)) :- @q2(year,volume,nation).
        q(year,(x / y)) :- @q1(year,x,y).
        rp(x1,x2) :- rtpcq08(x1,x2).
        prov_q2(year,volume,nation) :- @q2(year,volume,nation),@prov_q1(year,v2,v3).
        prov_q1(year,x,y) :- @prov_q_pre_2(year,x,y,year,v0),@rp(year,v0).
        prov_q_pre_2(year,x,y,year,(x / y)) :- @q1(year,x,y).
        prov_customer(c_ckey,x8,x9,c_nkey,x10,x11,x12,x13) :- @prov_q2_pre_0(c_ckey,x8,x9,c_nkey,x10,x11,x12,x13,v0,v1,n_name),@prov_q2(v0,v1,n_\
name).
        prov_q2_pre_0(c_ckey,x8,x9,c_nkey,x10,x11,x12,x13,date_part('YEAR', o_odate),(l_extend * (1 - l_discount)),n_name) :- supplier(s_key,x1,\
x2,s_nkey,x3,x4,x5),nation(s_nkey,n_name,x6,x7),customer(c_ckey,x8,x9,c_nkey,x10,x11,x12,x13),nation(c_nkey,x14,n_rkey,x15),region(n_rkey,'AMERI\
CA',x16),orders(o_okey,x17,x18,x19,o_odate,x20,x21,x22,x23),lineitem(o_okey,l_pkey,s_key,x24,x25,l_extend,l_discount,x26,x27,x28,x29,x30,x31,x32\
,x33,x34),part(l_pkey,x35,x36,x37,'ECONOMY ANODIZED STEEL',x38,x39,x40,x41),((o_odate >= '1995-01-01')),((o_odate < '1996-12-31')).

Q15

This is the generated datalog, but the generated SQL is not correct

prov_supplier(s_suppkey,s_name,s_address,s1,s_phone,s2,s3) :-
    supplier(s_suppkey,s_name,s_address,s1,s_phone,s2,s3),
    q1(s_suppkey,rev),
    q2(rev),
    rtpcq15(s_suppkey,s_name,s_address,s_phone,rev).

q1(l_suppkey,sum((l_extp * (1 - l_discount)))) :-
    lineitem(l1,l2,l_suppkey,l3,l4,l_extp,l_discount,l5,l6,l7,l_shipdate,l8,l9,l10,l11,l12),
    ((l_shipdate >= '1996-01-01')),
    ((l_shipdate < '1996-04-01')).

q2(max(rev)) :- q1(q1,rev).
WITH _temp_view_0 AS (
SELECT /*+ materialize */ F0."l_suppkey" AS "A0", sum((F0."l_extp" * ((1)::float8 - F0."l_discount"))) AS "A1"
FROM (
SELECT F0."l_orderkey" AS "l1", F0."l_partkey" AS "l2", F0."l_suppkey" AS "l_suppkey", F0."l_linenumber" AS "l3", F0."l_quantity" AS "l4", F0."l_extendedprice" AS "l_extp", F0."l_discount" AS "l_discount", F0."l_tax" AS "l5", F0."l_returnflag" AS "l6", F0."l_linestatus" AS "l7", F0."l_shipdate" AS "l_shipdate", F0."l_commitdate" AS "l8", F0."l_receiptdate" AS "l9", F0."l_shipinstruct" AS "l10", F0."l_shipmode" AS "l11", F0."l_comment" AS "l12"
FROM "lineitem" AS F0) F0
WHERE ((F0."l_shipdate" >= '1996-01-01') AND (F0."l_shipdate" < '1996-04-01'))
GROUP BY F0."l_suppkey"),
_temp_view_1 AS (
SELECT /*+ materialize */ max(F0."A1") AS "A0"
FROM "q1" AS F0)
SELECT  DISTINCT F0."s_suppkey" AS "A0", F0."s_name" AS "A1", F0."s_address" AS "A2", F0."s1" AS "A3", F0."s_phone" AS "A4", F0."s2" AS "A5", F0."s3" AS "A6"
FROM ((((
SELECT F0."s_suppkey" AS "s_suppkey", F0."s_name" AS "s_name", F0."s_address" AS "s_address", F0."s_nationkey" AS "s1", F0."s_phone" AS "s_phone", F0."s_acctbal" AS "s2", F0."s_comment" AS "s3"
FROM "supplier" AS F0) F0 JOIN (
SELECT F0."A0" AS "s_suppkey", F0."A1" AS "rev"
FROM (SELECT * FROM _temp_view_0) F0) F1 ON ((F0."s_suppkey" = F1."s_suppkey"))) JOIN (
SELECT F0."A0" AS "rev"
FROM (SELECT * FROM _temp_view_1) F0) F2 ON ((F1."rev" = F2."rev"))) JOIN (
SELECT F0."A0" AS "s_suppkey", F0."A1" AS "s_name", F0."A2" AS "s_address", F0."A3" AS "s_phone", F0."A4" AS "rev"
FROM "rtpcq15" AS F0) F3 ON ((((((((F0."s_suppkey" = F3."s_suppkey") AND (F1."s_suppkey" = F3."s_suppkey")) AND (F0."s_name" = F3."s_name")) AND (F0."s_address" = F3."s_address")) AND (F0."s_phone" = F3."s_phone")) AND (F1."rev" = F3."rev")) AND (F2."rev" = F3."rev"))));
manips2002 commented 1 year ago

Q17: Works as expected

manips2002 commented 1 year ago

For, Q16, there are errors in generating sql files from provenance .dl files (both opt and unopt) -- For example: ./src/command_line/gprom -backend postgres -host 127.0.0.1 -user postgres -db semanticopt -port 5463 -passwd test -frontend dl -Osemantic_opt FALSE -Oflatten_dl TRUE -loglevel 3 -Pexecutor sql -queryFile umflint/tpcq16/part.dl

INFO(datalog_model.c:583) after making super variable names unique

prov_part(p_partkey,p1,p2,x1,x2,x3,p3,p4,p5) :- partsupp(p_partkey,ps_suppkey,ps1,ps2,ps3),part(p_partkey,p1,p2,x1,x2,x3,p3,p4,p5),qsizes(x3),((x1 != 'Brand#45')),((x2 NOT LIKE 'MEDIUM POLISHED%')),@qcomplaints(ps_suppkey,v0),@prov_qttlcom(ps_suppkey,v1).

INFO(datalog_model.c:584) after making variable name unique

    qcomplaints(s_suppkey,1) :- supplier(s_suppkey,s1,s2,s3,s4,s5,s_comment),((s_comment LIKE '%Customer%Complaints%')).

ERROR (exception.c:145) exception was thrown (RECOVERABLE) datalog_model.c - 284 - <failed assertion: LIST_LENGTH(headBinds) == LIST_LENGTH(hVars)

Error occured (datalog_model.c:284) failed assertion: LIST_LENGTH(headBinds) == LIST_LENGTH(hVars)

EXCEPTION Handler requested us to die because of exception at (datalog_model.c:284)

failed assertion: LIST_LENGTH(headBinds) == LIST_LENGTH(hVars)

manips2002 commented 1 year ago

Q15 -- p_lineitem_unopt -- still returning 0 rows..

manips2002 commented 1 year ago

Q11 -- works as expected

manips2002 commented 1 year ago

Q13 -- works as expected. However, the left outer join is replaced with a natural join. Also, the SQLLite version -- does not have the filter on o_comment..

manips2002 commented 1 year ago

Q18: lineitem-unopt produces 0 rows..

manips2002 commented 1 year ago

Q19: works as expected

manips2002 commented 1 year ago

Q20: works as expected

manips2002 commented 1 year ago

Q08 -- works as expected Note the SQLLite version is missing a join condition -- o_custkey = c_custkey