Open davidshiz opened 1 year ago
single table Q4.1 also has similar problem
SELECT
(LO_ORDERDATE DIV 10000) AS year,
C_NATION,
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
FROM lineorder_flat
WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND P_MFGR in ( 'MFGR#1' , 'MFGR#2')
GROUP BY
year,
C_NATION
ORDER BY
year ASC,
C_NATION ASC;
What's the different between these two result sets?
What's the different between these two result sets? As above, the wrong result set has more NULL values
ACK
set global tianmu_groupby_parallel_degree=0;
SELECT
lo_orderdate,
lo_orderpriority,
sum(lo_revenue - lo_supplycost) AS profit
FROM lineorder
WHERE lo_shipmode = 'TRUCK' AND lo_orderdate <= 19980730 AND lo_orderpriority IN ('5-LOW')
GROUP BY
lo_orderdate,
lo_orderpriority
ORDER BY
lo_orderdate ASC,
lo_orderpriority ASC
LIMIT 10;
| top N ( |
| | project ( |
| | | project ( |
| | | | group by ( |
| | | | | project ( |
| | | | | | select ( |
| | | | | | | table("sys"."lineorder") [ "lineorder"."lo_orderdate" NOT NULL, "lineorder"."lo_orderpriority" NOT NULL, "lineorder"."lo_revenue" NOT NULL, "lineorder"."lo_supplycost" NOT NULL, "li |
: neorder"."lo_shipmode" NOT NULL ] COUNT :
| | | | | | ) [ "lineorder"."lo_orderdate" NOT NULL <= int "19980730", "lineorder"."lo_shipmode" NOT NULL = char(10) "TRUCK", "lineorder"."lo_orderpriority" NOT NULL = char(15) "5-LOW" ] |
| | | | | ) [ "lineorder"."lo_orderdate" NOT NULL, "lineorder"."lo_orderpriority" NOT NULL, bigint["lineorder"."lo_revenue" NOT NULL] NOT NULL as "%4"."%4", bigint["lineorder"."lo_supplycost" NOT |
: NULL] NOT NULL as "%5"."%5", "sys"."sql_sub"("%4"."%4" NOT NULL, "%5"."%5" NOT NULL) NOT NULL as "%3"."%3" ] :
| | | | ) [ "lineorder"."lo_orderpriority" NOT NULL, "lineorder"."lo_orderdate" NOT NULL ] [ "lineorder"."lo_orderdate" NOT NULL, "lineorder"."lo_orderpriority" NOT NULL, "sys"."sum" no nil ("%3" |
: ."%3" NOT NULL) NOT NULL as "%2"."%2" ] :
| | | ) [ "lineorder"."lo_orderdate" NOT NULL, "lineorder"."lo_orderpriority" NOT NULL, "%2"."%2" NOT NULL as "profit" ] |
| | ) [ "lineorder"."lo_orderdate" NOT NULL, "lineorder"."lo_orderpriority" NOT NULL, "profit" NOT NULL ] [ "lineorder"."lo_orderdate" ASC NOT NULL, "lineorder"."lo_orderpriority" ASC NOT NULL ] |
| ) [ bigint "10" ]
| project ( |
| | top N ( |
| | | group by ( |
| | | | project ( |
| | | | | table("sys"."lineorder") [ "lineorder"."lo_orderdate" NOT NULL, "lineorder"."lo_orderpriority" NOT NULL, "lineorder"."lo_revenue" NOT NULL, "lineorder"."lo_supplycost" NOT NULL ] COUNT |
| | | | ) [ "lineorder"."lo_orderdate" NOT NULL, "lineorder"."lo_orderpriority" NOT NULL, bigint["lineorder"."lo_revenue" NOT NULL] NOT NULL as "%3"."%3", bigint["lineorder"."lo_supplycost" NOT N |
: ULL] NOT NULL as "%4"."%4", "sys"."sql_sub"("%3"."%3" NOT NULL, "%4"."%4" NOT NULL) NOT NULL as "%2"."%2" ] :
| | | ) [ "lineorder"."lo_orderpriority" NOT NULL, "lineorder"."lo_orderdate" NOT NULL ] [ "lineorder"."lo_orderdate" NOT NULL, "lineorder"."lo_orderpriority" NOT NULL, "sys"."sum" no nil ("%2"." |
: %2" NOT NULL) NOT NULL as "%1"."%1" ] :
| | ) [ bigint "10" ] |
| ) [ "lineorder"."lo_orderdate" NOT NULL, "lineorder"."lo_orderpriority" NOT NULL, "%1"."%1" NOT NULL as "profit" ]
THRnew Number of threads: 36
Line 10: * 9 Thread 0x7ff9315a1700 (LWP 20073) "" BATgroup_internal (groups=0x7ff9315a0528, extents=0x7ff9315a0530, histo=0x0, b=0x7ff8e0007430, s=0x0, g=0x0, e=0x0, h=0x0, subsorted=false)
Line 12: 10 Thread 0x7ff93119f700 (LWP 20075) "" BATgroup_internal (groups=0x7ff93119e528, extents=0x7ff93119e530, histo=0x0, b=0x7ff8e4000d30, s=0x0, g=0x0, e=0x0, h=0x0, subsorted=false)
Line 16: 13 Thread 0x7ff93099b700 (LWP 20079) "" BATgroup_internal (groups=0x7ff93099a528, extents=0x7ff93099a530, histo=0x0, b=0x7ff8f0000f70, s=0x0, g=0x0, e=0x0, h=0x0, subsorted=false)
Line 18: 14 Thread 0x7ff93079a700 (LWP 20080) "" BATgroup_internal (groups=0x7ff930799528, extents=0x7ff930799530, histo=0x0, b=0x7ff8c8e4fa80, s=0x0, g=0x0, e=0x0, h=0x0, subsorted=false)
Line 22: 18 Thread 0x7ff92bbfd700 (LWP 20085) "" BATgroup_internal (groups=0x7ff92bbfc528, extents=0x7ff92bbfc530, histo=0x0, b=0x7ff924004e90, s=0x0, g=0x0, e=0x0, h=0x0, subsorted=false)
Line 24: 19 Thread 0x7ff92b7fb700 (LWP 20087) "" BATgroup_internal (groups=0x7ff92b7fa528, extents=0x7ff92b7fa530, histo=0x0, b=0x7ff918001cb0, s=0x0, g=0x0, e=0x0, h=0x0, subsorted=false)
Line 32: 24 Thread 0x7ff92abf5700 (LWP 20093) "" BATgroup_internal (groups=0x7ff92abf4528, extents=0x7ff92abf4530, histo=0x0, b=0x7ff8a0001820, s=0x0, g=0x0, e=0x0, h=0x0, subsorted=false)
Line 43: 32 Thread 0x7ff92b9fc700 (LWP 20104) "" BATgroup_internal (groups=0x7ff92b9fb528, extents=0x7ff92b9fb530, histo=0x0, b=0x7ff8fc015400, s=0x0, g=0x0, e=0x0, h=0x0, subsorted=false)
Line 48: 36 Thread 0x7ff92b1f8700 (LWP 20111) "" BATgroup_internal (groups=0x7ff92b1f7528, extents=0x7ff92b1f7530, histo=0x0, b=0x7ff8b8001b10, s=0x0, g=0x0, e=0x0, h=0x0, subsorted=false)
for (r = 0; r < cnt; r++) {
oid o = canditer_next(&ci);
p = o - b->hseqbase;
if ((v = bgrps[w[p]]) == 0xFF && ngrp < 256) {
bgrps[w[p]] = v = (unsigned char) ngrp++;
maxgrppos = r;
if (extents)
exts[v] = o;
}
ngrps[r] = v;
if (r > 0 && v < ngrps[r - 1])
gn->tsorted = false;
if (histo)
cnts[v]++;
}
gid = gids ? gids[i] - min : (oid) i; \
x = vals[ci->seq + i - seqb]; \
if (nil_if_empty && \
!(seen[gid >> 5] & (1U << (gid & 0x1F)))) { \
seen[gid >> 5] |= 1U << (gid & 0x1F); \
sums[gid] = 0; \
} \
sums[gid] += x; \
group and aggregation operations are separated to facilitate the use of a more streamlined data structure to carry the group, and facilitate the slicing of data
The selection of group's data structure
The selection of aggregated data structure
Separation of threads and selection of critical sections
| {
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "8246523.25"
},
"grouping_operation": {
"using_temporary_table": true,
"using_filesort": true,
"table": {
"table_name": "lineitem",
"access_type": "ALL",
"rows_examined_per_scan": 32986053,
"rows_produced_per_join": 32986051,
"filtered": "100.00",
"cost_info": {
"read_cost": "1649312.65",
"eval_cost": "6597210.21",
"prefix_cost": "8246523.25",
"data_read_per_join": "11G"
},
"used_columns": [
"l_orderkey",
"l_linenumber",
"l_quantity"
]
}
}
}
} |
Have you read the Contributing Guidelines on issues?
Please confirm if bug report does NOT exists already ?
Describe the problem
10G SSB data, return incorrect result set, 1G data, result is nomal
The wrong result set has more NULL values than the normal result set, as follows:
Expected behavior
No response
How To Reproduce
No response
Environment
Are you interested in submitting a PR to solve the problem?