Closed GoogleCodeExporter closed 9 years ago
for $x in $X
group by $nation_key := $x.nation_key with $x
return {
"nation_key": $x.nation_key,
"aggregates": for $y in $x
order by $y.sum_price desc
limit 3
return {
"order_date": $y.orderdate,
"sum_price": $y.sum
}
}
Here, after grouping, x becomes a bag, so this might your intended query:
for $x in $X
group by $nation_key := $x.nation_key with $x
return {
"nation_key": $nation_key,
"aggregates": for $y in $x
order by $y.sum_price desc
limit 3
return {
"order_date": $y.orderdate,
"sum_price": $y.sum
}
}
Original comment by buyingyi@gmail.com
on 10 Jun 2014 at 6:49
I tried your suggested one, and I saw error message:
null []
And I tried the subquery:
use dataverse TPCH;
let $t := for $nation in dataset nation
for $sn in dataset selectedNations
where $nation.n_nationkey = $sn.sn_nationkey /*+ indexnl */
return {
"nation_key": $nation.n_nationkey,
"n_name": $nation.n_name
}
for $n in $t
for $order in dataset orders
for $customer in dataset customers
where $order.o_custkey = $customer.c_custkey
and $customer.c_nationkey = $n.n_nationkey
group by $orderdate := $order.o_orderdate, $nation_key := $n.nation_key with
$order
let $sum := sum(for $o in $order return $o.o_totalprice)
return {
"nation_key": $nation_key,
"order_date": $orderdate,
"sum_price": $sum
}
which runs a while, but shows nothing on output console
I tweak it a bit to show only a few results:
use dataverse TPCH;
let $t := for $nation in dataset nation
for $sn in dataset selectedNations
where $nation.n_nationkey = $sn.sn_nationkey /*+ indexnl */
return {
"nation_key": $nation.n_nationkey,
"n_name": $nation.n_name
}
for $n in $t
for $order in dataset orders
for $customer in dataset customers
where $order.o_custkey = $customer.c_custkey
and $customer.c_nationkey = $n.n_nationkey
group by $orderdate := $order.o_orderdate, $nation_key := $n.nation_key with
$order
let $sum := sum(for $o in $order return $o.o_totalprice)
order by $sum
limit 3
return {
"nation_key": $nation_key,
"order_date": $orderdate,
"sum_price": $sum
}
then I saw the error;
Could not generate operator descriptor for operator
edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator@1
461e84e [AlgebricksException]
Original comment by richb...@gmail.com
on 11 Jun 2014 at 3:29
use dataverse TPCH;
let $t := for $nation in dataset nation
for $sn in dataset selectedNations
where $nation.n_nationkey = $sn.sn_nationkey /*+ indexnl */
return {
"nation_key": $nation.n_nationkey,
"n_name": $nation.n_name
}
for $n in $t
for $order in dataset orders
for $customer in dataset customers
where $order.o_custkey = $customer.c_custkey
and $customer.c_nationkey = $n.n_nationkey
group by $orderdate := $order.o_orderdate, $nation_key := $n.nation_key with
$order
let $sum := sum(for $o in $order return $o.o_totalprice)
return {
"nation_key": $nation_key,
"order_date": $orderdate,
"sum_price": $sum
}
That query picks up a nested loop join between $t and orders because we don't
change the join order from what you typed in current version.
You can try this query, by making customer goes before orders:
use dataverse TPCH;
let $t := for $nation in dataset nation
for $sn in dataset selectedNations
where $nation.n_nationkey = $sn.sn_nationkey /*+ indexnl */
return {
"nation_key": $nation.n_nationkey,
"n_name": $nation.n_name
}
for $n in $t
for $customer in dataset customers
for $order in dataset orders
where $order.o_custkey = $customer.c_custkey
and $customer.c_nationkey = $n.n_nationkey
group by $orderdate := $order.o_orderdate, $nation_key := $n.nation_key with
$order
let $sum := sum(for $o in $order return $o.o_totalprice)
return {
"nation_key": $nation_key,
"order_date": $orderdate,
"sum_price": $sum
}
Original comment by buyingyi@gmail.com
on 11 Jun 2014 at 4:56
But what about the NULL error - that still is a bug we'll need to find/fix.
Original comment by dtab...@gmail.com
on 11 Jun 2014 at 4:59
#1 After switching the order of join, it works.
#2 But I still see the null[] error, even if I change the last part of query to
something simpler:
for $x in $X
return {
"nation_key": $x.nation_key
}
Original comment by richb...@gmail.com
on 11 Jun 2014 at 5:39
Right, the following query does not work, because the subplan doesn't get
flatten in that case.
use dataverse TPCH;
let $t := for $nation in dataset nation
for $sn in dataset selectedNations
where $nation.n_nationkey = $sn.sn_nationkey /*+ indexnl */
return {
"nation_key": $nation.n_nationkey,
"n_name": $nation.n_name
}
let $X := (
for $n in $t
for $order in dataset orders
for $customer in dataset customers
where $order.o_custkey = $customer.c_custkey
and $customer.c_nationkey = $n.n_nationkey
group by $orderdate := $order.o_orderdate, $nation_key := $n.nation_key with
$order
let $sum := sum(for $o in $order return $o.o_totalprice)
return {
"nation_key": $nation_key,
"order_date": $orderdate,
"sum_price": $sum
})
for $x in $X
group by $nation_key := $x.nation_key with $x
return {
"nation_key": $x.nation_key,
"aggregates": for $y in $x
order by $y.sum_price desc
limit 3
return {
"order_date": $y.orderdate,
"sum_price": $y.sum
}
}
Original comment by buyingyi@gmail.com
on 11 Jun 2014 at 7:21
Can you elaborate more? What do you mean by "subplan doesn't get flatten"?
Is this a valid query in AQL? Or is there some limitation in the current
implementation? If the latter, is there workaround or other ways to express?
Original comment by richb...@gmail.com
on 11 Jun 2014 at 5:05
It's a valid AQL, but there is some bug in rewriting rules.
Your workaround query should work:
let $t := for $nation in dataset nation
for $sn in dataset selectedNations
where $nation.n_nationkey = $sn.sn_nationkey /*+ indexnl */
return {
"nation_key": $nation.n_nationkey,
"n_name": $nation.n_name
}
for $n in $t
for $customer in dataset customers
for $order in dataset orders
where $order.o_custkey = $customer.c_custkey
and $customer.c_nationkey = $n.n_nationkey
group by $orderdate := $order.o_orderdate, $nation_key := $n.nation_key with
$order
let $sum := sum(for $o in $order return $o.o_totalprice)
return {
"nation_key": $nation_key,
"order_date": $orderdate,
"sum_price": $sum
}
Original comment by buyingyi@gmail.com
on 11 Jun 2014 at 7:15
That's a subquery, but not a workaround.
There is one more step that I want to group by nation_key and nest other
attributes in $X into a child collection. In addition, I want a top 3 operation
on the nested collection to make the result small.
Is this possible?
Original comment by richb...@gmail.com
on 11 Jun 2014 at 8:47
There are multiple bugs caught by this query.
I tried various workaround, the closest workaround is this (I omit the index
join between nation and selectednation and several dataset name are changed
according to my local dataverse):
use dataverse tpch;
for $x in (
for $n in dataset Nation
for $customer in dataset Customer
for $order in dataset Orders
where $order.o_custkey = $customer.c_custkey
and $customer.c_nationkey = $n.n_nationkey
group by $nation_key := $n.n_nationkey, $orderdate := $order.o_orderdate with $order
return {
"nation_key": $nation_key,
"order_date": $orderdate,
"sum_price": sum(for $o in $order return $o.o_totalprice)
}
)
group by $nation_key := $x.nation_key with $x
return {
"nation_key": $nation_key,
"sum_price": for $i in $x
group by $od := $i.order_date with $i
let $sum := sum(for $s in $i return $s.sum_price)
order by $sum desc
limit 3
return{
"orderdate": $od,
"sum_price": $sum
}
}
However, the limit operator doesn't clear its state in the close() call such
that for each partition, only the first tuple contains three nested-records,
later tuples contain [].
I will fix that.
Original comment by buyingyi@gmail.com
on 11 Jun 2014 at 11:19
[deleted comment]
I record the bugs here before forgetting:
1 : type issue in collections. The original query fails compilation because
of that.
2 : group-by key ordering cause wrong partitioning inference, if we change
"group by $nation_key := $n.n_nationkey, $orderdate := $order.o_orderdate with
$order" to "group by $orderdate := $order.o_orderdate, $nation_key :=
$n.n_nationkey with $order", wrong results will be produced.
3 : limit operator runtime doesn't clear states.
Original comment by buyingyi@gmail.com
on 11 Jun 2014 at 11:22
Original comment by buyingyi@gmail.com
on 12 Jun 2014 at 6:03
Original comment by buyingyi@gmail.com
on 12 Jun 2014 at 6:05
Original comment by buyingyi@gmail.com
on 15 Oct 2014 at 1:43
[deleted comment]
The workaround query works now.
But the original query with two "let" statements has not worked yet, because
the compiler has several issues in dealing with the resulting logical plans.
Original comment by buyingyi@gmail.com
on 15 Oct 2014 at 1:45
Original comment by buyingyi@gmail.com
on 23 Oct 2014 at 10:52
Original comment by buyingyi@gmail.com
on 25 Oct 2014 at 7:46
Original comment by buyingyi@gmail.com
on 2 Nov 2014 at 9:43
Original issue reported on code.google.com by
richb...@gmail.com
on 10 Jun 2014 at 6:36