simlaudato / asterixdb

Automatically exported from code.google.com/p/asterixdb
0 stars 0 forks source link

Inverted Index left-outer-join throws "The subtree does not have any data source. [AlgebricksException]" #864

Open GoogleCodeExporter opened 8 years ago

GoogleCodeExporter commented 8 years ago
What steps will reproduce the problem?
On the main branch, the following queries Q1 and Q2 both throw the exception 
"The subtree does not have any data source. [AlgebricksException]".

//--------- DDL
drop dataverse test if exists;
create dataverse test;
use dataverse test;

create type TwitterUserType as closed {
    screen-name: string,
    lang: string,
    friends-count: int32,
    statuses-count: int32,
    name: string,
    followers-count: int32
} 

create type TweetMessageType as closed {
    tweetid: int64,
        user: TwitterUserType,
        sender-location: point,
    send-time: datetime,
        referred-topics: {{ string }},
    message-text: string,
    countA: int32,
    countB: int32
}

create dataset TweetMessages(TweetMessageType)
primary key tweetid;

create index twmSndLocIx on TweetMessages(sender-location) type rtree;
create index msgCountAIx on TweetMessages(countA) type btree;
create index msgKeywordIx on TweetMessages(message-text) type keyword;
create index msgNgramIx on TweetMessages(message-text) type ngram(3);
create index topicKeywordIx on TweetMessages(referred-topics) type keyword;

//--------- Q1
for $t1 in dataset('TweetMessages')
let $area := create-circle($t1.sender-location, 0.05)
where $t1.countA > int32("240")
order by $t1.tweetid
return {
    "tweet": {"id": $t1.tweetid, "topics" : $t1.referred-topics, "area":$area} ,             
    "similar-tweets": for $t2 in dataset('TweetMessages')
                      let $sim := similarity-jaccard-check($t1.referred-topics, $t2.referred-topics, 0.5f)
              where $sim[0] and 
                      $t2.tweetid != $t1.tweetid
                      order by $t2.tweetid
                      return {"id": $t2.tweetid, "topics" : $t2.referred-topics}
};

//--------- Q2
for $t1 in dataset('TweetMessages')
let $area := create-circle($t1.sender-location, 0.05)
where $t1.countB > int32("240")
order by $t1.tweetid
return {
    "tweet": {"id": $t1.tweetid, "topics" : $t1.referred-topics, "area":$area} ,             
    "similar-tweets": for $t2 in dataset('TweetMessages')
                      let $sim := similarity-jaccard-check($t1.referred-topics, $t2.referred-topics, 0.5f)
              where $sim[0] and 
                      $t2.tweetid != $t1.tweetid
                      order by $t2.tweetid
                      return {"id": $t2.tweetid, "topics" : $t2.referred-topics}
};

Reason behind this exception:
The following query Q3, in which the predicate on the outer dataset is on the 
primary key (tweetid), works.

//------------ Q3
for $t1 in dataset('TweetMessages')
let $area := create-circle($t1.sender-location, 0.05)
where $t1.tweetid > int64("240")
order by $t1.tweetid
return {
    "tweet": {"id": $t1.tweetid, "topics" : $t1.referred-topics, "area":$area} ,             
    "similar-tweets": for $t2 in dataset('TweetMessages')
                      let $sim := similarity-jaccard-check($t1.referred-topics, $t2.referred-topics, 0.5f)
              where $sim[0] and 
                      $t2.tweetid != $t1.tweetid
                      order by $t2.tweetid
                      return {"id": $t2.tweetid, "topics" : $t2.referred-topics}
};

It seems that a left-outer join based on an inverted index doesn't work when 
the predicate on the outer dataset does not use the primary index. Looking one 
level deeper: during query optimization, 
OptimizableOperatorSubTree.initFromSubTree() captures the following pattern A 
of a plan but does NOT capture pattern B, which arises from the problematic 
queries above. Then, further down the road, the query optimizer throws the 
exception shown in the exception stack below.

pattern A:
(select)? <-- (assign | unnest)* <-- (datasource scan | unnest-map)

pattern B: (a case from the above problematic queries)
assign <-- select <-- assign <-- datasource scan

------------------------ plan snippet ----------------
    assign [$$1] <- [function-call: asterix:create-circle, Args:[%0->$$38, ADouble: {0.05}]] -- |PARTITIONED|
      select (function-call: algebricks:gt, Args:[%0->$$37, AInt32: {240}]) -- |PARTITIONED|
        assign [$$36, $$38, $$37] <- [function-call: asterix:field-access-by-index, Args:[%0->$$0, AInt32: {4}], function-call: asterix:field-access-by-index, Args:[%0->$$0, AInt32: {2}], function-call: asterix:field-access-by-index, Args:[%0->$$0, AInt32: {6}]] -- |PARTITIONED|
          data-scan []<-[$$34, $$0] <- test:TweetMessages -- |PARTITIONED|
            empty-tuple-source -- |PARTITIONED|
----------------------------------------------------

exception stack:
edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException: The 
subtree does not have any data source.
    at edu.uci.ics.asterix.optimizer.rules.am.OptimizableOperatorSubTree.getPrimaryKeyVars(OptimizableOperatorSubTree.java:199)
    at edu.uci.ics.asterix.optimizer.rules.am.InvertedIndexAccessMethod.copyAndReinitProbeSubTree(InvertedIndexAccessMethod.java:713)
    at edu.uci.ics.asterix.optimizer.rules.am.InvertedIndexAccessMethod.applyJoinPlanTransformation(InvertedIndexAccessMethod.java:630)
    at edu.uci.ics.asterix.optimizer.rules.am.IntroduceJoinAccessMethodRule.rewritePost(IntroduceJoinAccessMethodRule.java:150)
    at edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController.rewriteOperatorRef(AbstractRuleController.java:122)
    at edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController.rewriteOperatorRef(AbstractRuleController.java:96)
    at edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController.rewriteOperatorRef(AbstractRuleController.java:96)
    at edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController.rewriteOperatorRef(AbstractRuleController.java:96)
    at edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController.rewriteOperatorRef(AbstractRuleController.java:96)
    at edu.uci.ics.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialFixpointRuleController.rewriteWithRuleCollection(SequentialFixpointRuleController.java:49)
    at edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer.runOptimizationSets(HeuristicOptimizer.java:91)
    at edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer.optimize(HeuristicOptimizer.java:78)
    at edu.uci.ics.hyracks.algebricks.compiler.api.HeuristicCompilerFactoryBuilder$1$1.optimize(HeuristicCompilerFactoryBuilder.java:83)
    at edu.uci.ics.asterix.api.common.APIFramework.compileQuery(APIFramework.java:309)
    at edu.uci.ics.asterix.aql.translator.AqlTranslator.rewriteCompileQuery(AqlTranslator.java:1722)
    at edu.uci.ics.asterix.aql.translator.AqlTranslator.handleQuery(AqlTranslator.java:2034)
    at edu.uci.ics.asterix.aql.translator.AqlTranslator.compileAndExecute(AqlTranslator.java:315)
    at edu.uci.ics.asterix.api.http.servlet.APIServlet.doPost(APIServlet.java:97)
    at javax.servlet.http.HttpServlet.service(HttpServlet.java:727)
    at javax.servlet.http.HttpServlet.service(HttpServlet.java:820)
    at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:546)
    at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:483)
    at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:228)
    at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:956)
    at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:411)
    at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:188)
    at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:891)
    at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:117)
    at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:114)
    at org.eclipse.jetty.server.Server.handle(Server.java:353)
    at org.eclipse.jetty.server.HttpConnection.handleRequest(HttpConnection.java:598)
    at org.eclipse.jetty.server.HttpConnection$RequestHandler.content(HttpConnection.java:1076)
    at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:805)
    at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:218)
    at org.eclipse.jetty.server.HttpConnection.handle(HttpConnection.java:427)
    at org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:510)
    at org.eclipse.jetty.io.nio.SelectChannelEndPoint.access$000(SelectChannelEndPoint.java:34)
    at org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:40)
    at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:450)
    at java.lang.Thread.run(Thread.java:745)

Original issue reported on code.google.com by kiss...@gmail.com on 16 Mar 2015 at 11:09

GoogleCodeExporter commented 8 years ago
Please ignore the exception stack shown in the original report above (it is 
from my development branch). The following is the exception stack from the 
main branch.

edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException: The 
subtree does not have any data source.
        at edu.uci.ics.asterix.optimizer.rules.am.OptimizableOperatorSubTree.getPrimaryKeyVars(OptimizableOperatorSubTree.java:199)
        at edu.uci.ics.asterix.optimizer.rules.am.InvertedIndexAccessMethod.copyAndReinitProbeSubTree(InvertedIndexAccessMethod.java:558)
        at edu.uci.ics.asterix.optimizer.rules.am.InvertedIndexAccessMethod.applyJoinPlanTransformation(InvertedIndexAccessMethod.java:475)
        at edu.uci.ics.asterix.optimizer.rules.am.IntroduceJoinAccessMethodRule.rewritePost(IntroduceJoinAccessMethodRule.java:169)
        at edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController.rewriteOperatorRef(AbstractRuleController.java:122)
        at edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController.rewriteOperatorRef(AbstractRuleController.java:96)
        at edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController.rewriteOperatorRef(AbstractRuleController.java:96)
        at edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController.rewriteOperatorRef(AbstractRuleController.java:96)
        at edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController.rewriteOperatorRef(AbstractRuleController.java:96)
        at edu.uci.ics.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialFixpointRuleController.rewriteWithRuleCollection(SequentialFixpointRuleController.java:49)
        at edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer.runOptimizationSets(HeuristicOptimizer.java:91)
        at edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer.optimize(HeuristicOptimizer.java:78)
        at edu.uci.ics.hyracks.algebricks.compiler.api.HeuristicCompilerFactoryBuilder$1$1.optimize(HeuristicCompilerFactoryBuilder.java:83)
        at edu.uci.ics.asterix.api.common.APIFramework.compileQuery(APIFramework.java:309)
        at edu.uci.ics.asterix.aql.translator.AqlTranslator.rewriteCompileQuery(AqlTranslator.java:1771)
        at edu.uci.ics.asterix.aql.translator.AqlTranslator.handleQuery(AqlTranslator.java:2083)
        at edu.uci.ics.asterix.aql.translator.AqlTranslator.compileAndExecute(AqlTranslator.java:338)
        at edu.uci.ics.asterix.api.http.servlet.APIServlet.doPost(APIServlet.java:97)
        at javax.servlet.http.HttpServlet.service(HttpServlet.java:754)
        at javax.servlet.http.HttpServlet.service(HttpServlet.java:847)
        at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:546)
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:483)
        at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
        at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:970)
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:411)
        at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:192)
        at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:904)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:117)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:110)
        at org.eclipse.jetty.server.Server.handle(Server.java:347)
        at org.eclipse.jetty.server.HttpConnection.handleRequest(HttpConnection.java:439)
        at org.eclipse.jetty.server.HttpConnection$RequestHandler.content(HttpConnection.java:924)
        at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:781)
        at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:214)
        at org.eclipse.jetty.server.AsyncHttpConnection.handle(AsyncHttpConnection.java:43)
        at org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:545)
        at org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:43)
        at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:529)
        at java.lang.Thread.run(Thread.java:745)

Original comment by kiss...@gmail.com on 16 Mar 2015 at 11:16