harsha2010 / magellan

Geo Spatial Data Analytics on Spark
Apache License 2.0
533 stars 149 forks source link

No spatial join on indexes when used with left outer join #181

Closed khajaasmath786 closed 7 years ago

khajaasmath786 commented 7 years ago

Hi,

I am getting the error below when trying to use indexes with Magellan. Here are the code and the error. Won't this package work with left outer joins?

Exception in thread "main" java.lang.AssertionError: assertion failed: No plan for SpatialJoinHint Map(magellan.index.precision -> 25) +- Project [polygon#7, metadata#8[road] AS road#296] +- Relation[point#5,polyline#6,polygon#7,metadata#8,valid#9,index#10] GeoJSONRelation(/user/yyy1k78/occtelematics/data/roadsgeojson,Map(type -> geojson, magellan.index -> true, magellan.index.precision -> 25, path -> /user/yyy1k78/occtelematics/data/roadsgeojson))

    at scala.Predef$.assert(Predef.scala:170)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74)
    at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
    at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
    at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:74)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:66)
    at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74)
    at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
    at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
    at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:74)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:66)
    at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74)
    at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
    at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
    at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:74)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:66)
    at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:79)
    at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:75)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:84)
    at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:84)
    at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2791)
    at org.apache.spark.sql.Dataset.head(Dataset.scala:2112)
    at org.apache.spark.sql.Dataset.take(Dataset.scala:2327)
    at org.apache.spark.sql.Dataset.showString(Dataset.scala:248)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:636)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:595)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:604)
    at com.navistar.telematics.datascience.drivers.GeoLocationMLDriver$.joinDataPointWithRoads(GeoLocationMLDriver.scala:275)
    at com.navistar.telematics.datascience.drivers.GeoLocationMLDriver$.apply(GeoLocationMLDriver.scala:207)
    at com.navistar.telematics.datascience.drivers.GeoLocationMLDriver$$anonfun$main$1.apply$mcV$sp(GeoLocationMLDriver.scala:127)
    at com.navistar.telematics.datascience.drivers.GeoLocationMLDriver$$anonfun$main$1.apply(GeoLocationMLDriver.scala:127)
    at com.navistar.telematics.datascience.drivers.GeoLocationMLDriver$$anonfun$main$1.apply(GeoLocationMLDriver.scala:127)
    at scala.util.Try$.apply(Try.scala:192)
    at com.navistar.telematics.utils.TimeTracker$.apply(TimeTracker.scala:12)
    at com.navistar.telematics.datascience.drivers.GeoLocationMLDriver$.main(GeoLocationMLDriver.scala:127)
    at com.navistar.telematics.datascience.drivers.GeoLocationMLDriver.main(GeoLocationMLDriver.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:738)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

Code:

    val filteredRoadDS=geoFenceWithPointDF.join(roadsPolygonDS.select("polygon","metadata","metadata.road") index 25,($"point" within $"polygon"), "leftOuter")
    filteredRoadDS.printSchema()

harsha2010 commented 7 years ago

Remove the index hint for the left outer join, as left outer joins do not use spatial indexes yet. There is an issue open to add index support to left outer joins.