SANSA-Stack / Archived-SANSA-Query


Exception in QuerySystem with valid SPARQL query #22

Closed · patrickwestphal closed this issue 5 years ago

patrickwestphal commented 6 years ago

When trying to run the QuerySystem like so

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.jena.graph.Triple
import org.apache.jena.riot.Lang
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

import net.sansa_stack.query.spark.semantic.QuerySystem

object Foo {
  // delimiter symbols passed to both the semantic partitioner and the query system
  val symbol = Map(
    "space" -> " " * 5,
    "blank" -> " ",
    "tabs" -> "\t",
    "newline" -> "\n",
    "colon" -> ":",
    "comma" -> ",",
    "hash" -> "#",
    "slash" -> "/",
    "question-mark" -> "?",
    "exclamation-mark" -> "!",
    "curly-bracket-left" -> "{",
    "curly-bracket-right" -> "}",
    "round-bracket-left" -> "(",
    "round-bracket-right" -> ")",
    "less-than" -> "<",
    "greater-than" -> ">",
    "at" -> "@",
    "dot" -> ".",
    "dots" -> "...",
    "asterisk" -> "*",
    "up-arrows" -> "^^")

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .master("local[*]")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .appName("SANSA - Semantic Partitioning")
      .getOrCreate()

    import net.sansa_stack.rdf.spark.io._
    import net.sansa_stack.rdf.spark.partition.semantic.RdfPartition

    // load the N-Triples file as an RDD of Jena Triples
    val log: RDD[Triple] = spark.rdf(Lang.NTRIPLES)("/tmp/log.nt")
    // semantically partition the input triples
    val partition: RDD[String] = new RdfPartition(
      symbol, log, "/tmp/sem_partitions",
      spark.sparkContext.defaultMinPartitions).partitionGraph()

    // remove any previous results directory before running the query
    val resultsDir = new File("/tmp/results")
    FileUtils.deleteDirectory(resultsDir)

    // evaluate the SPARQL query from /tmp/query.sparql against the partitions
    val qs = new QuerySystem(
        symbol,
        partition,
        "/tmp/query.sparql",
        resultsDir.getAbsolutePath,
        spark.sparkContext.defaultMinPartitions)
    qs.run()
  }
}

with /tmp/query.sparql containing the simple SPARQL query

SELECT ?s
WHERE
  { 
    ?s   ?p  ?o .
  }

I get an IndexOutOfBoundsException:

Exception in thread "main" java.lang.IndexOutOfBoundsException: 5
    at scala.collection.mutable.ResizableArray$class.apply(ResizableArray.scala:43)
    at scala.collection.mutable.ArrayBuffer.apply(ArrayBuffer.scala:48)
    at net.sansa_stack.query.spark.semantic.QuerySystem$$anonfun$refactorUnionQueries$1.apply$mcVI$sp(SparqlQuerySystem.scala:167)
    at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)
    at net.sansa_stack.query.spark.semantic.QuerySystem.refactorUnionQueries(SparqlQuerySystem.scala:142)
    at net.sansa_stack.query.spark.semantic.QuerySystem$$anonfun$run$1.apply$mcVI$sp(SparqlQuerySystem.scala:48)
    at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)
    at net.sansa_stack.query.spark.semantic.QuerySystem.run(SparqlQuerySystem.scala:46)

Modifying the query to

SELECT ?s
WHERE {
    ?s   ?p  ?o .
  }

at least makes the error disappear.
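
The failing query file has five lines and the trace reports index 5 being requested from an ArrayBuffer inside refactorUnionQueries, so the query text apparently gets addressed line by line and trips over the extra line that holds only the opening brace. Until this is fixed, a caller-side workaround is to normalize the query file so that "WHERE" and "{" end up on the same line before handing its path to QuerySystem. The sketch below is my own and not part of SANSA; the normalized output path is hypothetical.

import java.nio.file.{Files, Paths}

object NormalizeQueryFile {

  // Join any line consisting only of "{" onto the preceding line,
  // e.g. "WHERE\n  {" becomes "WHERE {". Everything else is kept as-is.
  def normalize(queryText: String): String =
    queryText
      .split("\n")
      .foldLeft(List.empty[String]) { (acc, line) =>
        if (line.trim == "{" && acc.nonEmpty) (acc.head + " {") :: acc.tail
        else line :: acc
      }
      .reverse
      .mkString("\n")

  def main(args: Array[String]): Unit = {
    val original   = Paths.get("/tmp/query.sparql")
    val normalized = Paths.get("/tmp/query_normalized.sparql") // hypothetical path
    val text = new String(Files.readAllBytes(original), "UTF-8")
    Files.write(normalized, normalize(text).getBytes("UTF-8"))
    // pass normalized.toString to QuerySystem instead of the original path
  }
}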