master / spark-stemming

Spark MLlib wrapper for the Snowball framework
BSD 2-Clause "Simplified" License
33 stars 20 forks source link

Which versions of Spark does this work with? #9

Open bitdivine opened 6 years ago

bitdivine commented 6 years ago

Thank you for what looks like some useful code. However, what does it need to run? I tried your sample code with Spark 2.2 and 1.6, and both failed:

    $ spark-shell-2.2 --packages com.github.master:spark-stemming_2.10:0.2.0
...
scala> val sqlContext = spark.sqlContext
sqlContext: org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLContext@7aeb5402

scala> {
     | import org.apache.spark.mllib.feature.Stemmer
     |
     | val data = sqlContext
     |   .createDataFrame(Seq(("мама", 1), ("мыла", 2), ("раму", 3)))
     |   .toDF("word", "id")
     |
     | val stemmed = new Stemmer()
     |   .setInputCol("word")
     |   .setOutputCol("stemmed")
     |   .setLanguage("Russian")
     |   .transform(data)
     |
     | stemmed.show
     | }
java.lang.IllegalArgumentException: requirement failed: Input type must be ArrayType(StringType) but got StringType.
  at scala.Predef$.require(Predef.scala:224)
  at org.apache.spark.mllib.feature.Stemmer.validateInputType(Stemmer.scala:31)
  at org.apache.spark.ml.UnaryTransformer.transformSchema(Transformer.scala:110)
  at org.apache.spark.ml.PipelineStage.transformSchema(Pipeline.scala:74)
  at org.apache.spark.ml.UnaryTransformer.transform(Transformer.scala:120)
  ... 62 elided

scala>