ActianCorp / spark-vector

Repository for the Spark-Vector connector
Apache License 2.0
20 stars 9 forks source link

QA - SPARK Parallel unloader - query failing with "MismatchedTokenException(91!=30)" #45

Closed Pyrobal closed 8 years ago

Pyrobal commented 8 years ago

5.0H latest daily and latest spark 1.0 jar.

sqlContext.sql("select distinct col_int, col_char20 from vwload_reg02_unload_tbl UNION DISTINCT select col_int, col_char20 from vwload_reg02_unload_tbl2").write.format("com.databricks.spark.csv").save(s"$savepath/vwload_reg02_unload_tbl_14.csv")

MismatchedTokenException(91!=30) at org.antlr.runtime.BaseRecognizer.recoverFromMismatchedToken(BaseRecognizer.java:617) at org.antlr.runtime.BaseRecognizer.match(BaseRecognizer.java:115) at org.apache.hadoop.hive.ql.parse.HiveParser.setOperator(HiveParser.java:40128) at org.apache.hadoop.hive.ql.parse.HiveParser.selectStatement(HiveParser.java:41423) at org.apache.hadoop.hive.ql.parse.HiveParser.regularBody(HiveParser.java:41313) at org.apache.hadoop.hive.ql.parse.HiveParser.queryStatementExpressionBody(HiveParser.java:40366) at org.apache.hadoop.hive.ql.parse.HiveParser.queryStatementExpression(HiveParser.java:40242) at org.apache.hadoop.hive.ql.parse.HiveParser.execStatement(HiveParser.java:1526) at org.apache.hadoop.hive.ql.parse.HiveParser.statement(HiveParser.java:1062) at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:201) at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:166) at org.apache.spark.sql.hive.HiveQl$.getAst(HiveQl.scala:259) at org.apache.spark.sql.hive.HiveQl$.createPlan(HiveQl.scala:284) at org.apache.spark.sql.hive.ExtendedHiveQlParser$$anonfun$hiveQl$1.apply(ExtendedHiveQlParser.scala:41) at org.apache.spark.sql.hive.ExtendedHiveQlParser$$anonfun$hiveQl$1.apply(ExtendedHiveQlParser.scala:40) at scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:136) at scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:135) ....

select distinct col_int, col_char20 from vwload_reg02_unload_tbl UNION DISTINCT select col_int, col_char20 from vwload_reg02_unload_tbl2\g

+-------------+--------------------+ |col_int |col_char20 | +-------------+--------------------+ | 83|Ö | | 65|Hello |

...

| 31|F | | 36|K | +-------------+--------------------+ (121 rows)

TESTCASE:

use attached data file

create table vwload_reg02_unload_tbl (col_int int, col_float4 float4, col_money money,col_decimal382 decimal(38,2),col_decimal102 decimal(10,2), col_char20 char(20), col_varchar20 varchar(20), col_nchar20 nchar(20),col_nvarchar nvarchar(20), col_ansidate ansidate, col_timestamp timestamp);\g

vwload --nullvalue NULL --fdelim "\t" --table vwload_reg02_unload_tbl SEPPARAMDB vwload_reg02_data.txt

create table vwload_reg02_unload_tbl2 as select * from vwload_reg02_unload_tbl\g

val savepath = sys.env("HDFS_TMP")
val installation_ID = sys.env("TMP_II_INSTALLATION")
val hostname = sys.env("TMP_HOSTNAME")
val databasename = sys.env("SEPPARAMDB")
val user = sys.env.get("II_USER")
val password = sys.env.get("II_PASSWORD")

val usernameAndPasswordStatement = (user, password) match {
  case (Some(u), Some(p)) => s""", user "$u", password "$p""""
  case _ => ""
}

sqlContext.sql(s"""CREATE TEMPORARY TABLE vwload_reg02_unload_tbl USING com.actian.spark_vector.sql.DefaultSource OPTIONS ( host "$hostname", instance "$installation_ID", database "$databasename", table "vwload_reg02_unload_tbl" $usernameAndPasswordStatement)""")

sqlContext.sql(s"""CREATE TEMPORARY TABLE vwload_reg02_unload_tbl2 USING com.actian.spark_vector.sql.DefaultSource OPTIONS ( host "$hostname", instance "$installation_ID", database "$databasename", table "vwload_reg02_unload_tbl2" $usernameAndPasswordStatement)""")

sqlContext.sql("select distinct col_int, col_char20 from vwload_reg02_unload_tbl UNION DISTINCT select col_int, col_char20 from vwload_reg02_unload_tbl2").write.format("com.databricks.spark.csv").save(s"$savepath/vwload_reg02_unload_tbl_14.csv")

cbarca commented 8 years ago

same as #46

It looks like Spark 1.5 does not support the EXCEPT, INTERSECT, and UNION DISTINCT SQL set operations: https://issues.apache.org/jira/browse/SPARK-13236