holdenk / spark-testing-base

Base classes to use when writing tests with Spark
Apache License 2.0

Hive support is required to CREATE Hive TABLE (AS SELECT);; #292

Open minnieshi opened 5 years ago

minnieshi commented 5 years ago

Below is the sbt dependency:

val SparkTestingBase = Seq(
  ("com.holdenkarau" %% "spark-testing-base" % "2.2.0_0.10.0" % Test)
    .excludeAll(ExclusionRule(organization = "org.apache.hadoop"))
)

Below is the test case:

import com.holdenkarau.spark.testing.DataFrameSuiteBase
import org.scalatest.{FlatSpec, Matchers}

class SparkOpsTest extends FlatSpec with DataFrameSuiteBase with Matchers {

  behavior of "SparkOpsTest"

  it should "Perform InputTableOps correctly" in {
    setupDb()
    // TODO: the statement below is causing problems.
    spark.sql("CREATE TABLE test_db_input.test_table_input(col String)")
  }

  override implicit def reuseContextIfPossible: Boolean = true

  // Set up the environment required for the tests.
  protected def setupDb() = {
    spark.sql("CREATE DATABASE IF NOT EXISTS test_db_input LOCATION '/tmp/test_db_input.db'")
  }
}

And below is the detailed log:

19/05/03 12:19:34 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
19/05/03 12:19:35 WARN Utils: Your hostname, min-vm resolves to a loopback address: 127.0.1.1; using 10.0.2.15 instead (on interface enp0s3)
19/05/03 12:19:35 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
19/05/03 12:19:39 WARN SparkContext: Using an existing SparkContext; some configuration may not take effect.

Hive support is required to CREATE Hive TABLE (AS SELECT);; 'CreateTable test_db_input.test_table_input, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, ErrorIfExists

org.apache.spark.sql.AnalysisException: Hive support is required to CREATE Hive TABLE (AS SELECT);; 'CreateTable test_db_input.test_table_input, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, ErrorIfExists

at org.apache.spark.sql.execution.datasources.HiveOnlyCheck$$anonfun$apply$12.apply(rules.scala:400)
at org.apache.spark.sql.execution.datasources.HiveOnlyCheck$$anonfun$apply$12.apply(rules.scala:398)
at org.apache.spark.sql.catalyst.trees.TreeNode.foreach(TreeNode.scala:117)
at org.apache.spark.sql.execution.datasources.HiveOnlyCheck$.apply(rules.scala:398)
at org.apache.spark.sql.execution.datasources.HiveOnlyCheck$.apply(rules.scala:396)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$2.apply(CheckAnalysis.scala:409)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$2.apply(CheckAnalysis.scala:409)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:409)
at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:91)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:52)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:66)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:623)
at com.nordea.dq.sparkOps.SparkOpsTest$$anonfun$1.apply(SparkOpsTest.scala:14)
at com.nordea.dq.sparkOps.SparkOpsTest$$anonfun$1.apply(SparkOpsTest.scala:10)
at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
at org.scalatest.Transformer.apply(Transformer.scala:22)
at org.scalatest.Transformer.apply(Transformer.scala:20)
at org.scalatest.FlatSpecLike$$anon$1.apply(FlatSpecLike.scala:1682)
at org.scalatest.TestSuite$class.withFixture(TestSuite.scala:196)
at org.scalatest.FlatSpec.withFixture(FlatSpec.scala:1685)
at org.scalatest.FlatSpecLike$class.invokeWithFixture$1(FlatSpecLike.scala:1679)
at org.scalatest.FlatSpecLike$$anonfun$runTest$1.apply(FlatSpecLike.scala:1692)
at org.scalatest.FlatSpecLike$$anonfun$runTest$1.apply(FlatSpecLike.scala:1692)
at org.scalatest.SuperEngine.runTestImpl(Engine.scala:289)
at org.scalatest.FlatSpecLike$class.runTest(FlatSpecLike.scala:1692)
at org.scalatest.FlatSpec.runTest(FlatSpec.scala:1685)
at org.scalatest.FlatSpecLike$$anonfun$runTests$1.apply(FlatSpecLike.scala:1750)
at org.scalatest.FlatSpecLike$$anonfun$runTests$1.apply(FlatSpecLike.scala:1750)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:396)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:384)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:384)
at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:373)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:410)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:384)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:384)
at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:379)
at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:461)
at org.scalatest.FlatSpecLike$class.runTests(FlatSpecLike.scala:1750)
at org.scalatest.FlatSpec.runTests(FlatSpec.scala:1685)
at org.scalatest.Suite$class.run(Suite.scala:1147)
at org.scalatest.FlatSpec.org$scalatest$FlatSpecLike$$super$run(FlatSpec.scala:1685)
at org.scalatest.FlatSpecLike$$anonfun$run$1.apply(FlatSpecLike.scala:1795)
at org.scalatest.FlatSpecLike$$anonfun$run$1.apply(FlatSpecLike.scala:1795)
at org.scalatest.SuperEngine.runImpl(Engine.scala:521)
at org.scalatest.FlatSpecLike$class.run(FlatSpecLike.scala:1795)
at com.nordea.dq.sparkOps.SparkOpsTest.org$scalatest$BeforeAndAfterAll$$super$run(SparkOpsTest.scala:6)
at org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:213)
at org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:210)
at com.nordea.dq.sparkOps.SparkOpsTest.run(SparkOpsTest.scala:6)
at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1346)
at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1340)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1340)
at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1011)
at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1010)
at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1506)
at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1010)
at org.scalatest.tools.Runner$.run(Runner.scala:850)
at org.scalatest.tools.Runner.run(Runner.scala)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:131)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28)

LvffY commented 5 years ago

Is there any update on this issue? I encounter the same problem with version 2.3.2_0.11.0 on Maven builds.

ashkan-leo commented 4 years ago

I encountered the same issue using 2.4.3_0.14.0 on Maven.

kuheli004 commented 4 years ago

What is the solution? I am now facing the same issue.

kuheli004 commented 4 years ago

I am facing the same issue with version spark-testing-base_2.11_2.3.0.9.0.

herinlearning commented 4 years ago

I am facing the same issue. Can a solution be provided?

portus84 commented 4 years ago

+1

khalidgt95 commented 3 years ago

Same issue in Java as well, even after enabling Hive support and adding the dependencies to the pom.xml file.

ankur-j commented 3 years ago

Also facing the same issue.

holdenk commented 3 years ago

Can you share your build file @ankur-j?

ankur-j commented 3 years ago

@holdenk, we don't use sbt, but here is a snippet from the BUCK file:

load("//tooling/defs:uber_targets.bzl", "java_binary", "scala_library", "scala_test")

scala_library(
    name = "src_main",
    maven_coords = "com.uber.eats.spark:feature-aggregator:unspecified",
    provided_deps = [
        "//3rdparty/jvm/org/apache/spark:spark-core_2.11-2.4.3.jar",
        "//3rdparty/jvm/org/apache/spark:spark-hive_2.11-2.4.3.jar",
        "//3rdparty/jvm/org/apache/spark:spark-sql_2.11-2.4.3.jar",
        "//3rdparty/jvm/org/apache/hadoop:hadoop-client-2.7.2.jar",
    ],
    tests = [
        ":test_main",
    ],
    visibility = [
        "PUBLIC",
    ],
    deps = [
        "//3rdparty/jvm/com/google/code/gson:gson-2.8.6.jar",
        "//3rdparty/jvm/io/circe:circe-generic-extras_2.11-0.12.0-M3.jar",
        "//3rdparty/jvm/io/circe:circe-generic_2.11-0.12.0-M3.jar",
        "//3rdparty/jvm/io/circe:circe-yaml_2.11-0.11.0-M1.jar",
        "//3rdparty/jvm/org/rogach:scallop_2.11-3.1.2.jar",
    ],
)

java_binary(
    name = "bin_main",
    exclude_list = [
        "org.apache.commons",
        "org.slf4j",
        "scala",
    ],
    tests = [
        ":test_main",
    ],
    deps = [
        ":src_main",
    ],
)

scala_test(
    name = "test_main",
    deps = [
        ":src_main",
        "//3rdparty/jvm/org/apache/hadoop:hadoop-client-2.7.2.jar",
        "//3rdparty/jvm/org/apache/spark:spark-core_2.11-2.4.3.jar",
        "//3rdparty/jvm/org/apache/spark:spark-hive_2.11-2.4.3.jar",
        "//3rdparty/jvm/org/apache/spark:spark-sql_2.11-2.4.3.jar",
        "//3rdparty/jvm/org/mockito:mockito-scala_2.11-1.13.6.jar",
        "//fievel-common/testing/scala-base:src_main",
        "//3rdparty/jvm/com/holdenkarau:spark-testing-base_2.11-2.3.1_0.10.0.jar"
    ],
)

ankur-j commented 3 years ago

@holdenk did you get a chance to look at this?

jrevillard commented 2 years ago

Same issue for me. Could you please have a look?

Relevant pom.xml part:

<properties>
    <java.version>1.8</java.version>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <spark.version>3.1.2</spark.version>
    <scala.binary.version>2.12</scala.binary.version>
    <spark-testing-base.version>1.1.1</spark-testing-base.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>com.holdenkarau</groupId>
        <artifactId>spark-testing-base_${scala.binary.version}</artifactId>
        <version>${spark.version}_${spark-testing-base.version}</version>
        <scope>test</scope>
    </dependency>
</dependencies>

Here is the test class:

package com.test;

import org.junit.Test;
import com.holdenkarau.spark.testing.JavaDataFrameSuiteBase;

public class HiveTests extends JavaDataFrameSuiteBase {

  @Override
  public boolean enableHiveSupport() {
    return true;
  }

  @Override
  public void beforeAllTestCasesHook() {
    super.beforeAllTestCasesHook();
    spark().sql("CREATE DATABASE my_schema");
    spark()
        .sql("CREATE TABLE my_schema.test_table ( test string ) stored as orc");
  }

  @Test
  public void simpleTest() {

  }

}

Console output:

22/02/17 19:44:32 WARN Utils: Your hostname, LINUX-3B70DK3 resolves to a loopback address: 127.0.1.1; using 192.168.78.152 instead (on interface wlp0s20f3)
22/02/17 19:44:32 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/home/jerome/.m2/repository/org/apache/spark/spark-unsafe_2.12/3.1.2/spark-unsafe_2.12-3.1.2.jar) to constructor java.nio.DirectByteBuffer(long,int)
WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
22/02/17 19:44:33 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
22/02/17 19:44:33 WARN SparkContext: Using an existing SparkContext; some configuration may not take effect.
22/02/17 19:44:35 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist
22/02/17 19:44:35 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist
22/02/17 19:44:38 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 2.3.0
22/02/17 19:44:38 WARN ObjectStore: setMetaStoreSchemaVersion called but recording version is disabled: version = 2.3.0, comment = Set by MetaStore jerome@127.0.1.1
22/02/17 19:44:38 WARN ObjectStore: Failed to get database default, returning NoSuchObjectException
22/02/17 19:44:38 WARN ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
22/02/17 19:44:38 WARN ObjectStore: Failed to get database my_schema, returning NoSuchObjectException

And finally, the full exception:

org.apache.spark.sql.AnalysisException: Hive support is required to CREATE Hive TABLE (AS SELECT);
'CreateTable `my_schema`.`test_table`, org.apache.hadoop.hive.ql.io.orc.OrcSerde, ErrorIfExists

    at org.apache.spark.sql.execution.datasources.HiveOnlyCheck$.$anonfun$apply$4(rules.scala:462)
    at org.apache.spark.sql.execution.datasources.HiveOnlyCheck$.$anonfun$apply$4$adapted(rules.scala:460)
    at org.apache.spark.sql.catalyst.trees.TreeNode.foreach(TreeNode.scala:174)
    at org.apache.spark.sql.execution.datasources.HiveOnlyCheck$.apply(rules.scala:460)
    at org.apache.spark.sql.execution.datasources.HiveOnlyCheck$.apply(rules.scala:458)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$46(CheckAnalysis.scala:699)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$46$adapted(CheckAnalysis.scala:699)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:699)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:90)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:155)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:176)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:228)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:173)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:73)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:143)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
    at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:143)
    at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:73)
    at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:71)
    at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:63)
    at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:98)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
    at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
    at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:618)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
    at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:613)
    at com.test.HiveTests.beforeAllTestCasesHook(HiveTests.java:19)
    at com.holdenkarau.spark.testing.SharedJavaSparkContext.runBefore(SharedJavaSparkContext.java:54)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.base/java.lang.reflect.Method.invoke(Method.java:566)
    at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
    at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
    at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
    at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:24)
    at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
    at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
    at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
    at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
    at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
    at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
    at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
    at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
    at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:93)
    at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:40)
    at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:529)
    at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:756)
    at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:452)
    at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:210)

Best, Jerome

jrevillard commented 2 years ago

OK, I managed to make it work. I had to override another method in my HiveTests class:

  @Override
  public SparkConf conf() {
    SparkConf conf = super.conf();
    // Switch the catalog implementation from the default "in-memory" to
    // "hive" before the SparkSession is created, so Hive DDL is accepted.
    conf.set("spark.sql.catalogImplementation", "hive");
    return conf;
  }

I think something is not initialised in the proper order (probably the SparkContext).
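
For anyone using the Scala DataFrameSuiteBase directly, the equivalent workaround should look roughly like the sketch below. This is untested and assumes the Scala base classes expose the same conf override point as SharedJavaSparkContext; the class and test names are only illustrative:

import com.holdenkarau.spark.testing.DataFrameSuiteBase
import org.apache.spark.SparkConf
import org.scalatest.{FlatSpec, Matchers}

class HiveEnabledOpsTest extends FlatSpec with DataFrameSuiteBase with Matchers {

  // Assumption: switching the catalog implementation from the default
  // "in-memory" to "hive" before the SparkSession is built avoids the
  // "Hive support is required to CREATE Hive TABLE (AS SELECT)" analysis error.
  override def conf: SparkConf =
    super.conf.set("spark.sql.catalogImplementation", "hive")

  it should "create a Hive table" in {
    spark.sql("CREATE DATABASE IF NOT EXISTS test_db_input LOCATION '/tmp/test_db_input.db'")
    spark.sql("CREATE TABLE test_db_input.test_table_input(col STRING)")
  }
}

Note that overriding enableHiveSupport() alone was not enough in my case; setting spark.sql.catalogImplementation explicitly is what made the difference.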

Best, Jerome

Roganliu commented 1 year ago

I encountered the same issue using Hive 2.3.7 in Zeppelin, with Spark 3.2.4 and MySQL 8.0.2.