StarRocks / demo

Apache License 2.0
83 stars 57 forks source link

Apache Hudi Demo with StarRocks Create DF throws Warning #42

Closed soumilshah1995 closed 8 months ago

soumilshah1995 commented 8 months ago

Hello, I am trying out some labs from https://github.com/StarRocks/demo. After launching the container locally, I exec into it and run /spark-3.2.1-bin-hadoop3.2/bin/spark-shell

i tried

import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.sql.Row
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._
import scala.collection.JavaConversions._

// Minimal reproduction: build a three-row DataFrame from an explicit schema.
// All three columns are declared as nullable strings, including "users",
// whose sample values are numeric text.
val schema = StructType( Array(
                 StructField("language", StringType, true),
                 StructField("users", StringType, true),
                 StructField("id", StringType, true)
             ))

// Sample rows; field order matches the schema (language, users, id).
val rowData= Seq(Row("Java", "20000", "a"), 
               Row("Python", "100000", "b"), 
               Row("Scala", "3000", "c"))

// NOTE(review): rowData is a Scala Seq[Row]; this call presumably compiles only
// through the implicit Seq -> java.util.List conversion brought in by
// scala.collection.JavaConversions._, which would explain the
// "one deprecation (since 2.12.0)" warning -- confirm with `:replay -deprecation`.
// The hadoop-master UnknownHostException in the log below is raised while Spark
// qualifies the warehouse path (SharedState.qualifyWarehousePath) and is logged
// at WARN level; the DataFrame is still created.
val df = spark.createDataFrame(rowData,schema)

Create DF throws Warning

warning: one deprecation (since 2.12.0); for details, enable `:setting -deprecation' or `:replay -deprecation'
54481 [main] WARN  org.apache.hadoop.fs.FileSystem  - Failed to initialize fileystem hdfs://hadoop-master:9000/user/hive/warehouse: java.lang.IllegalArgumentException: java.net.UnknownHostException: hadoop-master
54485 [main] WARN  org.apache.spark.sql.internal.SharedState  - Cannot qualify the warehouse path, leaving it unqualified.
java.lang.IllegalArgumentException: java.net.UnknownHostException: hadoop-master
        at org.apache.hadoop.security.SecurityUtil.buildTokenService(SecurityUtil.java:466)
        at org.apache.hadoop.hdfs.NameNodeProxiesClient.createProxyWithClientProtocol(NameNodeProxiesClient.java:134)
        at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:374)
        at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:308)
        at org.apache.hadoop.hdfs.DistributedFileSystem.initDFSClient(DistributedFileSystem.java:201)
        at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:186)
        at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3469)
        at org.apache.hadoop.fs.FileSystem.access$300(FileSystem.java:174)
        at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3574)
        at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3521)
        at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:540)
        at org.apache.hadoop.fs.Path.getFileSystem(Path.java:365)
        at org.apache.spark.sql.internal.SharedState$.qualifyWarehousePath(SharedState.scala:282)
        at org.apache.spark.sql.internal.SharedState.liftedTree1$1(SharedState.scala:80)
        at org.apache.spark.sql.internal.SharedState.<init>(SharedState.scala:79)
        at org.apache.spark.sql.SparkSession.$anonfun$sharedState$1(SparkSession.scala:139)
        at scala.Option.getOrElse(Option.scala:189)
        at org.apache.spark.sql.SparkSession.sharedState$lzycompute(SparkSession.scala:139)
        at org.apache.spark.sql.SparkSession.sharedState(SparkSession.scala:138)
        at org.apache.spark.sql.SparkSession.$anonfun$sessionState$2(SparkSession.scala:158)
        at scala.Option.getOrElse(Option.scala:189)
        at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:156)
        at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:153)
        at org.apache.spark.sql.SparkSession.$anonfun$new$3(SparkSession.scala:113)
        at scala.Option.map(Option.scala:230)
        at org.apache.spark.sql.SparkSession.$anonfun$new$1(SparkSession.scala:113)
        at org.apache.spark.sql.internal.SQLConf$.get(SQLConf.scala:230)
        at org.apache.spark.sql.catalyst.util.CharVarcharUtils$.failIfHasCharVarchar(CharVarcharUtils.scala:63)
        at org.apache.spark.sql.SparkSession.$anonfun$createDataFrame$4(SparkSession.scala:387)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
        at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:386)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:46)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:50)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:52)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:54)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:56)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:58)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:60)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:62)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:64)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:66)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:68)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:70)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:72)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:74)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:76)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:78)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:80)
        at $line24.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:82)
        at $line24.$read$$iw$$iw$$iw$$iw.<init>(<console>:84)
        at $line24.$read$$iw$$iw$$iw.<init>(<console>:86)
        at $line24.$read$$iw$$iw.<init>(<console>:88)
        at $line24.$read$$iw.<init>(<console>:90)
        at $line24.$read.<init>(<console>:92)
        at $line24.$read$.<init>(<console>:96)
        at $line24.$read$.<clinit>(<console>)
        at $line24.$eval$.$print$lzycompute(<console>:7)
        at $line24.$eval$.$print(<console>:6)
        at $line24.$eval.$print(<console>)
        at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.base/java.lang.reflect.Method.invoke(Method.java:566)
        at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)
        at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1020)
        at scala.tools.nsc.interpreter.IMain.$anonfun$interpret$1(IMain.scala:568)
        at scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:36)
        at scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:116)
        at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:41)
        at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:567)
        at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:594)
        at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:564)
        at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:865)
        at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:733)
        at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:435)
        at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:456)
        at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:239)
        at org.apache.spark.repl.Main$.doMain(Main.scala:78)
        at org.apache.spark.repl.Main$.main(Main.scala:58)
        at org.apache.spark.repl.Main.main(Main.scala)
        at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.base/java.lang.reflect.Method.invoke(Method.java:566)
        at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
        at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955)
        at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
        at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
        at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
        at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1043)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1052)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.net.UnknownHostException: hadoop-master
        ... 92 more
df: org.apache.spark.sql.DataFrame = [language: string, users: string ... 1 more field]

Any idea how I can disable this warning, or should I just ignore it?

soumilshah1995 commented 8 months ago

Got it resolved by disabling warnings.