databricks / spark-sql-perf

Apache License 2.0
582 stars 406 forks source link

Getting error when analyzing the columns #192

Open jitheshksn opened 3 years ago

jitheshksn commented 3 years ago

I am trying to generate data for TPC-DS. The data is generated successfully, but an exception is thrown while analyzing the tables.

This is the log: it starts analyzing the tables but fails on the first table. Analyzing table catalog_sales. Analyzing table catalog_sales columns cs_sold_date_sk, cs_sold_time_sk, cs_ship_date_sk, cs_bill_customer_sk, cs_bill_cdemo_sk, cs_bill_hdemo_sk, cs_bill_addr_sk, cs_ship_customer_sk, cs_ship_cdemo_sk, cs_ship_hdemo_sk, cs_ship_addr_sk, cs_call_center_sk, cs_catalog_page_sk, cs_ship_mode_sk, cs_warehouse_sk, cs_item_sk, cs_promo_sk, cs_order_number, cs_quantity, cs_wholesale_cost, cs_list_price, cs_sales_price, cs_ext_discount_amt, cs_ext_sales_price, cs_ext_wholesale_cost, cs_ext_list_price, cs_ext_tax, cs_coupon_amt, cs_ext_ship_cost, cs_net_paid, cs_net_paid_inc_tax, cs_net_paid_inc_ship, cs_net_paid_inc_ship_tax, cs_net_profit.

org.apache.spark.sql.AnalysisException: Column cs_sold_date_sk does not exist. at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.$anonfun$getColumnsToAnalyze$3(AnalyzeColumnCommand.scala:90) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.$anonfun$getColumnsToAnalyze$1(AnalyzeColumnCommand.scala:90) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) at scala.collection.TraversableLike.map(TraversableLike.scala:238) at scala.collection.TraversableLike.map$(TraversableLike.scala:231) at scala.collection.AbstractTraversable.map(Traversable.scala:108) at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.getColumnsToAnalyze(AnalyzeColumnCommand.scala:88) at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.analyzeColumnInCatalog(AnalyzeColumnCommand.scala:116) at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.run(AnalyzeColumnCommand.scala:51) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79) at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:234) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3702) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:116) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:249) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:101) at 
org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:836) at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:199) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3700) at org.apache.spark.sql.Dataset.(Dataset.scala:234) at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:104) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:836) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:101) at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:671) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:836) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:666) at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:672) at com.databricks.spark.sql.perf.Tables$Table.analyzeTable(Tables.scala:280) at com.databricks.spark.sql.perf.Tables.$anonfun$analyzeTables$2(Tables.scala:352) at com.databricks.spark.sql.perf.Tables.$anonfun$analyzeTables$2$adapted(Tables.scala:351) at scala.collection.immutable.List.foreach(List.scala:392) at com.databricks.spark.sql.perf.Tables.analyzeTables(Tables.scala:351) at line533d73f9645742d2b1ac0c523afb366f27.$read$$iw$$iw$$iw$$iw$$iw$$iw.(command-4211249281701897:48) at line533d73f9645742d2b1ac0c523afb366f27.$read$$iw$$iw$$iw$$iw$$iw.(command-4211249281701897:100) at line533d73f9645742d2b1ac0c523afb366f27.$read$$iw$$iw$$iw$$iw.(command-4211249281701897:102) at line533d73f9645742d2b1ac0c523afb366f27.$read$$iw$$iw$$iw.(command-4211249281701897:104) at line533d73f9645742d2b1ac0c523afb366f27.$read$$iw$$iw.(command-4211249281701897:106) at line533d73f9645742d2b1ac0c523afb366f27.$read$$iw.(command-4211249281701897:108) at line533d73f9645742d2b1ac0c523afb366f27.$read.(command-4211249281701897:110) at line533d73f9645742d2b1ac0c523afb366f27.$read$.(command-4211249281701897:114) at 
line533d73f9645742d2b1ac0c523afb366f27.$read$.(command-4211249281701897) at line533d73f9645742d2b1ac0c523afb366f27.$eval$.$print$lzycompute(:7) at line533d73f9645742d2b1ac0c523afb366f27.$eval$.$print(:6) at line533d73f9645742d2b1ac0c523afb366f27.$eval.$print() at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:745) at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1021) at scala.tools.nsc.interpreter.IMain.$anonfun$interpret$1(IMain.scala:574) at scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:41) at scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:37) at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:41) at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:573) at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:600) at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:570) at com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:219) at com.databricks.backend.daemon.driver.ScalaDriverLocal.$anonfun$repl$1(ScalaDriverLocal.scala:204) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:773) at com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:726) at com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:204) at com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$10(DriverLocal.scala:431) at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:239) at 
scala.util.DynamicVariable.withValue(DynamicVariable.scala:62) at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:234) at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:231) at com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:48) at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:276) at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:269) at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:48) at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:408) at com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$1(DriverWrapper.scala:653) at scala.util.Try$.apply(Try.scala:213) at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:645) at com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:486) at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:598) at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:391) at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:337) at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:219) at java.lang.Thread.run(Thread.java:748)