YotpoLtd / metorikku

A simplified, lightweight ETL Framework based on Apache Spark
https://yotpoltd.github.io/metorikku/
MIT License
581 stars 154 forks source link

hudi - java.lang.ClassNotFoundException: com.uber.hoodie.SimpleKeyGenerator error #215

Open tooptoop4 opened 5 years ago

tooptoop4 commented 5 years ago

Using Spark 2.3 with v0.0.51/metorikku-standalone.jar, submitted as follows:

/home/ec2-user/spark_home/bin/spark-submit --master local[*] --conf "spark.sql.parquet.writeLegacyFormat=true" --class com.yotpo.metorikku.Metorikku --jars "/usr/lib/apache-hive-2.3.4-bin/lib/mysql-connector-java.jar,/home/ec2-user/hoodie-spark-bundle-0.4.7.jar" --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" /home/ec2-user/metorikku-standalone.jar -c /home/ec2-user/hi.yaml

Exception in thread "main" com.yotpo.metorikku.exceptions.MetorikkuWriteFailedException: Failed to write dataFrame: dac_curr to output: Hudi on metric: hm
    at com.yotpo.metorikku.metric.Metric.com$yotpo$metorikku$metric$Metric$$writeBatch(Metric.scala:101)
    at com.yotpo.metorikku.metric.Metric$$anonfun$write$1.apply(Metric.scala:136)
    at com.yotpo.metorikku.metric.Metric$$anonfun$write$1.apply(Metric.scala:125)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at com.yotpo.metorikku.metric.Metric.write(Metric.scala:125)
    at com.yotpo.metorikku.metric.MetricSet$$anonfun$run$1.apply(MetricSet.scala:44)
    at com.yotpo.metorikku.metric.MetricSet$$anonfun$run$1.apply(MetricSet.scala:39)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at com.yotpo.metorikku.metric.MetricSet.run(MetricSet.scala:39)
    at com.yotpo.metorikku.Metorikku$$anonfun$runMetrics$1.apply(Metorikku.scala:17)
    at com.yotpo.metorikku.Metorikku$$anonfun$runMetrics$1.apply(Metorikku.scala:15)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at com.yotpo.metorikku.Metorikku$.runMetrics(Metorikku.scala:15)
    at com.yotpo.metorikku.Metorikku$.delayedEndpoint$com$yotpo$metorikku$Metorikku$1(Metorikku.scala:11)
    at com.yotpo.metorikku.Metorikku$delayedInit$body.apply(Metorikku.scala:7)
    at scala.Function0$class.apply$mcV$sp(Function0.scala:34)
    at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
    at scala.App$$anonfun$main$1.apply(App.scala:76)
    at scala.App$$anonfun$main$1.apply(App.scala:76)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
    at scala.App$class.main(App.scala:76)
    at com.yotpo.metorikku.Metorikku$.main(Metorikku.scala:7)
    at com.yotpo.metorikku.Metorikku.main(Metorikku.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:894)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:198)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:228)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.io.IOException: Could not load key generator class com.uber.hoodie.SimpleKeyGenerator
    at com.uber.hoodie.DataSourceUtils.createKeyGenerator(DataSourceUtils.java:98)
    at com.uber.hoodie.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:87)
    at com.uber.hoodie.DefaultSource.createRelation(DefaultSource.scala:91)
    at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
    at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:656)
    at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:656)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:656)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:273)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:267)
    at com.yotpo.metorikku.output.writers.file.HudiOutputWriter.write(HudiOutputWriter.scala:115)
    at com.yotpo.metorikku.metric.Metric.com$yotpo$metorikku$metric$Metric$$writeBatch(Metric.scala:97)
    ... 33 more
Caused by: com.uber.hoodie.exception.HoodieException: Unable to load class
    at com.uber.hoodie.common.util.ReflectionUtils.getClass(ReflectionUtils.java:40)
    at com.uber.hoodie.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:74)
    at com.uber.hoodie.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:87)
    at com.uber.hoodie.DataSourceUtils.createKeyGenerator(DataSourceUtils.java:96)
    ... 55 more
Caused by: java.lang.ClassNotFoundException: com.uber.hoodie.SimpleKeyGenerator
    at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:349)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:264)
    at com.uber.hoodie.common.util.ReflectionUtils.getClass(ReflectionUtils.java:37)
    ... 58 more

lyogev commented 5 years ago

This is very strange. You are right, it's the wrong class, and we need to update it. It's working for us in production and in tests, though, so I'm wondering what's going on... Either way, we'll fix it.

lyogev commented 5 years ago

Where did you get the JAR from?

lyogev commented 5 years ago

This is the one we're using: https://repo1.maven.org/maven2/com/uber/hoodie/hoodie-spark-bundle/0.4.7/hoodie-spark-bundle-0.4.7.jar