Hello,
I'm using Hudi on EMR 6.1, Hudi version 0.8.0-SNAPSHOT.
Hudi options:
    hoodie.datasource.hive_sync.database -> raw_courier_api_hudi
    hoodie.datasource.hive_sync.partition.extractor.class -> org.apache.hudi.hive.NonPartitionedExtractor
    hoodie.parquet.small.file.limit -> 67108864
    hoodie.copyonwrite.record.size.estimate -> 1024
    hoodie.datasource.write.precombine.field -> LineCreatedTimestamp
    hoodie.parquet.max.file.size -> 157286400
    hoodie.parquet.block.size -> 67108864
    hoodie.datasource.hive_sync.table -> customer_notification
    hoodie.datasource.write.operation -> bulk_insert
    hoodie.datasource.write.hive.style.partitioning -> false
    hoodie.datasource.hive_sync.enable -> true
    hoodie.datasource.write.recordkey.field -> id
    hoodie.table.name -> customer_notification
    hoodie.datasource.hive_sync.jdbcurl -> jdbc:hive2://ip-10-0-20-232.us-west-2.compute.internal:10000
    hoodie.datasource.write.table.name -> customer_notification
    hoodie.datasource.write.keygenerator.class -> org.apache.hudi.keygen.NonpartitionedKeyGenerator
    hoodie.bulkinsert.shuffle.parallelism -> 50
    hoodie.metadata.enable -> true
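For context, the write itself is a plain Spark datasource write with the options above. A minimal sketch of that path (not the exact code in hudiwriter.HudiWriter.createHudiTable; df and basePath are placeholders, and the hive-sync options are omitted for brevity):

    import org.apache.spark.sql.{DataFrame, SaveMode}

    // Sketch of the failing write path. `df` and `basePath` stand in for what
    // hudiwriter.HudiWriter.createHudiTable builds internally; option values are
    // the ones listed above (hive-sync options omitted for brevity).
    val hudiOptions: Map[String, String] = Map(
      "hoodie.table.name"                          -> "customer_notification",
      "hoodie.datasource.write.table.name"         -> "customer_notification",
      "hoodie.datasource.write.operation"          -> "bulk_insert",
      "hoodie.datasource.write.recordkey.field"    -> "id",
      "hoodie.datasource.write.precombine.field"   -> "LineCreatedTimestamp",
      "hoodie.datasource.write.keygenerator.class" -> "org.apache.hudi.keygen.NonpartitionedKeyGenerator",
      "hoodie.bulkinsert.shuffle.parallelism"      -> "50",
      "hoodie.metadata.enable"                     -> "true" // metadata table enabled: this is the write that fails
    )

    def writeHudi(df: DataFrame, basePath: String): Unit =
      df.write
        .format("hudi")
        .options(hudiOptions)
        .mode(SaveMode.Append)
        .save(basePath)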
error:
21/01/31 02:42:10 ERROR BaseTableMetadata: Failed to retrieve list of partition from metadata
java.lang.IllegalArgumentException: must be at-least one validata metadata file slice
    at org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:40)
    at org.apache.hudi.metadata.HoodieBackedTableMetadata.openFileSliceIfNeeded(HoodieBackedTableMetadata.java:171)
    at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordByKeyFromMetadata(HoodieBackedTableMetadata.java:118)
    at org.apache.hudi.metadata.BaseTableMetadata.getMergedRecordByKey(BaseTableMetadata.java:268)
    at org.apache.hudi.metadata.BaseTableMetadata.fetchAllPartitionPaths(BaseTableMetadata.java:149)
    at org.apache.hudi.metadata.BaseTableMetadata.getAllPartitionPaths(BaseTableMetadata.java:102)
    at org.apache.hudi.common.fs.FSUtils.getAllPartitionPaths(FSUtils.java:277)
    at org.apache.hudi.table.action.rollback.RollbackUtils.generateRollbackRequestsByListingCOW(RollbackUtils.java:98)
    at org.apache.hudi.table.action.rollback.SparkCopyOnWriteRollbackActionExecutor.executeRollbackUsingFileListing(SparkCopyOnWriteRollbackActionExecutor.java:69)
    at org.apache.hudi.table.action.rollback.BaseCopyOnWriteRollbackActionExecutor.executeRollback(BaseCopyOnWriteRollbackActionExecutor.java:81)
    at org.apache.hudi.table.action.rollback.BaseRollbackActionExecutor.doRollbackAndGetStats(BaseRollbackActionExecutor.java:170)
    at org.apache.hudi.table.action.rollback.BaseRollbackActionExecutor.execute(BaseRollbackActionExecutor.java:103)
    at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.rollback(HoodieSparkCopyOnWriteTable.java:230)
    at org.apache.hudi.client.AbstractHoodieWriteClient.rollback(AbstractHoodieWriteClient.java:552)
    at org.apache.hudi.internal.DataSourceInternalWriterHelper.abort(DataSourceInternalWriterHelper.java:91)
    at org.apache.hudi.spark3.internal.HoodieDataSourceInternalBatchWrite.abort(HoodieDataSourceInternalBatchWrite.java:89)
    at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:403)
    at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2$(WriteToDataSourceV2Exec.scala:361)
    at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.writeWithV2(WriteToDataSourceV2Exec.scala:253)
    at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.run(WriteToDataSourceV2Exec.scala:259)
    at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:39)
    at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:39)
    at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.doExecute(V2CommandExec.scala:54)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:124)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:123)
    at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:944)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:106)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:207)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:88)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:944)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:335)
    at org.apache.hudi.HoodieSparkSqlWriter$.bulkInsertAsRow(HoodieSparkSqlWriter.scala:302)
    at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:127)
    at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:134)
    at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:90)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:124)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:123)
    at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:944)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:106)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:207)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:88)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:944)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:396)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:380)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:269)
    at hudiwriter.HudiWriter.createHudiTable(HudiWriter.scala:30)
    at hudiwriter.HudiContext.writeToHudi(HudiContext.scala:44)
    at jobs.TableProcessor.start(TableProcessor.scala:82)
    at TableProcessorWrapper$.$anonfun$main$2(TableProcessorWrapper.scala:23)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
    at scala.util.Success.$anonfun$map$1(Try.scala:255)
    at scala.util.Success.map(Try.scala:213)
    at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
    at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
    at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
    at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
    at java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1402)
    at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
    at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
    at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
    at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:157)
When I disabled the Hudi metadata table (hoodie.metadata.enable -> false), the same write worked fine.
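In case it helps anyone else, that flag was the only change for the workaround (sketch, reusing the hudiOptions/df/basePath placeholders from the snippet above):

    // Workaround sketch: identical write, but with the metadata table turned off.
    df.write
      .format("hudi")
      .options(hudiOptions + ("hoodie.metadata.enable" -> "false"))
      .mode(SaveMode.Append)
      .save(basePath)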