trinodb / trino

Official repository of Trino, the distributed SQL query engine for big data, formerly known as PrestoSQL (https://trino.io)
https://trino.io
Apache License 2.0
10.24k stars 2.95k forks source link

Error when Trino queries a Hive table backed by a bz2 file #22201

Open · code-snail opened 4 months ago

code-snail commented 4 months ago

(pyhive.exc.DatabaseError) {u'errorCode': 16777217, u'message': u'Failed to read file at hdfs://nameservice1/user/xx/xx/jx/xx/20240501/xxx.2024-05-01.xx.bz2', u'errorType': u'EXTERNAL', u'failureInfo': {u'cause': {u'suppressed': [], u'message': u'bad block header', u'errorInfo': {u'code': 65536, u'type': u'INTERNAL_ERROR', u'name': u'GENERIC_INTERNAL_ERROR'}, u'type': u'java.io.IOException', u'stack': [u'io.airlift.compress.bzip2.CBZip2InputStream.initBlock(CBZip2InputStream.java:474)', u'io.airlift.compress.bzip2.CBZip2InputStream.changeStateToProcessABlock(CBZip2InputStream.java:306)', u'io.airlift.compress.bzip2.CBZip2InputStream.read(CBZip2InputStream.java:383)', u'io.airlift.compress.bzip2.BZip2HadoopInputStream.read(BZip2HadoopInputStream.java:51)', u'java.base/java.io.InputStream.readNBytes(InputStream.java:509)', u'io.trino.hive.formats.line.text.TextLineReader.fillBuffer(TextLineReader.java:288)', u'io.trino.hive.formats.line.text.TextLineReader.readLine(TextLineReader.java:189)', u'io.trino.plugin.hive.line.LinePageSource.getNextPage(LinePageSource.java:61)', u'io.trino.plugin.hive.HivePageSource.getNextPage(HivePageSource.java:166)', u'io.trino.operator.TableScanOperator.getOutput(TableScanOperator.java:266)', u'io.trino.operator.Driver.processInternal(Driver.java:398)', u'io.trino.operator.Driver.lambda$process$8(Driver.java:301)', u'io.trino.operator.Driver.tryWithLock(Driver.java:704)', u'io.trino.operator.Driver.process(Driver.java:293)', u'io.trino.operator.Driver.processForDuration(Driver.java:264)', u'io.trino.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:887)', u'io.trino.execution.executor.dedicated.SplitProcessor.run(SplitProcessor.java:76)', u'io.trino.execution.executor.dedicated.TaskEntry$VersionEmbedderBridge.lambda$run$0(TaskEntry.java:191)', u'io.trino.$gen.Trino_444__20240530_062014_2.run(Unknown Source)', u'io.trino.execution.executor.dedicated.TaskEntry$VersionEmbedderBridge.run(TaskEntry.java:192)', 
u'io.trino.execution.executor.scheduler.FairScheduler.runTask(FairScheduler.java:174)', u'io.trino.execution.executor.scheduler.FairScheduler.lambda$submit$0(FairScheduler.java:161)', u'java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)', u'com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:131)', u'com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:76)', u'com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:82)', u'java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)', u'java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)', u'java.base/java.lang.Thread.run(Thread.java:1570)']}, u'type': u'io.trino.spi.TrinoException', u'message': u'Failed to read file at hdfs://nameservice1/user/data/realtime/jx/deepfmlog/20240501/sdDeepfmResultLog.2024-05-01.dmp-007.dat.bz2', u'errorInfo': {u'code': 16777217, u'type': u'EXTERNAL', u'name': u'HIVE_CURSOR_ERROR'}, u'suppressed': [], u'stack': [u'io.trino.plugin.hive.line.LinePageSource.getNextPage(LinePageSource.java:75)', u'io.trino.plugin.hive.HivePageSource.getNextPage(HivePageSource.java:166)', u'io.trino.operator.TableScanOperator.getOutput(TableScanOperator.java:266)', u'io.trino.operator.Driver.processInternal(Driver.java:398)', u'io.trino.operator.Driver.lambda$process$8(Driver.java:301)', u'io.trino.operator.Driver.tryWithLock(Driver.java:704)', u'io.trino.operator.Driver.process(Driver.java:293)', u'io.trino.operator.Driver.processForDuration(Driver.java:264)', u'io.trino.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:887)', u'io.trino.execution.executor.dedicated.SplitProcessor.run(SplitProcessor.java:76)', u'io.trino.execution.executor.dedicated.TaskEntry$VersionEmbedderBridge.lambda$run$0(TaskEntry.java:191)', 
u'io.trino.$gen.Trino_444__20240530_062014_2.run(Unknown Source)', u'io.trino.execution.executor.dedicated.TaskEntry$VersionEmbedderBridge.run(TaskEntry.java:192)', u'io.trino.execution.executor.scheduler.FairScheduler.runTask(FairScheduler.java:174)', u'io.trino.execution.executor.scheduler.FairScheduler.lambda$submit$0(FairScheduler.java:161)', u'java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)', u'com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:131)', u'com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:76)', u'com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:82)', u'java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)', u'java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)', u'java.base/java.lang.Thread.run(Thread.java:1570)']}, u'errorName': u'HIVE_CURSOR_ERROR'} [SQL: select count(1) from jxftpdata.deepfmlog where dt='20240501' limit 100] (Background on this error at: http://sqlalche.me/e/4xp6)

raunaqmorarka commented 3 months ago

cc: @dain @electrum