prestodb / presto

The official home of the Presto distributed SQL query engine for big data
http://prestodb.io
Apache License 2.0
16.04k stars 5.37k forks source link

[native] how to query kerberos secured hdfs #22816

Open iBuddha opened 5 months ago

iBuddha commented 5 months ago

Describe the problem you faced

Can't query a Hive table; the query fails with the following exception stack:

2024-05-23 08:41:35.063373, p15646, th140390107694848, ERROR cannot setup block reader for Block: [block pool ID: BP-1558730017-10.129.88.15-1638771856649 block ID 1255941812_182300257] file /presto-dev/order/datetime=2023020715/000040_0_20230223_075329_10632_5yt9g on Datanode: t-qcbj5-hadoop-dn-001.test-qcloud(10.129.88.22).
TcpSocket.cpp: 73: HdfsEndOfStream: Read 8 bytes failed from "10.129.88.22:10040": End of the stream
    @   Unknown
    @   Unknown
    @   Unknown
    @   Unknown
    @   Unknown
    @   Unknown
    @   Unknown
    @   Unknown
    @   Unknown
    @   facebook::velox::HdfsReadFile::preadInternal(unsigned long, unsigned long, char*) const
    @   facebook::velox::HdfsReadFile::pread(unsigned long, unsigned long, void*) const
    @   facebook::velox::ReadFile::preadv(unsigned long, std::vector<folly::Range<char*>, std::allocator<folly::Range<char*> > > const&) const
    @   facebook::velox::dwio::common::ReadFileInputStream::read(std::vector<folly::Range<char*>, std::allocator<folly::Range<char*> > > const&, unsigned long, facebook::velox::dwio::common::MetricsLog::MetricsType)
    @   facebook::velox::cache::readPins(std::vector<facebook::velox::cache::CachePin, std::allocator<facebook::velox::cache::CachePin> > const&, int, int, std::function<unsigned long (int)>, std::function<void (std::vector<facebook::velox::cache::CachePin, std::allocator<facebook::velox::cache::CachePin> > const&, int, int, unsigned long, std::vector<folly::Range<char*>, std::allocator<folly::Range<char*> > > const&)>)
    @   facebook::velox::dwio::common::(anonymous namespace)::DwioCoalescedLoad::loadData(bool)
    @   facebook::velox::cache::CoalescedLoad::loadOrFuture(folly::SemiFuture<bool>*)
    @   facebook::velox::dwio::common::CacheInputStream::loadPosition() [clone .localalias]
    @   facebook::velox::dwio::common::CacheInputStream::Next(void const**, int*)
    @   facebook::velox::parquet::ReaderBase::loadFileMetaData()
    @   facebook::velox::parquet::ReaderBase::ReaderBase(std::unique_ptr<facebook::velox::dwio::common::BufferedInput, std::default_delete<facebook::velox::dwio::common::BufferedInput> >, facebook::velox::dwio::common::ReaderOptions const&)
    @   facebook::velox::parquet::ParquetReader::ParquetReader(std::unique_ptr<facebook::velox::dwio::common::BufferedInput, std::default_delete<facebook::velox::dwio::common::BufferedInput> >, facebook::velox::dwio::common::ReaderOptions const&)
    @   facebook::velox::parquet::ParquetReaderFactory::createReader(std::unique_ptr<facebook::velox::dwio::common::BufferedInput, std::default_delete<facebook::velox::dwio::common::BufferedInput> >, facebook::velox::dwio::common::ReaderOptions const&)
    @   facebook::velox::connector::hive::SplitReader::createReader()
    @   facebook::velox::connector::hive::SplitReader::prepareSplit(std::shared_ptr<facebook::velox::common::MetadataFilter>, facebook::velox::dwio::common::RuntimeStatistics&)
    @   facebook::velox::connector::hive::HiveDataSource::addSplit(std::shared_ptr<facebook::velox::connector::ConnectorSplit>)
    @   facebook::velox::exec::TableScan::getOutput()
    @   facebook::velox::exec::Driver::runInternal(std::shared_ptr<facebook::velox::exec::Driver>&, std::shared_ptr<facebook::velox::exec::BlockingState>&, std::shared_ptr<facebook::velox::RowVector>&)
    @   facebook::velox::exec::Driver::run(std::shared_ptr<facebook::velox::exec::Driver>)
    @   void folly::detail::function::FunctionTraits<void ()>::callSmall<facebook::velox::exec::Driver::enqueue(std::shared_ptr<facebook::velox::exec::Driver>)::{lambda()#1}>(folly::detail::function::Data&)
    @   folly::detail::function::FunctionTraits<void ()>::operator()()
    @   folly::ThreadPoolExecutor::runTask(std::shared_ptr<folly::ThreadPoolExecutor::Thread> const&, folly::ThreadPoolExecutor::Task&&)
    @   folly::CPUThreadPoolExecutor::threadRun(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)
    @   void std::__invoke_impl<void, void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&>(std::__invoke_memfun_deref, void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&)
    @   std::__invoke_result<void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&>::type std::__invoke<void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&>(void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&)
    @   void std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)>::__call<void, , 0ul, 1ul>(std::tuple<>&&, std::_Index_tuple<0ul, 1ul>)
    @   void std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)>::operator()<, void>()
    @   void folly::detail::function::FunctionTraits<void ()>::callSmall<std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)> >(folly::detail::function::Data&)
    @   Unknown
    @   start_thread
    @   __GI___clone

retry another node
2024-05-23 08:41:35.323331, p15646, th140390124480256, ERROR cannot setup block reader for Block: [block pool ID: BP-1558730017-10.129.88.15-1638771856649 block ID 1255941832_182300277] file /presto-dev/order/datetime=2023020715/000032_0_20230223_075329_10632_5yt9g on Datanode: t-qcbj5-hadoop-dn-002.test-qcloud(10.129.88.49).
TcpSocket.cpp: 73: HdfsEndOfStream: Read 8 bytes failed from "10.129.88.49:10040": End of the stream
    @   Unknown
    @   Unknown
    @   Unknown
    @   Unknown
    @   Unknown
    @   Unknown
    @   Unknown
    @   Unknown
    @   Unknown
    @   facebook::velox::HdfsReadFile::preadInternal(unsigned long, unsigned long, char*) const
    @   facebook::velox::HdfsReadFile::pread(unsigned long, unsigned long, void*) const
    @   facebook::velox::ReadFile::preadv(unsigned long, std::vector<folly::Range<char*>, std::allocator<folly::Range<char*> > > const&) const
    @   facebook::velox::dwio::common::ReadFileInputStream::read(std::vector<folly::Range<char*>, std::allocator<folly::Range<char*> > > const&, unsigned long, facebook::velox::dwio::common::MetricsLog::MetricsType)
    @   facebook::velox::cache::readPins(std::vector<facebook::velox::cache::CachePin, std::allocator<facebook::velox::cache::CachePin> > const&, int, int, std::function<unsigned long (int)>, std::function<void (std::vector<facebook::velox::cache::CachePin, std::allocator<facebook::velox::cache::CachePin> > const&, int, int, unsigned long, std::vector<folly::Range<char*>, std::allocator<folly::Range<char*> > > const&)>)
    @   facebook::velox::dwio::common::(anonymous namespace)::DwioCoalescedLoad::loadData(bool)
    @   facebook::velox::cache::CoalescedLoad::loadOrFuture(folly::SemiFuture<bool>*)
    @   facebook::velox::dwio::common::CacheInputStream::loadPosition() [clone .localalias]
    @   facebook::velox::dwio::common::CacheInputStream::Next(void const**, int*)
    @   facebook::velox::parquet::ReaderBase::loadFileMetaData()
    @   facebook::velox::parquet::ReaderBase::ReaderBase(std::unique_ptr<facebook::velox::dwio::common::BufferedInput, std::default_delete<facebook::velox::dwio::common::BufferedInput> >, facebook::velox::dwio::common::ReaderOptions const&)
    @   facebook::velox::parquet::ParquetReader::ParquetReader(std::unique_ptr<facebook::velox::dwio::common::BufferedInput, std::default_delete<facebook::velox::dwio::common::BufferedInput> >, facebook::velox::dwio::common::ReaderOptions const&)
    @   facebook::velox::parquet::ParquetReaderFactory::createReader(std::unique_ptr<facebook::velox::dwio::common::BufferedInput, std::default_delete<facebook::velox::dwio::common::BufferedInput> >, facebook::velox::dwio::common::ReaderOptions const&)
    @   facebook::velox::connector::hive::SplitReader::createReader()
    @   facebook::velox::connector::hive::SplitReader::prepareSplit(std::shared_ptr<facebook::velox::common::MetadataFilter>, facebook::velox::dwio::common::RuntimeStatistics&)
    @   facebook::velox::connector::hive::HiveDataSource::addSplit(std::shared_ptr<facebook::velox::connector::ConnectorSplit>)
    @   facebook::velox::exec::TableScan::getOutput()
    @   facebook::velox::exec::Driver::runInternal(std::shared_ptr<facebook::velox::exec::Driver>&, std::shared_ptr<facebook::velox::exec::BlockingState>&, std::shared_ptr<facebook::velox::RowVector>&)
    @   facebook::velox::exec::Driver::run(std::shared_ptr<facebook::velox::exec::Driver>)
    @   void folly::detail::function::FunctionTraits<void ()>::callSmall<facebook::velox::exec::Driver::enqueue(std::shared_ptr<facebook::velox::exec::Driver>)::{lambda()#1}>(folly::detail::function::Data&)
    @   folly::detail::function::FunctionTraits<void ()>::operator()()
    @   folly::ThreadPoolExecutor::runTask(std::shared_ptr<folly::ThreadPoolExecutor::Thread> const&, folly::ThreadPoolExecutor::Task&&)
    @   folly::CPUThreadPoolExecutor::threadRun(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)
    @   void std::__invoke_impl<void, void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&>(std::__invoke_memfun_deref, void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&)
    @   std::__invoke_result<void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&>::type std::__invoke<void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&>(void (folly::ThreadPoolExecutor::*&)(std::shared_ptr<folly::ThreadPoolExecutor::Thread>), folly::ThreadPoolExecutor*&, std::shared_ptr<folly::ThreadPoolExecutor::Thread>&)
    @   void std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)>::__call<void, , 0ul, 1ul>(std::tuple<>&&, std::_Index_tuple<0ul, 1ul>)
    @   void std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)>::operator()<, void>()
    @   void folly::detail::function::FunctionTraits<void ()>::callSmall<std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)> >(folly::detail::function::Data&)
    @   Unknown
    @   start_thread
    @   __GI___clone

retry another node

Environment Description

majetideepak commented 5 months ago

There is no support for Kerberos in Prestissimo.

goldenbean commented 5 months ago

The OLAP team at Vipshop is currently working on this issue to enable access to Kerberos-secured HDFS. Please feel free to contact me, @iBuddha.

majetideepak commented 5 months ago

There is also a discussion on this topic at https://github.com/facebookincubator/velox/discussions/9969. @iBuddha, @goldenbean, please share your thoughts in that discussion.