apache / doris

Apache Doris is an easy-to-use, high performance and unified analytics database.
https://doris.apache.org
Apache License 2.0
11.79k stars 3.11k forks source link

[Bug] select from EXTERNAL TABLE from ELASTICSEARCH nested field has array value error #30033

Open mnloveyx opened 5 months ago

mnloveyx commented 5 months ago

Search before asking

Version

doris-2.0.3-rc06

What's Wrong?

  1. CREATE EXTERNAL TABLE from ELASTICSEARCH
  2. ES one of FIELD Mapping {"test_line" : { "type" : "nested", "properties" : { "avl_test" : { "type" : "float" }, "test_line" : { "type" : "float" }, "final_avl_test" : { "type" : "float" }, "final_class" : { "type" : "text", "fields" : { "keyword" : { "type" : "keyword", "ignore_above" : 256 } } }, "quota_type" : { "type" : "text", "fields" : { "keyword" : { "type" : "keyword", "ignore_above" : 256 } } }, "state" : { "type" : "text", "fields" : { "keyword" : { "type" : "keyword", "ignore_above" : 256 } } } } }, "test_line_update_time" : { "type" : "date" }, "test_withdrawal" : { "type" : "long" }, "testlimit_reject" : { "type" : "text", "fields" : { "keyword" : { "type" : "keyword", "ignore_above" : 256, "normalizer" : "ignorecase_normalizer" } } } }

**Selecting from the table fails with an error:**

`org.jkiss.dbeaver.model.sql.DBSQLException: SQL 错误 [1105] [HY000]: errCode = 2, detailMessage = (192.168.10.95)[CANCELLED][RUNTIME_ERROR]Expected value of type: STRING; but found type: Array; Document slice is : [{"avl_test":30000.0,"test_line":30000.0,"state":"normal","quota_type":"U_USER"},{"avl_test":30000.0,"test_line":30000.0,"state":"normal","quota_type":"123"},{"avl_test":30000.0,"test_line":30000.0,"state":"normal","quota_type":"233"},{"avl_test":30000.0,"test_line":30000.0,"state":"normal","quota_type":"233Test"},{"avl_test":10000.0,"test_line":10000.0,"state":"normal","quota_type":"233Test2"}]

0#  doris::ScrollParser::fill_columns(doris::TupleDescriptor const*, std::vector<COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn>, std::allocator<COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn> > >&, bool*, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > const&, cctz::time_zone const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:187
1#  doris::vectorized::NewEsScanner::_get_next(std::vector<COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn>, std::allocator<COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn> > >&) at /root/src/doris-2.0/be/src/vec/exec/scan/new_es_scanner.cpp:0
2#  doris::vectorized::NewEsScanner::_get_block_impl(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /root/src/doris-2.0/be/src/common/status.h:442
3#  doris::vectorized::VScanner::get_block(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /root/src/doris-2.0/be/src/vec/exec/scan/vscanner.cpp:0
4#  doris::vectorized::ScannerScheduler::_scanner_scan(doris::vectorized::ScannerScheduler*, doris::vectorized::ScannerContext*, std::shared_ptr<doris::vectorized::VScanner>) at /root/src/doris-2.0/be/src/common/status.h:354
5#  std::_Function_handler<void (), doris::vectorized::ScannerScheduler::_schedule_scanners(doris::vectorized::ScannerContext*)::$_1::operator()() const::{lambda()#4}>::_M_invoke(std::_Any_data const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:701
6#  doris::WorkThreadPool<true>::work_thread(int) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/atomic_base.h:646
7#  execute_native_thread_routine at /data/gcc-11.1.0/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:85
8#  start_thread
9#  __clone

at org.jkiss.dbeaver.model.impl.jdbc.exec.JDBCStatementImpl.executeStatement(JDBCStatementImpl.java:133)
at org.jkiss.dbeaver.ui.editors.sql.execute.SQLQueryJob.executeStatement(SQLQueryJob.java:600)
at org.jkiss.dbeaver.ui.editors.sql.execute.SQLQueryJob.lambda$2(SQLQueryJob.java:503)
at org.jkiss.dbeaver.model.exec.DBExecUtils.tryExecuteRecover(DBExecUtils.java:190)
at org.jkiss.dbeaver.ui.editors.sql.execute.SQLQueryJob.executeSingleQuery(SQLQueryJob.java:510)
at org.jkiss.dbeaver.ui.editors.sql.execute.SQLQueryJob.extractData(SQLQueryJob.java:962)
at org.jkiss.dbeaver.ui.editors.sql.SQLEditor$QueryResultsContainer.readData(SQLEditor.java:4123)
at org.jkiss.dbeaver.ui.controls.resultset.ResultSetJobDataRead.lambda$0(ResultSetJobDataRead.java:123)
at org.jkiss.dbeaver.model.exec.DBExecUtils.tryExecuteRecover(DBExecUtils.java:190)
at org.jkiss.dbeaver.ui.controls.resultset.ResultSetJobDataRead.run(ResultSetJobDataRead.java:121)
at org.jkiss.dbeaver.ui.controls.resultset.ResultSetViewer$ResultSetDataPumpJob.run(ResultSetViewer.java:5148)
at org.jkiss.dbeaver.model.runtime.AbstractJob.run(AbstractJob.java:105)
at org.eclipse.core.internal.jobs.Worker.run(Worker.java:63)

Caused by: java.sql.SQLException: errCode = 2, detailMessage = (192.168.10.95)[CANCELLED][RUNTIME_ERROR]Expected value of type: STRING; but found type: Array; Document slice is : [{"avl_test":30000.0,"test_line":30000.0,"state":"normal","quota_type":"U_USER"},{"avl_test":30000.0,"test_line":30000.0,"state":"normal","quota_type":"123"},{"avl_test":30000.0,"test_line":30000.0,"state":"normal","quota_type":"233"},{"avl_test":30000.0,"test_line":30000.0,"state":"normal","quota_type":"233Test"},{"avl_test":10000.0,"test_line":10000.0,"state":"normal","quota_type":"233Test2"}]

0#  doris::ScrollParser::fill_columns(doris::TupleDescriptor const*, std::vector<COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn>, std::allocator<COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn> > >&, bool*, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > const&, cctz::time_zone const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:187
1#  doris::vectorized::NewEsScanner::_get_next(std::vector<COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn>, std::allocator<COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn> > >&) at /root/src/doris-2.0/be/src/vec/exec/scan/new_es_scanner.cpp:0
2#  doris::vectorized::NewEsScanner::_get_block_impl(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /root/src/doris-2.0/be/src/common/status.h:442
3#  doris::vectorized::VScanner::get_block(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /root/src/doris-2.0/be/src/vec/exec/scan/vscanner.cpp:0
4#  doris::vectorized::ScannerScheduler::_scanner_scan(doris::vectorized::ScannerScheduler*, doris::vectorized::ScannerContext*, std::shared_ptr<doris::vectorized::VScanner>) at /root/src/doris-2.0/be/src/common/status.h:354
5#  std::_Function_handler<void (), doris::vectorized::ScannerScheduler::_schedule_scanners(doris::vectorized::ScannerContext*)::$_1::operator()() const::{lambda()#4}>::_M_invoke(std::_Any_data const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:701
6#  doris::WorkThreadPool<true>::work_thread(int) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/atomic_base.h:646
7#  execute_native_thread_routine at /data/gcc-11.1.0/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/unique_ptr.h:85
8#  start_thread
9#  __clone

at com.mysql.cj.jdbc.exceptions.SQLError.createSQLException(SQLError.java:130)
at com.mysql.cj.jdbc.exceptions.SQLExceptionsMapping.translateException(SQLExceptionsMapping.java:122)
at com.mysql.cj.jdbc.StatementImpl.executeInternal(StatementImpl.java:767)
at com.mysql.cj.jdbc.StatementImpl.execute(StatementImpl.java:652)
at org.jkiss.dbeaver.model.impl.jdbc.exec.JDBCStatementImpl.execute(JDBCStatementImpl.java:330)
at org.jkiss.dbeaver.model.impl.jdbc.exec.JDBCStatementImpl.executeStatement(JDBCStatementImpl.java:131)
... 12 more

`

What You Expected?

fix it

How to Reproduce?

No response

Anything Else?

No response

Are you willing to submit PR?

Code of Conduct

LemonLiTree commented 5 months ago

Could you try using the ES catalog? https://doris.apache.org/zh-CN/docs/dev/lakehouse/multi-catalog/es/#array-%E7%B1%BB%E5%9E%8B

mnloveyx commented 5 months ago

> try use es catalog? https://doris.apache.org/zh-CN/docs/dev/lakehouse/multi-catalog/es/#array-%E7%B1%BB%E5%9E%8B

Thank you for your reply. I tried the ES catalog, but the problem still exists.

image