apache / doris

Apache Doris is an easy-to-use, high performance and unified analytics database.
https://doris.apache.org
Apache License 2.0
12.32k stars 3.21k forks source link

[Bug] Be core dump when using concat as join key #13857

Closed wangbo closed 1 year ago

wangbo commented 1 year ago

Search before asking

Version

lts-1.2 commit:c08568e

Use the TPC-H dataset to create a test table:

CREATE TABLE `supplier_100` (
  `s_suppkey` int(11) NOT NULL COMMENT "",
  `s_name` varchar(25) NOT NULL COMMENT "",
  `s_address` varchar(40) NOT NULL COMMENT "",
  `s_nationkey` int(11) NOT NULL COMMENT "",
  `s_phone` varchar(15) NOT NULL COMMENT "",
  `s_acctbal` decimal(15, 2) NOT NULL COMMENT "",
  `s_comment` varchar(101) NOT NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`s_suppkey`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 12
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)

insert into  supplier_100 select * from supplier limit 100

the bad case

select t1.index_id,
       t2.sum_xxx
  from (
        select s_suppkey,
               s_name,
               concat(s_suppkey,s_name,'-', s_suppkey)as index_id
          from supplier_100
         group by
      s_suppkey,s_name
       ) t1
  left join (
        select t0.index_id,
               sum_xxx
          from (
                select s_suppkey,
                       s_name,
                       concat(s_suppkey,s_name,'-', s_suppkey)as index_id,
                       sum(s_nationkey) as sum_xxx
                  from supplier_100
                 group by
              s_suppkey,s_name
               ) t0
       )t2
    on t1.index_id = t2.index_id
 order by 
  t1.index_id,
 t2.sum_xxx
limit 1000000

stack

*** Query id: 0-0 ***
*** Aborted at 1667283506 (unix time) try "date -d @1667283506" if you are using GNU date ***
*** Current BE git commitID: c08568e ***
*** SIGABRT unkown detail explain (@0x1f400031fe1) received by PID 204769 (TID 0x7f961bbe2700) from PID 204769; stack trace: ***
 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /root/doris-vector/be/src/common/signal_handler.h:420
 1# 0x00007F975C3F5400 in /lib64/libc.so.6
 2# gsignal in /lib64/libc.so.6
 3# abort in /lib64/libc.so.6
 4# __gnu_cxx::__verbose_terminate_handler() [clone .cold] in /opt/meituan/palo/up/be/lib/palo_be
 5# __cxxabiv1::__terminate(void (*)()) in /opt/meituan/palo/up/be/lib/palo_be
 6# 0x0000560FD1F85141 in /opt/meituan/palo/up/be/lib/palo_be
 7# 0x0000560FD1F85295 in /opt/meituan/palo/up/be/lib/palo_be
 8# doris::vectorized::throwFromErrno(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, int, int) at /root/doris-vector/be/src/vec/common/exception.cpp:115
 9# Allocator<false, false>::alloc(unsigned long, unsigned long) [clone .isra.0] at /root/doris-vector/be/src/vec/common/allocator.h:134
10# void doris::vectorized::PODArrayBase<1ul, 4096ul, Allocator<false, false>, 15ul, 16ul>::alloc<>(unsigned long) at /root/doris-vector/be/src/vec/common/pod_array.h:138
11# void doris::vectorized::PODArrayBase<1ul, 4096ul, Allocator<false, false>, 15ul, 16ul>::resize<>(unsigned long) at /root/doris-vector/be/src/vec/common/pod_array.h:245
12# doris::vectorized::FunctionStringConcat::execute_impl(doris_udf::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long, unsigned long) at /root/doris-vector/be/src/vec/functions/function_string.h:479
13# doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris_udf::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long, unsigned long, bool) at /root/doris-vector/be/src/vec/functions/function.cpp:251
14# doris::vectorized::PreparedFunctionImpl::default_implementation_for_nulls(doris_udf::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long, unsigned long, bool, bool*) at /root/doris-vector/be/src/vec/functions/function.cpp:221
15# doris::vectorized::PreparedFunctionImpl::execute(doris_udf::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long, unsigned long, bool) at /root/doris-vector/be/src/vec/functions/function.cpp:273
16# doris::vectorized::IFunctionBase::execute(doris_udf::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long, unsigned long, bool) at /root/doris-vector/be/src/vec/functions/function.h:135
17# doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) at /root/doris-vector/be/src/vec/exprs/vectorized_fn_call.cpp:96
18# doris::vectorized::VExprContext::execute(doris::vectorized::Block*, int*) at /root/doris-vector/be/src/vec/exprs/vexpr_context.cpp:44
19# doris::vectorized::HashJoinNode::_extract_build_join_column(doris::vectorized::Block&, doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false>, 15ul, 16ul>&, std::vector<doris::vectorized::IColumn const*, std::allocator<doris::vectorized::IColumn const*> >&, bool&, doris::RuntimeProfile::Counter&) at /root/doris-vector/be/src/vec/exec/join/vhash_join_node.cpp:1229
20# std::__detail::__variant::__gen_vtable_impl<std::__detail::__variant::_Multi_array<std::__detail::__variant::__deduce_visit_result<doris::Status> (*)(doris::vectorized::HashJoinNode::_process_build_block(doris::RuntimeState*, doris::vectorized::Block&, unsigned char)::{lambda(auto:1&&)#1}&&, std::variant<std::monostate, doris::vectorized::SerializedHashTableContext, doris::vectorized::PrimaryTypeHashTableContext<unsigned char>, doris::vectorized::PrimaryTypeHashTableContext<unsigned short>, doris::vectorized::PrimaryTypeHashTableContext<unsigned int>, doris::vectorized::PrimaryTypeHashTableContext<unsigned long>, doris::vectorized::PrimaryTypeHashTableContext<doris::vectorized::UInt128>, doris::vectorized::PrimaryTypeHashTableContext<doris::vectorized::UInt256>, doris::vectorized::FixedKeyHashTableContext<unsigned long, true>, doris::vectorized::FixedKeyHashTableContext<unsigned long, false>, doris::vectorized::FixedKeyHashTableContext<doris::vectorized::UInt128, true>, doris::vectorized::FixedKeyHashTableContext<doris::vectorized::UInt128, false>, doris::vectorized::FixedKeyHashTableContext<doris::vectorized::UInt256, true>, doris::vectorized::FixedKeyHashTableContext<doris::vectorized::UInt256, false> >&)>, std::integer_sequence<unsigned long, 1ul> >::__visit_invoke(doris::vectorized::HashJoinNode::_process_build_block(doris::RuntimeState*, doris::vectorized::Block&, unsigned char)::{lambda(auto:1&&)#1}&&, std::variant<std::monostate, doris::vectorized::SerializedHashTableContext, doris::vectorized::PrimaryTypeHashTableContext<unsigned char>, doris::vectorized::PrimaryTypeHashTableContext<unsigned short>, doris::vectorized::PrimaryTypeHashTableContext<unsigned int>, doris::vectorized::PrimaryTypeHashTableContext<unsigned long>, doris::vectorized::PrimaryTypeHashTableContext<doris::vectorized::UInt128>, doris::vectorized::PrimaryTypeHashTableContext<doris::vectorized::UInt256>, doris::vectorized::FixedKeyHashTableContext<unsigned long, true>, 
doris::vectorized::FixedKeyHashTableContext<unsigned long, false>, doris::vectorized::FixedKeyHashTableContext<doris::vectorized::UInt128, true>, doris::vectorized::FixedKeyHashTableContext<doris::vectorized::UInt128, false>, doris::vectorized::FixedKeyHashTableContext<doris::vectorized::UInt256, true>, doris::vectorized::FixedKeyHashTableContext<doris::vectorized::UInt256, false> >&) at /var/local/ldb-toolchain/include/c++/11/variant:1020
21# doris::vectorized::HashJoinNode::_process_build_block(doris::RuntimeState*, doris::vectorized::Block&, unsigned char) at /root/doris-vector/be/src/vec/exec/join/vhash_join_node.cpp:1336
22# doris::vectorized::HashJoinNode::_hash_table_build(doris::RuntimeState*) at /root/doris-vector/be/src/vec/exec/join/vhash_join_node.cpp:1204
23# doris::vectorized::HashJoinNode::_hash_table_build_thread(doris::RuntimeState*, std::promise<doris::Status>*) at /root/doris-vector/be/src/vec/exec/join/vhash_join_node.cpp:1151
24# std::thread::_State_impl<std::thread::_Invoker<std::tuple<doris::vectorized::HashJoinNode::open(doris::RuntimeState*)::{lambda()#1}> > >::_M_run() at /var/local/ldb-toolchain/include/c++/11/bits/std_thread.h:211
25# execute_native_thread_routine in /opt/meituan/palo/up/be/lib/palo_be
26# start_thread in /lib64/libpthread.so.0
27# clone in /lib64/libc.so.6

What's Wrong?

The BE core dumps when concat is used as a join key.

What You Expected?

The query should return the correct result.

How to Reproduce?

No response

Anything Else?

No response

Are you willing to submit PR?

Code of Conduct

wangbo commented 1 year ago

This is because the join can convert the build-side column to nullable, but the function concat did not handle the case where an argument's type is nullable. I'll fix it.

wangbo commented 1 year ago

This has been fixed by #13655.