ydb-platform / ydb

YDB is an open source Distributed SQL Database that combines high availability and scalability with strong consistency and ACID transactions
https://ydb.tech
Apache License 2.0
3.83k stars 532 forks source link

TPCH Q1 scale 100 on column shards doesn't compile #7867

Closed pavelvelikhov closed 1 month ago

pavelvelikhov commented 1 month ago

The CLI reports the following error:

Status: INTERNAL_ERROR
Issues:
<main>: Error: Internal error while compiling query.
    <main>: Error: ydb/core/kqp/query_data/kqp_prepared_query.cpp:219: GetInfo(): requirement info failed.

However, the logs contain this error:

2024-08-15T14:20:18.569463Z :KQP_YQL NOTICE: TraceId: 01j5b6jamsf5nnknnjnenq3805, SessionId: CompileActor 2024-08-15 14:20:18.569 NOTE  ydbd(pid=1, tid=0x00007F7CAD7BF700) [common provider] yql_provider_gateway.cpp:21: <main>: Fatal: ydb/core/kqp/query_compiler/kqp_olap_compiler.cpp:484  GetOrCreateColumnId(): requirement false failed, message: Unknown node in OLAP comparison compiler: , code: 1

AST of the query before the error:

(
(let $1 (DataType 'Double))
(let $2 '('0 '1))
(let $3 (DataType 'Uint64))
(let $4 (TupleType $1 $3))
(let $5 (DataType 'Utf8))
(let $6 '('"l_linestatus" $5))
(let $7 '('"l_returnflag" $5))
(let $8 (StructType '('_yql_agg_0 $4) '('_yql_agg_1 $4) '('_yql_agg_2 $4) '('_yql_agg_3 $3) '('_yql_agg_4 $1) '('_yql_agg_5 $1) '('_yql_agg_6 $1) '('_yql_agg_7 $1) $6 $7))
(let $9 '('('"_logical_id" '5645) '('"_id" '"b97a8a71-e6b18086-f6d717a8-83f5a92f") '('"_wide_channels" $8)))
(let $10 (DqPhyStage '() (lambda '() (block '(
  (let $20 '"olap-testing-sas-common/kikimr/pavelvelikhov/tpc/yql_tpch_100_col/lineitem")
  (let $21 (KqpTable $20 '"72075186224045907:51" '"" '1))
  (let $22 '('"l_discount" '"l_extendedprice" '"l_linestatus" '"l_quantity" '"l_returnflag" '"l_shipdate" '"l_tax"))
  (let $23 (KqpBlockReadOlapTableRanges $21 (Void) $22 '() '() (lambda '($30) (block '(
    (let $31 '('lte '"l_shipdate" '('just (Date '"10461"))))
    (let $32 '('?? $31 (Bool 'false)))
    (let $33 '('"l_discount" '"l_extendedprice" '"l_linestatus" '"l_quantity" '"l_returnflag" '"l_tax"))
    (return (TKqpOlapExtractMembers (KqpOlapFilter $30 $32) $33))
  )))))
  (let $24 (lambda '($34 $35 $36 $37 $38 $39 $40) (block '(
    (let $41 (BlockType $1))
    (let $42 (AsScalar (Int32 '1)))
    (let $43 (BlockFunc '"Sub" $41 $42 $34))
    (let $44 (BlockFunc '"Mul" $41 $35 $43))
    (let $45 (BlockFunc '"Add" $41 $42 $39))
    (let $46 (BlockFunc '"Mul" $41 $44 $45))
    (return $36 $38 $37 $35 $34 $37 $35 $44 $46 $40)
  ))))
  (let $25 (AggBlockApply 'avg $1))
  (let $26 (AggBlockApply 'sum $1))
  (let $27 '('($25 '"2") '($25 '"3") '($25 '"4") '((AggBlockApply 'count_all)) '($26 '"5") '($26 '"6") '($26 '"7") '($26 '"8")))
  (let $28 (BlockCombineHashed (WideMap $23 $24) (Void) $2 $27))
  (let $29 (lambda '($47 $48 $49 $50 $51 $52 $53 $54 $55 $56 $57) $49 $50 $51 $52 $53 $54 $55 $56 $47 $48 $57))
  (return (FromFlow (WideMap $28 $29)))
))) $9))
(let $11 (DqCnHashShuffle (TDqOutput $10 '0) '('"8" '"9")))
(let $12 (StructType '('"avg_disc" $1) '('"avg_price" $1) '('"avg_qty" $1) '('"count_order" $3) $6 $7 '('"sum_base_price" $1) '('"sum_charge" $1) '('"sum_disc_price" $1) '('"sum_qty" $1)))
(let $13 '('('"_logical_id" '13893) '('"_id" '"5c8aafd4-c1caaf76-b9039207-f00751d5") '('"_wide_channels" $12)))
(let $14 (DqPhyStage '($11) (lambda '($58) (block '(
  (let $59 (lambda '($67 $68 $69 $70 $71 $72 $73 $74 $75 $76 $77) $75 $76 $67 $68 $69 $70 $71 $72 $73 $74 $77))
  (let $60 (AggBlockApplyState 'avg $1 $4))
  (let $61 (AggBlockApplyState 'count_all $3 $3))
  (let $62 (AggBlockApplyState 'sum $1 $1))
  (let $63 '('($60 '"2") '($60 '"3") '($60 '"4") '($61 '"5") '($62 '"6") '($62 '"7") '($62 '"8") '($62 '"9")))
  (let $64 (BlockMergeFinalizeHashed (WideMap (ToFlow $58) $59) $2 $63))
  (let $65 (Bool 'true))
  (let $66 (lambda '($78 $79 $80 $81 $82 $83 $84 $85 $86 $87 $88) $82 $81 $80 $83 $78 $79 $85 $87 $86 $84 $88))
  (return (FromFlow (WideFromBlocks (WideMap (WideSortBlocks $64 '('('1 $65) '('0 $65))) $66))))
))) $13))
(let $15 (DqCnMerge (TDqOutput $14 '0) '('('"5" '"Asc") '('"4" '"Asc"))))
(let $16 (DqPhyStage '($15) (lambda '($89) (FromFlow (NarrowMap (ToFlow $89) (lambda '($90 $91 $92 $93 $94 $95 $96 $97 $98 $99) (AsStruct '('"avg_disc" $90) '('"avg_price" $91) '('"avg_qty" $92) '('"count_order" $93) '('"l_linestatus" $94) '('"l_returnflag" $95) '('"sum_base_price" $96) '('"sum_charge" $97) '('"sum_disc_price" $98) '('"sum_qty" $99)))))) '('('"_logical_id" '13905) '('"_id" '"2007d364-a5b2ee99-45e817d1-e14c96a2"))))
(let $17 '($10 $14 $16))
(let $18 '('"l_returnflag" '"l_linestatus" '"sum_qty" '"sum_base_price" '"sum_disc_price" '"sum_charge" '"avg_qty" '"avg_price" '"avg_disc" '"count_order"))
(let $19 (DqCnResult (TDqOutput $16 '0) $18))
(return (KqpPhysicalQuery '((KqpPhysicalTx $17 '($19) '() '('('"type" '"generic")))) '((KqpTxResultBinding (ListType $12) '0 '0)) '('('"type" '"query"))))
)
pavelvelikhov commented 1 month ago

The query that fails (actually about 50% of TPCH queries are failing): https://github.com/ydb-platform/ydb/blob/main/ydb/library/benchmarks/queries/tpch/nice/q1.sql

zverevgeny commented 1 month ago

As a workaround, you may temporarily disable this predicate pushdown by rewriting `date('1998-12-01')` to `Cast(date('1998-12-01') as Timestamp)`.

Tony-Romanov commented 1 month ago

How to reproduce?

pavelvelikhov commented 1 month ago

> How to reproduce?

@pashandor789 added a fix to kqp_query_compiler.cpp:

} else if (value.Maybe<TCoDate>()) {
        ssaValue->MutableConstant()->SetTimestamp(FromString<ui16>(nodeValue));

in this PR: https://github.com/ydb-platform/ydb/pull/7972

This seems to solve the problem.