pingcap / tiflash

The analytical engine for TiDB and TiDB Cloud. Try free: https://tidbcloud.com/free-trial
https://docs.pingcap.com/tidb/stable/tiflash-overview
Apache License 2.0
950 stars 410 forks source link

Decode error when "NULL" value in the column with "primary key" flag #5859

Closed JaySon-Huang closed 2 years ago

JaySon-Huang commented 2 years ago

Bug Report

Please answer these questions before submitting your issue. Thanks!

1. Minimal reproduce step (Required)

TiFlash assumes that all values in a column with the "primary key" flag are non-null. https://github.com/pingcap/tiflash/blob/6d0cbc8cc78b4048fb88e0845d6c9c2881e01515/dbms/src/Storages/Transaction/RowCodec.cpp#L323-L330

However, users can end up with a "NULL" value in a column that carries the "primary key" flag by following these steps:

-- create a table in which `source` is nullable
create table test.t_case (
`case_no` varchar(32) not null,
`source` varchar(20) default null,
`p` varchar(12) DEFAULT NULL,
primary key (`case_no`)
);

-- insert some data, leaving `source` unset (NULL)
insert into test.t_case(case_no)
values ("1"), ("2"), ("3"), ("4");

-- drop the primary key, then fill `source` with a non-NULL value (the empty string)
alter table t_case drop primary key;
update test.t_case set `source` = '' where `source` is NULL;
-- add a new primary key on (case_no, source)
alter table t_case add primary key (case_no, source);

-- send the snapshot data to tiflash
alter table t_case set tiflash replica 1;

2. What did you expect to see? (Required)

No exception thrown

3. What did you see instead (Required)

An exception is thrown with "Parameter out of bound in IColumnString::insertRangeFrom method" in v4.0.x/v5.0.x/v5.1.x/v5.2.x/v5.3.x/v5.4.0~v5.4.1/v6.1.0

[2022/09/13 15:30:02.786 +08:00] [ERROR] [<unknown>] ["bool DB::GCManager::work():Code: 12, e.displayText() = DB::Exception: Parameter out of bound in IColumnString::insertRangeFrom method, start=0, length=8, src_size=4:  while reading DTFile /data2/jaysonhuang/tiup-deploy/data/tiflash-5018/data/t_77/stable/dmf_2: while apply gc Segment [1] [range=[-9223372036854775808,9223372036854775807)] [table=t_77], e.what() = DB::Exception, Stack trace:
0. bin/tiflash/tiflash(StackTrace::StackTrace()+0x16) [0x36d42e6]
1. bin/tiflash/tiflash(DB::Exception::Exception(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, int)+0x26) [0x36c8116]
2. bin/tiflash/tiflash(DB::ColumnString::insertRangeFrom(DB::IColumn const&, unsigned long, unsigned long)+0x2ff) [0x759712f]
3. bin/tiflash/tiflash(DB::DM::copyColumnsData(std::vector<COWPtr<DB::IColumn>::immutable_ptr<DB::IColumn>, std::allocator<COWPtr<DB::IColumn>::immutable_ptr<DB::IColumn> > > const&, COWPtr<DB::IColumn>::immutable_ptr<DB::IColumn> const&, std::vector<COWPtr<DB::IColumn>::mutable_ptr<DB::IColumn>, std::allocator<COWPtr<DB::IColumn>::mutable_ptr<DB::IColumn> > >&, unsigned long, unsigned long, DB::DM::RowKeyRange const*)+0x7fe) [0x76f690e]
4. bin/tiflash/tiflash(DB::DM::DPFileReader::readRowsOnce(std::vector<COWPtr<DB::IColumn>::mutable_ptr<DB::IColumn>, std::allocator<COWPtr<DB::IColumn>::mutable_ptr<DB::IColumn> > >&, unsigned long, unsigned long, DB::DM::RowKeyRange const*)+0x135) [0x77059c5]
5. bin/tiflash/tiflash(DB::DM::DPFileReader::readRows(std::vector<COWPtr<DB::IColumn>::mutable_ptr<DB::IColumn>, std::allocator<COWPtr<DB::IColumn>::mutable_ptr<DB::IColumn> > >&, unsigned long, unsigned long, DB::DM::RowKeyRange const*)+0x55) [0x7706d85]
6. bin/tiflash/tiflash(DB::DM::DeltaValueReader::readRows(std::vector<COWPtr<DB::IColumn>::mutable_ptr<DB::IColumn>, std::allocator<COWPtr<DB::IColumn>::mutable_ptr<DB::IColumn> > >&, unsigned long, unsigned long, DB::DM::RowKeyRange const*)+0xcf) [0x771c34f]
7. bin/tiflash/tiflash(DB::DM::DeltaMergeBlockInputStream<DB::DM::DeltaValueReader, DB::DM::DTCompactedEntries<55ul, 20ul, 3ul>::Iterator, false>::writeInsertFromDelta(std::vector<COWPtr<DB::IColumn>::mutable_ptr<DB::IColumn>, std::allocator<COWPtr<DB::IColumn>::mutable_ptr<DB::IColumn> > >&, unsigned long&)+0x94) [0x7698344]
8. bin/tiflash/tiflash(DB::DM::DeltaMergeBlockInputStream<DB::DM::DeltaValueReader, DB::DM::DTCompactedEntries<55ul, 20ul, 3ul>::Iterator, false>::read()+0x863) [0x769b593]
9. bin/tiflash/tiflash(DB::DM::DMRowKeyFilterBlockInputStream<true>::read()+0x5f) [0x7692d2f]
10. bin/tiflash/tiflash(DB::DM::PKSquashingBlockInputStream<false>::read()+0x9f1) [0x7698d61]
11. bin/tiflash/tiflash(DB::DM::DMVersionFilterBlockInputStream<1>::initNextBlock()+0x2f) [0x7a3dcbf]
12. bin/tiflash/tiflash(DB::DM::DMVersionFilterBlockInputStream<1>::read(DB::PODArray<unsigned char, 4096ul, Allocator<false>, 15ul, 16ul>*&, bool)+0x1dd0) [0x7a411a0]
13. bin/tiflash/tiflash(DB::DM::DMVersionFilterBlockInputStream<1>::read()+0x18) [0x7681968]
14. bin/tiflash/tiflash(DB::DM::writeIntoNewDMFile(DB::DM::DMContext&, std::shared_ptr<std::vector<DB::DM::ColumnDefine, std::allocator<DB::DM::ColumnDefine> > > const&, std::shared_ptr<DB::IBlockInputStream> const&, unsigned long, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, DB::DM::DMFileWriter::Flags)+0x20b) [0x7670cdb]
15. bin/tiflash/tiflash(DB::DM::createNewStable(DB::DM::DMContext&, std::shared_ptr<std::vector<DB::DM::ColumnDefine, std::allocator<DB::DM::ColumnDefine> > > const&, std::shared_ptr<DB::IBlockInputStream> const&, unsigned long, DB::DM::WriteBatches&)+0xbd) [0x767143d]
16. bin/tiflash/tiflash(DB::DM::Segment::prepareMergeDelta(DB::DM::DMContext&, std::shared_ptr<std::vector<DB::DM::ColumnDefine, std::allocator<DB::DM::ColumnDefine> > > const&, std::shared_ptr<DB::DM::SegmentSnapshot> const&, DB::DM::WriteBatches&) const+0x17d) [0x767ab7d]
17. bin/tiflash/tiflash(DB::DM::DeltaMergeStore::segmentMergeDelta(DB::DM::DMContext&, std::shared_ptr<DB::DM::Segment> const&, DB::DM::DeltaMergeStore::TaskRunThread, std::shared_ptr<DB::DM::SegmentSnapshot>)+0x3e1) [0x76496b1]
18. bin/tiflash/tiflash(DB::DM::DeltaMergeStore::onSyncGc(long)+0x5c1) [0x7650601]
19. bin/tiflash/tiflash(DB::GCManager::work()+0x7b1) [0x7a09911]
20. bin/tiflash/tiflash(DB::BackgroundProcessingPool::threadFunction()+0x947) [0x75dba17]
21. bin/tiflash/tiflash() [0x8b14f6f]
22. /lib64/libpthread.so.0(+0x6e63) [0x7fe4a07f6e63]
23. /lib64/libc.so.6(clone+0x6d) [0x7fe4a022b55d]
"] [thread_id=10]

Exception message after v5.4.2/v6.1.1/v6.2.0

[2022/09/13 15:59:30.623 +08:00] [ERROR] [Exception.cpp:85] ["DB::RawCppPtr DB::PreHandleSnapshot(DB::EngineStoreServerWrap *, DB::BaseBuffView, uint64_t, DB::SSTViewVec, uint64_t, uint64_t):Code: 9, e.displayText() = DB::Exception: Sizes of columns doesn't match: _INTERNAL_DELMARK(id=-1025): 8, source(id=2): 4, e.what() = DB::Exception, Stack trace:
       0x1cdb0ce    DB::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int) [tiflash+30257358]
                    dbms/src/Common/Exception.h:41
       0x65b93c4    DB::Block::checkNumberOfRows() const [tiflash+106664900]
                    dbms/src/Core/Block.cpp:243
       0x735076e    bool DB::RegionBlockReader::readImpl<(DB::TMTPKType)0>(DB::Block&, std::__1::vector<std::__1::tuple<DB::RawTiDBPK, unsigned char, unsigned long, std::__1::shared_ptr<DB::StringObject<false> const> >, std::__1::allocator<std::__1::tuple<DB::RawTiDBPK, unsigned char, unsigned long, std::__1::shared_ptr<DB::StringObject<false> const> > > > const&, bool) [tiflash+120915822]
                    dbms/src/Storages/Transaction/RegionBlockReader.cpp:211
       0x732cc13    DB::GenRegionBlockDataWithSchema(std::__1::shared_ptr<DB::Region> const&, std::__1::shared_ptr<DB::DecodingStorageSchemaSnapshot const> const&, unsigned long, bool, DB::TMTContext&) [tiflash+120769555]
                    dbms/src/Storages/Transaction/PartitionStreams.cpp:606
       0x72ee5a6    DB::DM::SSTFilesToBlockInputStream::readCommitedBlock() [tiflash+120513958]
                    dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.cpp:238
       0x72edcca    DB::DM::SSTFilesToBlockInputStream::read() [tiflash+120511690]
                    dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.cpp:144
       0x71d25c5    DB::DM::readNextBlock(std::__1::shared_ptr<DB::IBlockInputStream> const&) [tiflash+119350725]
                    dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h:253
       0x72eeec2    DB::DM::PKSquashingBlockInputStream<true>::read() [tiflash+120516290]
                    dbms/src/Storages/DeltaMerge/PKSquashingBlockInputStream.h:68
       0x71d25c5    DB::DM::readNextBlock(std::__1::shared_ptr<DB::IBlockInputStream> const&) [tiflash+119350725]
                    dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h:253
       0x1c9f065    DB::DM::DMVersionFilterBlockInputStream<1>::initNextBlock() [tiflash+30011493]
                    dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.h:134
       0x1c9d16c    DB::DM::DMVersionFilterBlockInputStream<1>::read(DB::PODArray<unsigned char, 4096ul, Allocator<false>, 15ul, 16ul>*&, bool) [tiflash+30003564]
                    dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.cpp:51
       0x72e954c    DB::DM::SSTFilesToDTFilesOutputStream::write() [tiflash+120493388]
                    dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp:162
       0x72e2dc3    DB::KVStore::preHandleSSTsToDTFiles(std::__1::shared_ptr<DB::Region>, DB::SSTViewVec, unsigned long, unsigned long, DB::DM::FileConvertJobType, DB::TMTContext&) [tiflash+120466883]
                    dbms/src/Storages/Transaction/ApplySnapshot.cpp:340
       0x733183a    PreHandleSnapshot [tiflash+120789050]
                    dbms/src/Storages/Transaction/ProxyFFI.cpp:387
  0x7f059e8b3e2a    raftstore::engine_store_ffi::_$LT$impl$u20$raftstore..engine_store_ffi..interfaces..root..DB..EngineStoreServerHelper$GT$::pre_handle_snapshot::h718eff40e0ff22c6 [libtiflash_proxy.so+33197610]
  0x7f059e86be89    raftstore::store::snap::Snapshot::pre_handle_snapshot::h81c04c547dfcb1a8 [libtiflash_proxy.so+32902793]
  0x7f059df01536    yatp::task::future::RawTask$LT$F$GT$::poll::h68cf0c4ec7b4731e [libtiflash_proxy.so+23029046]
  0x7f059f51ec0d    _$LT$yatp..task..future..Runner$u20$as$u20$yatp..pool..runner..Runner$GT$::handle::h6502e9a2b4ccd66e [libtiflash_proxy.so+46218253]
  0x7f059f52143d    std::sys_common::backtrace::__rust_begin_short_backtrace::hc5d6c9c6e9339eea [libtiflash_proxy.so+46228541]
  0x7f059f51413d    core::ops::function::FnOnce::call_once$u7b$$u7b$vtable.shim$u7d$$u7d$::h8a6505e26346d93a [libtiflash_proxy.so+46174525]
  0x7f059ec5ca0a    std::sys::unix::thread::Thread::new::thread_start::hd39c5f08bdcda277 [libtiflash_proxy.so+37034506]
  0x7f059bfece63    start_thread [libpthread.so.0+28259]
                    /root/yuzhao/glibc-2.17/nptl/pthread_create.c:308
  0x7f059bb0655d    __clone [libc.so.6+968029]
                    /root/yuzhao/glibc-2.17/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:113"] [thread_id=48]

4. What is your TiFlash version? (Required)

v5.4.1

JaySon-Huang commented 1 year ago

Fixed in 5.4.3/6.1.2/6.3.0 and later versions