I hit a core dump when decoding from multiple threads. The crash is in the Rust function `tokenizers_decode` (rust/src/lib.rs:199). Here is the backtrace from the core file.
Why doesn't it support multi-threaded use? I would expect the decode process not to change any internal data, since it is a const member function of the object. SentencePiece fully supports this.
(gdb) bt
0 0x00007fffe0269207 in raise () from /lib64/libc.so.6
1 0x00007fffe026a8f8 in abort () from /lib64/libc.so.6
2 0x00007fffe02abd27 in __libc_message () from /lib64/libc.so.6
3 0x00007fffe02b4489 in _int_free () from /lib64/libc.so.6
4 0x0000000000fd8440 in alloc::alloc::dealloc (ptr=0x7ffe0c0009b0, layout=...) at /builddir/build/BUILD/rustc-1.72.1-src/library/alloc/src/alloc.rs:121
5 alloc::alloc::{impl#1}::deallocate (self=0x1c0323f8, ptr=..., layout=...) at /builddir/build/BUILD/rustc-1.72.1-src/library/alloc/src/alloc.rs:258
6 0x0000000000fda1cb in alloc::raw_vec::{impl#3}::drop<u8, alloc::alloc::Global> (self=0x1c0323f8) at /builddir/build/BUILD/rustc-1.72.1-src/library/alloc/src/raw_vec.rs:485
7 0x0000000000fd63cb in core::ptr::drop_in_place<alloc::raw_vec::RawVec<u8, alloc::alloc::Global>> () at /builddir/build/BUILD/rustc-1.72.1-src/library/core/src/ptr/mod.rs:497
8 0x0000000000fd63a4 in core::ptr::drop_in_place<alloc::vec::Vec<u8, alloc::alloc::Global>> () at /builddir/build/BUILD/rustc-1.72.1-src/library/core/src/ptr/mod.rs:497
9 0x0000000000fd635b in core::ptr::drop_in_place () at /builddir/build/BUILD/rustc-1.72.1-src/library/core/src/ptr/mod.rs:497
10 0x0000000000b414bb in tokenizers_c::TokenizerWrapper::decode (self=0x1c032000, ids=..., skip_special_tokens=false) at src/lib.rs:103
11 0x0000000000b41d90 in tokenizers_c::tokenizers_decode (handle=0x1c032000, input_ids=0x7fff7a95ad6c, len=1, skip_special_tokens=0) at src/lib.rs:207
12 0x0000000000b3f55f in tokenizers::HFTokenizer::Decode (this=0x1c054c80, ids=0x7fff7a95ad6c, len=1, detokenized=0x7ffe180008c8, skip_special_tokens=false)
at /mnt/llm/workspace/shengyunrui/ppl.llm.serving/deps/tokenizer_cpp/src/huggingface_tokenizer.cc:71
13 0x0000000000b3f622 in tokenizers::HFTokenizer::Decode (this=0x1c054c80, ids=0x7fff7a95ad6c, len=1, detokenized=0x7ffe180008c8)
at /mnt/llm/workspace/shengyunrui/ppl.llm.serving/deps/tokenizer_cpp/src/huggingface_tokenizer.cc:79
--Type <RET> for more, q to quit, c to continue without paging--
14 0x00000000005d53b0 in ppl::llm::utils::TokenizerImplHF::Decode (this=0xf024800, token_ids=0x7fff7a95ad6c, len=1, output=0x7ffe180008c8)
at /mnt/llm/workspace/shengyunrui/ppl.llm.serving/src/models/../utils/tokenizer_impl_hf.h:120
15 0x00000000005d3d10 in ppl::llm::llama::LlamaTokenizer::Decode (this=0xeffcf70, token_ids=0x7fff7a95ad6c, len=1, output=0x7ffe180008c8)
at /mnt/llm/workspace/shengyunrui/ppl.llm.serving/src/models/llama/llama_tokenizer.h:45
16 0x00000000005e06a3 in ppl::llm::llama::DecodeAndSendTask::Process (this=0x7fff7a95ade0) at /mnt/llm/workspace/shengyunrui/ppl.llm.serving/src/models/llama/llama_worker.cc:87
17 0x00000000005e703f in operator() (__closure=0x1c0572c0, nthr=2, ithr=0) at /mnt/llm/workspace/shengyunrui/ppl.llm.serving/src/models/llama/llama_worker.cc:1037
at /mnt/llm/toolchains/gcc/gcc-10.4.0/build/install/include/c++/10.4.0/bits/std_function.h:622
22 0x000000000092d8e1 in ppl::common::StaticThreadPool::ThreadWorker (arg=0x1749d990) at /mnt/llm/workspace/shengyunrui/ppl.llm.serving/deps/pplcommon/src/ppl/common/threadpool.cc:332
23 0x00007fffe0b0ddd5 in start_thread () from /lib64/libpthread.so.0
I hit a core dump when decoding from multiple threads. The crash is in the Rust function `tokenizers_decode` (rust/src/lib.rs:199). Here is the backtrace from the core file.
Why doesn't it support multi-threaded use? I would expect the decode process not to change any internal data, since it is a const member function of the object. SentencePiece fully supports this.
(gdb) bt
0 0x00007fffe0269207 in raise () from /lib64/libc.so.6
1 0x00007fffe026a8f8 in abort () from /lib64/libc.so.6
2 0x00007fffe02abd27 in __libc_message () from /lib64/libc.so.6
3 0x00007fffe02b4489 in _int_free () from /lib64/libc.so.6
4 0x0000000000fd8440 in alloc::alloc::dealloc (ptr=0x7ffe0c0009b0, layout=...) at /builddir/build/BUILD/rustc-1.72.1-src/library/alloc/src/alloc.rs:121
5 alloc::alloc::{impl#1}::deallocate (self=0x1c0323f8, ptr=..., layout=...) at /builddir/build/BUILD/rustc-1.72.1-src/library/alloc/src/alloc.rs:258
6 0x0000000000fda1cb in alloc::raw_vec::{impl#3}::drop<u8, alloc::alloc::Global> (self=0x1c0323f8) at /builddir/build/BUILD/rustc-1.72.1-src/library/alloc/src/raw_vec.rs:485
7 0x0000000000fd63cb in core::ptr::drop_in_place<alloc::raw_vec::RawVec<u8, alloc::alloc::Global>> () at /builddir/build/BUILD/rustc-1.72.1-src/library/core/src/ptr/mod.rs:497
8 0x0000000000fd63a4 in core::ptr::drop_in_place<alloc::vec::Vec<u8, alloc::alloc::Global>> () at /builddir/build/BUILD/rustc-1.72.1-src/library/core/src/ptr/mod.rs:497
9 0x0000000000fd635b in core::ptr::drop_in_place () at /builddir/build/BUILD/rustc-1.72.1-src/library/core/src/ptr/mod.rs:497
10 0x0000000000b414bb in tokenizers_c::TokenizerWrapper::decode (self=0x1c032000, ids=..., skip_special_tokens=false) at src/lib.rs:103
11 0x0000000000b41d90 in tokenizers_c::tokenizers_decode (handle=0x1c032000, input_ids=0x7fff7a95ad6c, len=1, skip_special_tokens=0) at src/lib.rs:207
12 0x0000000000b3f55f in tokenizers::HFTokenizer::Decode (this=0x1c054c80, ids=0x7fff7a95ad6c, len=1, detokenized=0x7ffe180008c8, skip_special_tokens=false)
13 0x0000000000b3f622 in tokenizers::HFTokenizer::Decode (this=0x1c054c80, ids=0x7fff7a95ad6c, len=1, detokenized=0x7ffe180008c8)
--Type <RET> for more, q to quit, c to continue without paging--
14 0x00000000005d53b0 in ppl::llm::utils::TokenizerImplHF::Decode (this=0xf024800, token_ids=0x7fff7a95ad6c, len=1, output=0x7ffe180008c8)
15 0x00000000005d3d10 in ppl::llm::llama::LlamaTokenizer::Decode (this=0xeffcf70, token_ids=0x7fff7a95ad6c, len=1, output=0x7ffe180008c8)
16 0x00000000005e06a3 in ppl::llm::llama::DecodeAndSendTask::Process (this=0x7fff7a95ade0) at /mnt/llm/workspace/shengyunrui/ppl.llm.serving/src/models/llama/llama_worker.cc:87
17 0x00000000005e703f in operator() (__closure=0x1c0572c0, nthr=2, ithr=0) at /mnt/llm/workspace/shengyunrui/ppl.llm.serving/src/models/llama/llama_worker.cc:1037
18 0x00000000005f6568 in std::invoke_impl<void, ppl::llm::llama::LLaMAWorker::Work()::<lambda(uint32_t, uint32_t)>&, unsigned int, unsigned int>(std::__invoke_other, struct {...} &) (f=...)
19 0x00000000005f4031 in std::__invoke_r<void, ppl::llm::llama::LLaMAWorker::Work()::<lambda(uint32_t, uint32_t)>&, unsigned int, unsigned int>(struct {...} &) (__fn=...)
20 0x00000000005f0b4c in std::_Function_handler<void(unsigned int, unsigned int), ppl::llm::llama::LLaMAWorker::Work()::<lambda(uint32_t, uint32_t)> >::_M_invoke(const std::_Any_data &, unsigned int &&, unsigned int &&) (functor=..., args#0=@0x7fff7a95aef4: 2, __args#1=@0x7fff7a95aef0: 0) at /mnt/llm/toolchains/gcc/gcc-10.4.0/build/install/include/c++/10.4.0/bits/std_function.h:291
21 0x00000000008e66f5 in std::function<void (unsigned int, unsigned int)>::operator()(unsigned int, unsigned int) const (this=0x1c0572c0, args#0=2, args#1=0)
22 0x000000000092d8e1 in ppl::common::StaticThreadPool::ThreadWorker (arg=0x1749d990) at /mnt/llm/workspace/shengyunrui/ppl.llm.serving/deps/pplcommon/src/ppl/common/threadpool.cc:332
23 0x00007fffe0b0ddd5 in start_thread () from /lib64/libpthread.so.0