predibase / lorax

Multi-LoRA inference server that scales to 1000s of fine-tuned LLMs
https://loraexchange.ai
Apache License 2.0
1.86k stars 125 forks source link

Important: In latest main, the server cannot serve more than one user #512

Open prd-tuong-nguyen opened 2 weeks ago

prd-tuong-nguyen commented 2 weeks ago

System Info

I hit this error when more than one user sends requests to the server. (I tried running the previous image version, and it still works fine.)

ID not found in entries. This is a bug.
stack backtrace:
   0:     0x55acc1cd4f5c - std::backtrace_rs::backtrace::libunwind::trace::h67a838aed1f4d6ec
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5
   1:     0x55acc1cd4f5c - std::backtrace_rs::backtrace::trace_unsynchronized::h1d1786bb1962baf8
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
   2:     0x55acc1cd4f5c - std::sys_common::backtrace::_print_fmt::h5a0b1f807a002d23
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:67:5
   3:     0x55acc1cd4f5c - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::hf84ab6ad0b91784c
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:44:22
   4:     0x55acc1d01d8c - core::fmt::rt::Argument::fmt::h28f463bd1fdabed5
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/fmt/rt.rs:138:9
   5:     0x55acc1d01d8c - core::fmt::write::ha37c23b175e921b3
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/fmt/mod.rs:1114:21
   6:     0x55acc1cd17ce - std::io::Write::write_fmt::haa1b000741bcbbe1
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/io/mod.rs:1763:15
   7:     0x55acc1cd4d44 - std::sys_common::backtrace::_print::h1ff1030b04dfb157
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:47:5
   8:     0x55acc1cd4d44 - std::sys_common::backtrace::print::hb982056c6f29541c
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:34:9
   9:     0x55acc1cd6473 - std::panicking::default_hook::{{closure}}::h11f92f82c62fbd68
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:272:22
  10:     0x55acc1cd6194 - std::panicking::default_hook::hb8810fe276772c66
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:292:9
  11:     0x55acc1cd69f5 - std::panicking::rust_panic_with_hook::hd2f0efd2fec86cb0
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:731:13
  12:     0x55acc1cd68f1 - std::panicking::begin_panic_handler::{{closure}}::h3651b7fc4f61d784
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:609:13
  13:     0x55acc1cd5486 - std::sys_common::backtrace::__rust_end_short_backtrace::hbc468e4b98c7ae04
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/sys_common/backtrace.rs:170:18
  14:     0x55acc1cd6642 - rust_begin_unwind
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:597:5
  15:     0x55acc12e1075 - core::panicking::panic_fmt::h979245e2fdb2fabd
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/panicking.rs:72:14
  16:     0x55acc12e1033 - core::panicking::panic_display::h9b355c58fd35af38
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/panicking.rs:168:5
  17:     0x55acc12e1033 - core::panicking::panic_str::h187a5146d72e7d2f
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/panicking.rs:152:5
  18:     0x55acc12e1033 - core::option::expect_failed::h7cdfa49208a82a89
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/option.rs:1988:5
  19:     0x55acc172ba7d - lorax_router::infer::filter_send_generations::{{closure}}::h6d30e692d85e9a5c
  20:     0x55acc172ba7d - core::iter::traits::iterator::Iterator::for_each::call::{{closure}}::h68eb01f7ecc7097a
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/traits/iterator.rs:854:29
  21:     0x55acc172ba7d - core::iter::traits::iterator::Iterator::fold::h7ab3a222ce402821
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/traits/iterator.rs:2639:21
  22:     0x55acc172ba7d - core::iter::traits::iterator::Iterator::for_each::h1a17e8d270d54052
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/traits/iterator.rs:857:9
  23:     0x55acc172d276 - lorax_router::infer::filter_send_generations::h687c3ed4fb0824e8
                               at /usr/src/router/src/infer.rs:761:5
  24:     0x55acc17618c6 - lorax_router::infer::prefill::{{closure}}::{{closure}}::h91b7e7435144c2ae
                               at /usr/src/router/src/infer.rs:605:13
  25:     0x55acc175365c - <tracing::instrument::Instrumented<T> as core::future::future::Future>::poll::he0b835ce68a0641f
                               at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tracing-0.1.40/src/instrument.rs:321:9
  26:     0x55acc175365c - lorax_router::infer::prefill::{{closure}}::he668e13682ad69f2
                               at /usr/src/router/src/infer.rs:589:1
  27:     0x55acc175365c - <tracing::instrument::Instrumented<T> as core::future::future::Future>::poll::h0b251984432d2f7c
                               at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tracing-0.1.40/src/instrument.rs:321:9
  28:     0x55acc17366bd - <lorax_router::batch::GenerateBatchEntries as lorax_router::batch::BatchEntries>::process_first::{{closure}}::h0a93f83c73861b5c
                               at /usr/src/router/src/batch.rs:315:10
  29:     0x55acc178db41 - <core::pin::Pin<P> as core::future::future::Future>::poll::h47267cc93a805788
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/future/future.rs:125:9
  30:     0x55acc1766ef6 - lorax_router::infer::batching_task::{{closure}}::h6c42ca99e514e067
                               at /usr/src/router/src/infer.rs:549:26
  31:     0x55acc176602f - tokio::runtime::task::core::Core<T,S>::poll::{{closure}}::h6fa682b46516d869
                               at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/task/core.rs:328:17
  32:     0x55acc176602f - tokio::loom::std::unsafe_cell::UnsafeCell<T>::with_mut::h3467b54ed6d4a3fa
                               at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/loom/std/unsafe_cell.rs:16:9
  33:     0x55acc176602f - tokio::runtime::task::core::Core<T,S>::poll::h2227f544428749a2
                               at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/task/core.rs:317:30
  34:     0x55acc16ffbaf - std::panicking::try::do_call::hded8ccfdb06a0f73
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:504:40
  35:     0x55acc16ffbaf - std::panicking::try::h84e8d909153c6a34
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panicking.rs:468:19
  36:     0x55acc17a1c74 - std::panic::catch_unwind::ha07a70865c5cf819
                               at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/std/src/panic.rs:142:14
  37:     0x55acc17a1c74 - tokio::runtime::task::harness::poll_future::h6242a51ce5628d88
                               at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/task/harness.rs:473:18
  38:     0x55acc17a1c74 - tokio::runtime::task::harness::Harness<T,S>::poll_inner::he2e75c7551c16150
                               at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/task/harness.rs:208:27
  39:     0x55acc17a1c74 - tokio::runtime::task::harness::Harness<T,S>::poll::h160dc344962ad018
                               at /usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/task/harness.rs:153:15
{"timestamp":"2024-06-12T07:53:01.887457Z","level":"ERROR","fields":{"message":"Webserver Crashed"},"target":"lorax_launcher"}
{"timestamp":"2024-06-12T07:53:01.887497Z","level":"INFO","fields":{"message":"Shutting down shards"},"target":"lorax_launcher"}
{"timestamp":"2024-06-12T07:53:02.192889Z","level":"INFO","fields":{"message":"Shard terminated"},"target":"lorax_launcher","span":{"rank":0,"name":"shard-manager"},"spans":[{"rank":0,"name":"shard-manager"}]}
Error: WebserverFailed

Information

Tasks

Reproduction

Start the server with Docker using the model microsoft/Phi-3-mini-128k-instruct

Expected behavior

The server can serve many concurrent users

prd-tuong-nguyen commented 2 weeks ago

Note: This version still works well

bi1101 commented 2 weeks ago

I can confirm this issue is happening. It could go unnoticed if the Docker container's restart policy is set to "unless-stopped", because the container restarts automatically after the crash.