ERROR:asyncio:Exception in callback functools.partial(<function _raise_exception_on_finish at 0x7fd321802200>, error_callback=<bound method AsyncLLMEngine._error_callback of <vllm.engine.async_llm_engine.AsyncLLMEngine object at 0x7fd316450310>>)
handle: <Handle functools.partial(<function _raise_exception_on_finish at 0x7fd321802200>, error_callback=<bound method AsyncLLMEngine._error_callback of <vllm.engine.async_llm_engine.AsyncLLMEngine object at 0x7fd316450310>>)>
Traceback (most recent call last):
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 40, in _raise_ex
ception_on_finish
task.result()
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 521, in run_engi
ne_loop
has_requests_in_progress = await asyncio.wait_for(
^^^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/asyncio/tasks.py", line 479, in wait_for
return fut.result()
^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 495, in engine_s
tep
request_outputs = await self.engine.step_async()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 226, in step_asy
nc
output = await self.model_executor.execute_model_async(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/executor/gpu_executor.py", line 117, in execute_mo
del_async
output = await make_async(self.driver_worker.execute_model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_cont
ext
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/worker/worker.py", line 272, in execute_model
output = self.model_runner.execute_model(seq_group_metadata_list,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_cont
ext
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/worker/model_runner.py", line 731, in execute_mode
l
logits = self.model.compute_logits(hidden_states, sampling_metadata)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/model_executor/models/llama.py", line 369, in comp
ute_logits
logits = self.logits_processor(self.lm_head.weight, hidden_states,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_cal
l_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/lora/layers.py", line 1195, in forward
return type(self.base_layer).forward(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/model_executor/layers/logits_processor.py", line 5
5, in forward
logits *= self.scale
RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "uvloop/cbhandles.pyx", line 63, in uvloop.loop.Handle._run
File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 47, in _raise_ex
ception_on_finish
raise AsyncEngineDeadError(
vllm.engine.async_llm_engine.AsyncEngineDeadError: Task finished unexpectedly. This should never happen! Please open an issue on Github. See stack trace above for the actual cause.
ERROR: Exception in ASGI application
+ Exception Group Traceback (most recent call last):
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/starlette/_utils.py", line 77, in collapse_excgroup
s
| yield
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/starlette/middleware/base.py", line 186, in __call_
_
| async with anyio.create_task_group() as task_group:
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 680, in __aexit_
_
| raise BaseExceptionGroup(
| ExceptionGroup: unhandled errors in a TaskGroup (1 sub-exception)
+-+---------------- 1 ----------------
| Traceback (most recent call last):
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 4
01, in run_asgi
| result = await app( # type: ignore[func-returns-value]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 70, in
__call__
| return await self.app(scope, receive, send)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
| await super().__call__(scope, receive, send)
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/starlette/applications.py", line 113, in __call__
| await self.middleware_stack(scope, receive, send)
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __c
all__
| raise exc
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __c
all__
| await self.app(scope, receive, _send)
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/starlette/middleware/base.py", line 185, in __cal
l__
| with collapse_excgroups():
| File "/data/tangjiakai/anaconda3/lib/python3.11/contextlib.py", line 155, in __exit__
| self.gen.throw(typ, value, traceback)
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/starlette/_utils.py", line 83, in collapse_excgro
ups
| raise exc
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/starlette/middleware/base.py", line 187, in __cal
l__
| response = await self.dispatch_func(request, call_next)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 164,
in authentication
| return await call_next(request)
| ^^^^^^^^^^^^^^^^^^^^^^^^
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/starlette/middleware/base.py", line 163, in call_
next
| raise app_exc
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/starlette/middleware/base.py", line 187, in __cal
l__
| response = await self.dispatch_func(request, call_next)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/data/tangjiakai/anaconda3/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 164,
in authentication
| return await call_next(request)
| ^^^^^^^^^^^^^^^^^^^^^^^^
[X] Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.
Your current environment
🐛 Describe the bug
I run the api server with the following cmd:
Before submitting a new issue...