import requests

# Reproduction script: minimal chat-completion request against a local
# llama-cpp-python server (triggers "ValueError: NULL pointer access").
result = requests.post(
    "http://172.17.1.58:7324/v1/chat/completions",
    json={
        "messages": [
            {
                "content": "You are a helpful assistant.",
                "role": "system",
            },
            {
                "content": "What is the capital of France?",
                "role": "user",
            },
        ]
    },
    timeout=60,  # don't hang forever if the server is wedged mid-crash
)
# Surface HTTP errors (the server returns 500 on this bug) instead of
# silently discarding the response.
result.raise_for_status()
print(result.json())
Error message:
INFO: 172.17.1.58:34518 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
Traceback (most recent call last):
File "/venv/lib/python3.12/site-packages/llama_cpp/server/errors.py", line 171, in custom_route_handler
response = await original_route_handler(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/fastapi/routing.py", line 278, in app
raw_response = await run_endpoint_function(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/fastapi/routing.py", line 191, in run_endpoint_function
return await dependant.call(**values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/llama_cpp/server/app.py", line 513, in create_chat_completion
] = await run_in_threadpool(llama.create_chat_completion, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/starlette/concurrency.py", line 42, in run_in_threadpool
return await anyio.to_thread.run_sync(func, *args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
return await future
^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/anyio/_backends/_asyncio.py", line 859, in run
result = context.run(func, *args)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/llama_cpp/llama.py", line 1888, in create_chat_completion
return handler(
^^^^^^^^
File "/venv/lib/python3.12/site-packages/llama_cpp/llama_chat_format.py", line 637, in chat_completion_handler
completion_or_chunks = llama.create_completion(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/llama_cpp/llama.py", line 1722, in create_completion
completion: Completion = next(completion_or_chunks) # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/llama_cpp/llama.py", line 1207, in _create_completion
for token in self.generate(
File "/venv/lib/python3.12/site-packages/llama_cpp/llama.py", line 799, in generate
self.eval(tokens)
File "/venv/lib/python3.12/site-packages/llama_cpp/llama.py", line 640, in eval
logits = np.ctypeslib.as_array(
^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/numpy/ctypeslib.py", line 522, in as_array
obj = ctypes.cast(obj, p_arr_type).contents
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: NULL pointer access
Docker image version: 0.2.83
Script
Test request
Error message