Openai style api for open large language models, using LLMs just as chatgpt! Support for LLaMA, LLaMA-2, BLOOM, Falcon, Baichuan, Qwen, Xverse, SqlCoder, CodeLLaMA, ChatGLM, ChatGLM2, ChatGLM3 etc. 开源大模型的统一后端接口
提交前必须检查以下项目 | The following items must be checked before submission
[X] 请确保使用的是仓库最新代码(git pull),一些问题已被解决和修复。 | Make sure you are using the latest code from the repository (git pull), some issues have already been addressed and fixed.
[X] 我已阅读项目文档和FAQ章节并且已在Issue中对问题进行了搜索,没有找到相似问题和解决方案 | I have searched the existing issues / discussions
问题类型 | Type of problem
模型推理和部署 | Model inference and deployment
操作系统 | Operating system
Linux
详细描述问题 | Detailed description of the problem
Exception in thread Thread-2:
Traceback (most recent call last):
File "/root/anaconda3/envs/glm4v/lib/python3.9/threading.py", line 980, in _bootstrap_inner
self.run()
File "/root/anaconda3/envs/glm4v/lib/python3.9/threading.py", line 917, in run
self._target(*self._args, **self._kwargs)
File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
INFO: 172.16.18.52:54156 - "POST /v1/chat/completions HTTP/1.1" 200 OK
return func(*args, **kwargs)
File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/transformers/generation/utils.py", line 2024, in generate
result = self._sample(
File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/transformers/generation/utils.py", line 3032, in _sample
model_kwargs = self._update_model_kwargs_for_generation(
File "/root/.cache/huggingface/modules/transformers_modules/glm-4v-9b/modeling_chatglm.py", line 1083, in _update_model_kwargs_for_generation
model_kwargs["past_key_values"] = self._extract_past_from_model_output(
TypeError: _extract_past_from_model_output() got an unexpected keyword argument 'standardize_cache_format'
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/sse_starlette/sse.py", line 289, in __call__
await wrap(partial(self.listen_for_disconnect, receive))
File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/sse_starlette/sse.py", line 278, in wrap
await func()
File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/sse_starlette/sse.py", line 228, in listen_for_disconnect
message = await receive()
File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/uvicorn/protocols/http/httptools_impl.py", line 555, in receive
await self.message_event.wait()
File "/root/anaconda3/envs/glm4v/lib/python3.9/asyncio/locks.py", line 226, in wait
await fut
asyncio.exceptions.CancelledError: Cancelled by cancel scope 7f3a0f7c8e50
During handling of the above exception, another exception occurred:
Exception Group Traceback (most recent call last):
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/uvicorn/protocols/http/httptools_impl.py", line 401, in run_asgi
| result = await app( # type: ignore[func-returns-value]
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/uvicorn/middleware/proxy_headers.py", line 70, in __call__
| return await self.app(scope, receive, send)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/fastapi/applications.py", line 1054, in __call__
| await super().__call__(scope, receive, send)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/applications.py", line 113, in __call__
| await self.middleware_stack(scope, receive, send)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/middleware/errors.py", line 187, in __call__
| raise exc
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/middleware/errors.py", line 165, in __call__
| await self.app(scope, receive, _send)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/middleware/cors.py", line 85, in __call__
| await self.app(scope, receive, send)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
| await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/_exception_handler.py", line 62, in wrapped_app
| raise exc
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/_exception_handler.py", line 51, in wrapped_app
| await app(scope, receive, sender)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/routing.py", line 715, in __call__
| await self.middleware_stack(scope, receive, send)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/routing.py", line 735, in app
| await route.handle(scope, receive, send)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/routing.py", line 288, in handle
| await self.app(scope, receive, send)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/routing.py", line 76, in app
| await wrap_app_handling_exceptions(app, request)(scope, receive, send)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/_exception_handler.py", line 62, in wrapped_app
| raise exc
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/_exception_handler.py", line 51, in wrapped_app
| await app(scope, receive, sender)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/routing.py", line 74, in app
| await response(scope, receive, send)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/sse_starlette/sse.py", line 289, in __call__
| await wrap(partial(self.listen_for_disconnect, receive))
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 680, in __aexit__
| raise BaseExceptionGroup(
| exceptiongroup.ExceptionGroup: unhandled errors in a TaskGroup (1 sub-exception)
+-+---------------- 1 ----------------
| Traceback (most recent call last):
| File "/home/glm/api-for-open-llm/api/utils.py", line 149, in get_event_publisher
| async for chunk in iterate_in_threadpool(iterator):
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/concurrency.py", line 62, in iterate_in_threadpool
| yield await anyio.to_thread.run_sync(_next, as_iterator)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/anyio/to_thread.py", line 56, in run_sync
| return await get_async_backend().run_sync_in_worker_thread(
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
| return await future
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 859, in run
| result = context.run(func, *args)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/starlette/concurrency.py", line 51, in _next
| return next(iterator)
| File "/home/glm/api-for-open-llm/api/routes/chat.py", line 68, in iterator
| yield from iterator_or_completion
| File "/home/glm/api-for-open-llm/api/engine/hf.py", line 209, in _create_chat_completion_stream
| for i, output in enumerate(self._generate(params)):
| File "/home/glm/api-for-open-llm/api/engine/hf.py", line 110, in _generate
| for output in self.generate_stream_func(self.model, self.tokenizer, params):
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 57, in generator_context
| response = gen.send(request)
| File "/home/glm/api-for-open-llm/api/templates/stream.py", line 80, in generate_stream
| for i, new_text in enumerate(streamer):
| File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/transformers/generation/streamers.py", line 223, in __next__
| value = self.text_queue.get(timeout=self.timeout)
| File "/root/anaconda3/envs/glm4v/lib/python3.9/queue.py", line 179, in get
| raise Empty
| _queue.Empty
+------------------------------------
提交前必须检查以下项目 | The following items must be checked before submission
问题类型 | Type of problem
模型推理和部署 | Model inference and deployment
操作系统 | Operating system
Linux
详细描述问题 | Detailed description of the problem
Exception in thread Thread-2: Traceback (most recent call last): File "/root/anaconda3/envs/glm4v/lib/python3.9/threading.py", line 980, in _bootstrap_inner self.run() File "/root/anaconda3/envs/glm4v/lib/python3.9/threading.py", line 917, in run self._target(*self._args, **self._kwargs) File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context INFO: 172.16.18.52:54156 - "POST /v1/chat/completions HTTP/1.1" 200 OK return func(*args, **kwargs) File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/transformers/generation/utils.py", line 2024, in generate result = self._sample( File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/transformers/generation/utils.py", line 3032, in _sample model_kwargs = self._update_model_kwargs_for_generation( File "/root/.cache/huggingface/modules/transformers_modules/glm-4v-9b/modeling_chatglm.py", line 1083, in _update_model_kwargs_for_generation model_kwargs["past_key_values"] = self._extract_past_from_model_output( TypeError: _extract_past_from_model_output() got an unexpected keyword argument 'standardize_cache_format' ERROR: Exception in ASGI application Traceback (most recent call last): File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/sse_starlette/sse.py", line 289, in __call__ await wrap(partial(self.listen_for_disconnect, receive)) File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/sse_starlette/sse.py", line 278, in wrap await func() File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/sse_starlette/sse.py", line 228, in listen_for_disconnect message = await receive() File "/root/anaconda3/envs/glm4v/lib/python3.9/site-packages/uvicorn/protocols/http/httptools_impl.py", line 555, in receive await self.message_event.wait() File "/root/anaconda3/envs/glm4v/lib/python3.9/asyncio/locks.py", line 226, in wait await fut asyncio.exceptions.CancelledError: Cancelled by cancel scope 7f3a0f7c8e50
During handling of the above exception, another exception occurred:
Dependencies
生成线程内抛出 `TypeError: _extract_past_from_model_output() got an unexpected keyword argument 'standardize_cache_format'`,尝试过切换 torch 版本,问题依旧。报错位于 `modeling_chatglm.py` 调用 transformers 的生成接口处,疑似与所装 transformers 版本不兼容(切换 torch 版本不会影响该调用签名)。| The generation thread raises `TypeError: ... 'standardize_cache_format'`; switching torch versions did not help. The error occurs where `modeling_chatglm.py` hooks into transformers' generation utilities, suggesting a transformers-version incompatibility (changing torch cannot affect that call signature).
运行日志或截图 | Runtime logs or screenshots