Open 760485464 opened 3 months ago
我也是同样问题,请问有解决方法么?
我测试是正常的
我这边不行呢,是不是和显卡有关系我用的a100
可能和torch版本有关,你的torch版本是多少,我测试的是2.3.0
我更新一下试试
还是不行 2.3.1
我也遇到一样的问题,torch版本2.3.0。
已经找到问题,默认安装的transformers版本为4.42.3,把transformers降低到4.40.2即可解决问题
我也遇到同样的问题,torch版本2.3.0。
已经找到问题,默认安装的transformers版本为4.42.3,把transformers降到4.40.2就可以了
如果实在不行的话,就用 transformers 4.42.2 torch 2.3.0 这个时间是2024年8月18日
提交前必须检查以下项目 | The following items must be checked before submission
问题类型 | Type of problem
模型推理和部署 | Model inference and deployment
操作系统 | Operating system
Linux
详细描述问题 | Detailed description of the problem
{ "model": "glm-4v", "stream":false, "messages": [ { "role": "user", "content": "你好" } ] }
Dependencies
No response
运行日志或截图 | Runtime logs or screenshots
:~/autodl-tmp/api-for-open-llm# python server.py 2024-06-28 17:17:19.906 | DEBUG | api.config::281 - SETTINGS: {
"model_name": "glm-4v",
"model_path": "/root/autodl-tmp/models/glm-4v-9b",
"dtype": "bfloat16",
"load_in_8bit": false,
"load_in_4bit": false,
"context_length": 4098,
"chat_template": "glm-4v",
"rope_scaling": null,
"flash_attn": false,
"interrupt_requests": true,
"host": "0.0.0.0",
"port": 8000,
"api_prefix": "/v1",
"engine": "default",
"tasks": [
"llm"
],
"device_map": "auto",
"gpus": null,
"num_gpus": 1,
"activate_inference": true,
"model_names": [
"glm-4v"
],
"api_keys": null
}
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:19<00:00, 1.29s/it]
2024-06-28 17:17:49.914 | INFO | api.models:create_hf_llm:81 - Using HuggingFace Engine
2024-06-28 17:17:49.915 | INFO | api.engine.hf:__init__:82 - Using glm-4v Model for Chat!
2024-06-28 17:17:49.915 | INFO | api.engine.hf:__init__:83 - Using <api.templates.glm.GLM4VChatTemplate object at 0x7fa7d2d65b80> for Chat!
INFO: Started server process [7328]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
2024-06-28 17:18:15.390 | DEBUG | api.routes.chat:create_chat_completion:56 - ==== request ====
{'model': 'glm-4v', 'frequency_penalty': 0.0, 'function_call': None, 'functions': None, 'logit_bias': None, 'logprobs': False, 'max_tokens': 1024, 'n': 1, 'presence_penalty': 0.0, 'response_format': None, 'seed': None, 'stop': ['<|observation|>', '', '<|endoftext|>'], 'temperature': 0.9, 'tool_choice': None, 'tools': None, 'top_logprobs': None, 'top_p': 1.0, 'user': None, 'stream': False, 'repetition_penalty': 1.03, 'typical_p': None, 'watermark': False, 'best_of': 1, 'ignore_eos': False, 'use_beam_search': False, 'stop_token_ids': [151336, 151329, 151338], 'skip_special_tokens': True, 'spaces_between_special_tokens': True, 'min_p': 0.0, 'include_stop_str_in_output': False, 'length_penalty': 1.0, 'guided_json': None, 'guided_regex': None, 'guided_choice': None, 'guided_grammar': None, 'guided_decoding_backend': None, 'prompt_or_messages': [{'role': 'user', 'content': [{'type': 'text', 'text': '这张图片是什么地方?'}, {'type': 'image_url', 'image_url': {'url': 'http://djclub.cdn.bcebos.com/uploads/images/pageimg/20230325/64-2303252115313.jpg'}}]}], 'echo': False}
Exception in thread Thread-2:
Traceback (most recent call last):
File "/root/miniconda3/envs/minicpm/lib/python3.8/threading.py", line 932, in _bootstrap_inner
self.run()
File "/root/miniconda3/envs/minicpm/lib/python3.8/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/transformers/generation/utils.py", line 1914, in generate
result = self._sample(
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/transformers/generation/utils.py", line 2651, in _sample
outputs = self(
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/glm-4v-9b/modeling_chatglm.py", line 1173, in forward
transformer_outputs = self.transformer(
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/glm-4v-9b/modeling_chatglm.py", line 1041, in forward
hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/glm-4v-9b/modeling_chatglm.py", line 791, in forward
layer_ret = layer(
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/glm-4v-9b/modeling_chatglm.py", line 694, in forward
attention_output, kv_cache = self.self_attention(
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/glm-4v-9b/modeling_chatglm.py", line 563, in forward
cache_k, cache_v = kv_cache
ValueError: too many values to unpack (expected 2)
INFO: 127.0.0.1:35456 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/uvicorn/protocols/http/httptools_impl.py", line 399, in run_asgi
result = await app( # type: ignore[func-returns-value]
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/uvicorn/middleware/proxy_headers.py", line 70, in __call__
return await self.app(scope, receive, send)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/fastapi/applications.py", line 1054, in __call__
await super().__call__(scope, receive, send)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/applications.py", line 123, in __call__
await self.middleware_stack(scope, receive, send)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/middleware/errors.py", line 186, in __call__
raise exc
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/middleware/errors.py", line 164, in __call__
await self.app(scope, receive, _send)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/middleware/cors.py", line 85, in __call__
await self.app(scope, receive, send)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/middleware/exceptions.py", line 65, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/routing.py", line 756, in __call__
await self.middleware_stack(scope, receive, send)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/routing.py", line 776, in app
await route.handle(scope, receive, send)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/routing.py", line 297, in handle
await self.app(scope, receive, send)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/routing.py", line 77, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/routing.py", line 72, in app
response = await func(request)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/fastapi/routing.py", line 278, in app
raw_response = await run_endpoint_function(
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/fastapi/routing.py", line 191, in run_endpoint_function
return await dependant.call(**values)
File "/root/autodl-tmp/api-for-open-llm/api/routes/chat.py", line 58, in create_chat_completion
iterator_or_completion = await run_in_threadpool(engine.create_chat_completion, params)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/starlette/concurrency.py", line 42, in run_in_threadpool
return await anyio.to_thread.run_sync(func, *args)
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
return await future
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 859, in run
result = context.run(func, *args)
File "/root/autodl-tmp/api-for-open-llm/api/engine/hf.py", line 381, in create_chat_completion
else self._create_chat_completion(params)
File "/root/autodl-tmp/api-for-open-llm/api/engine/hf.py", line 303, in _create_chat_completion
for output in self._generate(params):
File "/root/autodl-tmp/api-for-open-llm/api/engine/hf.py", line 110, in _generate
for output in self.generate_stream_func(self.model, self.tokenizer, params):
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 56, in generator_context
response = gen.send(request)
File "/root/autodl-tmp/api-for-open-llm/api/templates/stream.py", line 80, in generate_stream
for i, new_text in enumerate(streamer):
File "/root/miniconda3/envs/minicpm/lib/python3.8/site-packages/transformers/generation/streamers.py", line 223, in __next__
value = self.text_queue.get(timeout=self.timeout)
File "/root/miniconda3/envs/minicpm/lib/python3.8/queue.py", line 178, in get
raise Empty
_queue.Empty