====conversation====
[{'role': 'user', 'content': '你好'}]
C:\Users\hzw\.cache\huggingface\modules\transformers_modules\chatglm3-6b\modeling_chatglm.py:226: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
  context_layer = torch.nn.functional.scaled_dot_product_attention(query_layer, key_layer, value_layer,
Exception in thread Thread-7 (generate):
Traceback (most recent call last):
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\threading.py", line 1045, in _bootstrap_inner
    self.run()
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\threading.py", line 982, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\transformers\generation\utils.py", line 1989, in generate
    result = self._sample(
             ^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\transformers\generation\utils.py", line 2932, in _sample
    outputs = self(**model_inputs, return_dict=True)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.cache\huggingface\modules\transformers_modules\chatglm3-6b\modeling_chatglm.py", line 941, in forward
    transformer_outputs = self.transformer(
                          ^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.cache\huggingface\modules\transformers_modules\chatglm3-6b\modeling_chatglm.py", line 834, in forward
    hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
                                                                      ^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.cache\huggingface\modules\transformers_modules\chatglm3-6b\modeling_chatglm.py", line 641, in forward
    layer_ret = layer(
                ^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.cache\huggingface\modules\transformers_modules\chatglm3-6b\modeling_chatglm.py", line 544, in forward
    attention_output, kv_cache = self.self_attention(
                                 ^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.cache\huggingface\modules\transformers_modules\chatglm3-6b\modeling_chatglm.py", line 413, in forward
    cache_k, cache_v = kv_cache
    ^^^^^^^^^^^^^^^^
ValueError: too many values to unpack (expected 2)
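The root failure is the unpack at modeling_chatglm.py line 413: the repo's pinned remote code expects each layer's kv_cache to be a plain (key, value) 2-tuple, while the installed transformers appears to hand it a container with more entries. A minimal sketch of the mechanism (the 3-element tuple is a stand-in, not the actual cache layout):

```python
# Minimal sketch of the failure mode. The extra element is a stand-in for
# whatever the newer transformers cache layout actually carries per layer.
kv_cache = ("key", "value", "extra")

try:
    cache_k, cache_v = kv_cache  # same unpack as modeling_chatglm.py:413
except ValueError as e:
    print(e)  # -> too many values to unpack (expected 2)
```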
Traceback (most recent call last):
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\gradio\queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\gradio\route_utils.py", line 276, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\gradio\blocks.py", line 1923, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\gradio\blocks.py", line 1520, in call_function
    prediction = await utils.async_iteration(iterator)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\gradio\utils.py", line 663, in async_iteration
    return await iterator.__anext__()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\gradio\utils.py", line 656, in __anext__
    return await anyio.to_thread.run_sync(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\anyio\to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\anyio\_backends\_asyncio.py", line 2177, in run_sync_in_worker_thread
    return await future
           ^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\anyio\_backends\_asyncio.py", line 859, in run
    result = context.run(func, *args)
             ^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\gradio\utils.py", line 639, in run_sync_iterator_async
    return next(iterator)
           ^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\gradio\utils.py", line 801, in gen_wrapper
    response = next(iterator)
               ^^^^^^^^^^^^^^
  File "d:\ChatGLM3\basic_demo\web_demo_gradio.py", line 144, in predict
    for new_token in streamer:
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\site-packages\transformers\generation\streamers.py", line 223, in __next__
    value = self.text_queue.get(timeout=self.timeout)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\hzw\.conda\envs\glm3-py311\Lib\queue.py", line 179, in get
    raise Empty
_queue.Empty
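The _queue.Empty in the Gradio handler is a downstream symptom rather than a second bug: generate() runs in a worker thread and feeds tokens to the streamer through a queue, so once that thread dies on the ValueError, the consumer's timed get() raises Empty. A self-contained sketch of the pattern (names are illustrative, not the demo's actual code):

```python
import queue
import threading

text_queue = queue.Queue()

def generate_worker():
    # Dies before producing any token, like the generate thread above;
    # its traceback goes to stderr but the main thread keeps running.
    raise ValueError("too many values to unpack (expected 2)")

threading.Thread(target=generate_worker).start()

try:
    text_queue.get(timeout=1.0)  # mirrors streamers.py: text_queue.get(timeout=...)
except queue.Empty:
    print("queue.Empty: the producer thread died, so no tokens ever arrived")
```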
System Info / 系統信息
torch 2.4.0+cu118, Python 3.11, Windows
Who can help? / 谁可以帮助到您?
No response
Information / 问题信息
Reproduction / 复现过程

Run basic_demo/web_demo_gradio.py with the local chatglm3-6b weights and send any message (here "你好"); generation crashes immediately with the traceback above.
Expected behavior / 期待表现
The web demo should stream a normal reply instead of crashing with "ValueError: too many values to unpack (expected 2)". Please fix this.
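A possible mitigation sketch, assuming the crash comes from a cache-layout mismatch between the pinned modeling_chatglm.py and a newer transformers release; the exact version below is an assumption and should be checked against the repo's requirements.txt:

```python
# Hypothetical mitigation: pin transformers to a release whose legacy
# (key, value) tuple cache matches what modeling_chatglm.py:413 unpacks.
# The version number is an assumption -- verify against ChatGLM3's requirements:
#
#   pip install "transformers==4.40.2"
#
# Sanity check after reinstalling:
import transformers
print(transformers.__version__)
```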