# Load ChatGLM3-6B-128K sharded across all visible GPUs and run one chat turn.
#
# BUG FIX: with device_map="auto", accelerate has already dispatched every
# submodule to a device (cuda:0, cuda:1, ...). Chaining .cuda() afterwards
# moves the dispatched weights onto a single device, which is exactly what
# the warning "You shouldn't move a model that is dispatched using accelerate
# hooks" means, and leads to
#   RuntimeError: Expected all tensors to be on the same device,
#   but found at least two devices, cuda:0 and cuda:1!
# at inference time. The fix is simply to NOT call .cuda() when using
# device_map="auto" — accelerate handles device placement.
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained(
    "THUDM/chatglm3-6b-128k", trust_remote_code=True
)
model = AutoModel.from_pretrained(
    "THUDM/chatglm3-6b-128k",
    trust_remote_code=True,
    device_map="auto",  # shards the model over available GPUs via accelerate
)
model = model.eval()  # inference mode; disables dropout etc.
response, history = model.chat(tokenizer, "你好", history=[])
print(response)
================报错信息=================
Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:10<00:00, 1.55s/it]
You shouldn't move a model that is dispatched using accelerate hooks.
C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py:228: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at ..\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:455.)
context_layer = torch.nn.functional.scaled_dot_product_attention(query_layer, key_layer, value_layer,
Traceback (most recent call last):
File "e:\MustEnglish\Project\Python\HuDongYi\大模型\Test.py", line 7, in
response, history = model.chat(tokenizer, "你好", history=[])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\utils_contextlib.py", line 115, in decorate_context
return func(*args, kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py", line 1067, in chat
outputs = self.generate(inputs, gen_kwargs, eos_token_id=eos_token_id)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\utils_contextlib.py", line 115, in decorate_context
return func(*args, *kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\generation\utils.py", line 1914, in generate
result = self._sample(
^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\generation\utils.py", line 2651, in _sample
outputs = self(
^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\accelerate\hooks.py", line 169, in new_forward
output = module._old_forward(*args, *kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py", line 966, in forward
transformer_outputs = self.transformer(
^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py", line 853, in forward
hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, *kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py", line 652, in forward
layer_ret = layer(
^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, *kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\accelerate\hooks.py", line 169, in new_forward
output = module._old_forward(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py", line 558, in forward
layernorm_output = self.input_layernorm(hidden_states)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, *kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\accelerate\hooks.py", line 169, in new_forward
output = module._old_forward(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py", line 194, in forward
return (self.weight * hidden_states).to(input_dtype)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1!
### Expected behavior / 期待表现
支持单机多卡
System Info / 系統信息
python==3.12.4
transformers==4.42.3
torch==2.3.1+cu121
windows 11
Who can help? / 谁可以帮助到您?
No response
Information / 问题信息
Reproduction / 复现过程
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b-128k", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm3-6b-128k", trust_remote_code=True, device_map="auto").cuda()
model = model.eval()
response, history = model.chat(tokenizer, "你好", history=[])
print(response)
================报错信息================= Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:10<00:00, 1.55s/it] You shouldn't move a model that is dispatched using accelerate hooks. C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py:228: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at ..\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:455.) context_layer = torch.nn.functional.scaled_dot_product_attention(query_layer, key_layer, value_layer, Traceback (most recent call last): File "e:\MustEnglish\Project\Python\HuDongYi\大模型\Test.py", line 7, in
response, history = model.chat(tokenizer, "你好", history=[])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\utils_contextlib.py", line 115, in decorate_context
return func(*args, kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py", line 1067, in chat
outputs = self.generate(inputs, gen_kwargs, eos_token_id=eos_token_id)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\utils_contextlib.py", line 115, in decorate_context
return func(*args, *kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\generation\utils.py", line 1914, in generate
result = self._sample(
^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\generation\utils.py", line 2651, in _sample
outputs = self(
^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\accelerate\hooks.py", line 169, in new_forward
output = module._old_forward(*args, *kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py", line 966, in forward
transformer_outputs = self.transformer(
^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py", line 853, in forward
hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, *kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py", line 652, in forward
layer_ret = layer(
^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, *kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\accelerate\hooks.py", line 169, in new_forward
output = module._old_forward(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py", line 558, in forward
layernorm_output = self.input_layernorm(hidden_states)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, *kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python312\Lib\site-packages\accelerate\hooks.py", line 169, in new_forward
output = module._old_forward(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm3-6b-128k\f8437ceb4013677fe1cba4b66806822132399271\modeling_chatglm.py", line 194, in forward
return (self.weight * hidden_states).to(input_dtype)