import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Smoke test: load the fine-tuned model from the local "output_models"
# directory and run a single chat turn against it.

# Prefer a GPU when one is present; otherwise fall back to the CPU.
target_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load tokenizer and model strictly from local files (no Hub download);
# trust_remote_code is required because CodeShell ships custom model code.
tok = AutoTokenizer.from_pretrained("output_models", local_files_only=True)
chat_model = AutoModelForCausalLM.from_pretrained(
    "output_models", trust_remote_code=True, local_files_only=True
).to(target_device)

dialog = []  # accumulated (query, response) pairs for multi-turn chat
question = '你能为我推荐一些书吗?'
answer = chat_model.chat(question, dialog, tok)
print(answer)
dialog.append((question, answer))
提示错误:
Traceback (most recent call last):
File "/opt/ai/WisdomShell/CodeShell-7B-Chat-int4/finetune/test.py", line 8, in <module>
model = AutoModelForCausalLM.from_pretrained("output_models", trust_remote_code=True, local_files_only=True).to(device)
File "/root/anaconda3/lib/python3.9/site-packages/transformers/models/auto/auto_factory.py", line 560, in from_pretrained
return model_class.from_pretrained(
File "/root/.cache/huggingface/modules/transformers_modules/output_models/modeling_codeshell.py", line 1056, in from_pretrained
model = load_state_dict_for_qunantied_model(model, state_dict)
File "/root/.cache/huggingface/modules/transformers_modules/output_models/quantizer.py", line 379, in load_state_dict_for_qunantied_model
set_value(model, name, state_dict, is_4bit)
File "/root/.cache/huggingface/modules/transformers_modules/output_models/quantizer.py", line 293, in set_value
weight_data = state_dict[f'{name}.data']
KeyError: 'transformer.h.0.attn.c_attn.weight.data'
我微调了 int4 模型(保存目录为 output_models),然后执行了上述测试代码(test.py),结果出现了上面的报错。
请问这是怎么回事?