python test.py
Explicitly passing a revision is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Traceback (most recent call last):
  File "test.py", line 2, in <module>
    tokenizer = AutoTokenizer.from_pretrained(".\THUDM\chatglm2-6b", trust_remote_code=True)
  File "C:\Users\polyt\.conda\envs\ChatGLM2-6B\lib\site-packages\transformers\models\auto\tokenization_auto.py", line 679, in from_pretrained
    return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
  File "C:\Users\polyt\.conda\envs\ChatGLM2-6B\lib\site-packages\transformers\tokenization_utils_base.py", line 1804, in from_pretrained
    return cls._from_pretrained(
  File "C:\Users\polyt\.conda\envs\ChatGLM2-6B\lib\site-packages\transformers\tokenization_utils_base.py", line 1958, in _from_pretrained
    tokenizer = cls(*init_inputs, **init_kwargs)
  File "C:\Users\polyt/.cache\huggingface\modules\transformers_modules\chatglm2-6b\tokenization_chatglm.py", line 73, in __init__
    self.tokenizer = SPTokenizer(vocab_file)
  File "C:\Users\polyt/.cache\huggingface\modules\transformers_modules\chatglm2-6b\tokenization_chatglm.py", line 14, in __init__
    self.sp_model = SentencePieceProcessor(model_file=model_path)
  File "C:\Users\polyt\.conda\envs\ChatGLM2-6B\lib\site-packages\sentencepiece\__init__.py", line 447, in Init
    self.Load(model_file=model_file, model_proto=model_proto)
  File "C:\Users\polyt\.conda\envs\ChatGLM2-6B\lib\site-packages\sentencepiece\__init__.py", line 905, in Load
    return self.LoadFromFile(model_file)
  File "C:\Users\polyt\.conda\envs\ChatGLM2-6B\lib\site-packages\sentencepiece\__init__.py", line 310, in LoadFromFile
    return _sentencepiece.SentencePieceProcessor_LoadFromFile(self, arg)
RuntimeError: Internal: D:\a\sentencepiece\sentencepiece\src\sentencepiece_processor.cc(1102) [model_proto->ParseFromArray(serialized.data(), serialized.size())]
Expected Behavior
No response
Steps To Reproduce
On Windows 11, run this script:
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained(".\THUDM\chatglm2-6b", trust_remote_code=True)
model = AutoModel.from_pretrained(".\THUDM\chatglm2-6b", trust_remote_code=True, device='cuda')
model = model.eval()
response, history = model.chat(tokenizer, "你好", history=[])
print(response)
response, history = model.chat(tokenizer, "晚上睡不着应该怎么办", history=history)
print(response)
Environment
- OS: Windows 11
- Python: 3.8.16
- Transformers: 4.27.1
- PyTorch: 2.0.1
- CUDA Support (`python -c "import torch; print(torch.cuda.is_available())"`) : False
Anything else?
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5\bin\nvcc.exe" --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Mon_Sep_13_20:11:50_Pacific_Daylight_Time_2021
Cuda compilation tools, release 11.5, V11.5.50
Build cuda_11.5.r11.5/compiler.30411180_0
Is there an existing issue for this?
Current Behavior
python test.py
Explicitly passing a revision is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Traceback (most recent call last):
  File "test.py", line 2, in <module>
    tokenizer = AutoTokenizer.from_pretrained(".\THUDM\chatglm2-6b", trust_remote_code=True)
  File "C:\Users\polyt\.conda\envs\ChatGLM2-6B\lib\site-packages\transformers\models\auto\tokenization_auto.py", line 679, in from_pretrained
    return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
  File "C:\Users\polyt\.conda\envs\ChatGLM2-6B\lib\site-packages\transformers\tokenization_utils_base.py", line 1804, in from_pretrained
    return cls._from_pretrained(
  File "C:\Users\polyt\.conda\envs\ChatGLM2-6B\lib\site-packages\transformers\tokenization_utils_base.py", line 1958, in _from_pretrained
    tokenizer = cls(*init_inputs, **init_kwargs)
  File "C:\Users\polyt/.cache\huggingface\modules\transformers_modules\chatglm2-6b\tokenization_chatglm.py", line 73, in __init__
    self.tokenizer = SPTokenizer(vocab_file)
  File "C:\Users\polyt/.cache\huggingface\modules\transformers_modules\chatglm2-6b\tokenization_chatglm.py", line 14, in __init__
    self.sp_model = SentencePieceProcessor(model_file=model_path)
  File "C:\Users\polyt\.conda\envs\ChatGLM2-6B\lib\site-packages\sentencepiece\__init__.py", line 447, in Init
    self.Load(model_file=model_file, model_proto=model_proto)
  File "C:\Users\polyt\.conda\envs\ChatGLM2-6B\lib\site-packages\sentencepiece\__init__.py", line 905, in Load
    return self.LoadFromFile(model_file)
  File "C:\Users\polyt\.conda\envs\ChatGLM2-6B\lib\site-packages\sentencepiece\__init__.py", line 310, in LoadFromFile
    return _sentencepiece.SentencePieceProcessor_LoadFromFile(self, arg)
RuntimeError: Internal: D:\a\sentencepiece\sentencepiece\src\sentencepiece_processor.cc(1102) [model_proto->ParseFromArray(serialized.data(), serialized.size())]
Expected Behavior
No response
Steps To Reproduce
On Windows 11, run this script:
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained(".\THUDM\chatglm2-6b", trust_remote_code=True)
model = AutoModel.from_pretrained(".\THUDM\chatglm2-6b", trust_remote_code=True, device='cuda')
model = model.eval()
response, history = model.chat(tokenizer, "你好", history=[])
print(response)
response, history = model.chat(tokenizer, "晚上睡不着应该怎么办", history=history)
print(response)
Environment
Anything else?
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5\bin\nvcc.exe" --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Mon_Sep_13_20:11:50_Pacific_Daylight_Time_2021
Cuda compilation tools, release 11.5, V11.5.50
Build cuda_11.5.r11.5/compiler.30411180_0