Is there an existing issue for this?

Current Behavior

After running cli_demo.py and entering "你好" at the prompt, the demo errors out with the log below.

Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Load kernel : D:\langchain-model\chatglm-6b-int4\quantization_kernels.so
No set_num_threads() found in kernel.
Setting CPU quantization kernel threads to 10
Using quantization cache
Applying quantization to glm layers
--- Logging error ---
Traceback (most recent call last):
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 19, in
from cpm_kernels.kernels.base import LazyKernelCModule, KernelFunction, round_up
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels__init.py", line 1, in
from . import library
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library__init__.py", line 2, in
from . import cuda
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\cuda.py", line 7, in
cuda = Lib.from_lib("cuda", ctypes.WinDLL("nvcuda.dll"))
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\base.py", line 63, in from_lib
ret = Lib(name)
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\base.py", line 45, in init__
lib_path = windows_find_lib(self.__name)
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\base.py", line 39, in windows_find_lib
return lookup_dll(lib_name)
File "D:\Anaconda\envs\chatglm\lib\site-packages\cpm_kernels\library\base.py", line 16, in lookup_dll
for name in os.listdir(path):
NotADirectoryError: [WinError 267] 目录名称无效。: 'C:\Windows\SysWOW64\WindowsPowerShell\v1.0\powershell.exe'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\Anaconda\envs\chatglm\lib\logging__init.py", line 1100, in emit
msg = self.format(record)
File "D:\Anaconda\envs\chatglm\lib\logging\init.py", line 943, in format
return fmt.format(record)
File "D:\Anaconda\envs\chatglm\lib\logging\init.py", line 678, in format
record.message = record.getMessage()
File "D:\Anaconda\envs\chatglm\lib\logging\init.py", line 368, in getMessage
msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
File "D:\ChatGLM-6B-int4\ChatGLM-6B-main\cli_demo.py", line 8, in
model = AutoModel.from_pretrained("D:\ChatGLM-6B-int4\ChatGLM-6B-main\model", trust_remote_code=True).half().cuda()
File "D:\Anaconda\envs\chatglm\lib\site-packages\transformers\models\auto\auto_factory.py", line 466, in from_pretrained
return model_class.from_pretrained(
File "D:\Anaconda\envs\chatglm\lib\site-packages\transformers\modeling_utils.py", line 2498, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1061, in init__
self.quantize(self.config.quantization_bit, self.config.quantization_embeddings, use_quantization_cache=True, empty_init=True)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1424, in quantize
from .quantization import quantize, QuantizedEmbedding, QuantizedLinear, load_cpu_kernel
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 46, in
logger.warning("Failed to load cpm_kernels:", exception)
Message: 'Failed to load cpm_kernels:'
Arguments: (NotADirectoryError(20, '目录名称无效。'),)
欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序
用户:你好
Traceback (most recent call last):
File "D:\ChatGLM-6B-int4\ChatGLM-6B-main\cli_demo.py", line 58, in
main()
File "D:\ChatGLM-6B-int4\ChatGLM-6B-main\cli_demo.py", line 43, in main
for response, history in model.stream_chat(tokenizer, query, history=history):
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\utils_contextlib.py", line 35, in generator_context
response = gen.send(None)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1311, in stream_chat
for outputs in self.stream_generate(inputs, gen_kwargs):
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\utils_contextlib.py", line 35, in generator_context
response = gen.send(None)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1388, in stream_generate
outputs = self(
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1190, in forward
transformer_outputs = self.transformer(
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 996, in forward
layer_ret = layer(
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 627, in forward
attention_outputs = self.attention(
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 445, in forward
mixed_raw_layer = self.query_key_value(hidden_states)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 393, in forward
output = W8A16Linear.apply(input, self.weight, self.weight_scale, self.weight_bit_width)
File "D:\Anaconda\envs\chatglm\lib\site-packages\torch\autograd\function.py", line 598, in apply
return super().apply(*args, **kwargs)  # type: ignore[misc]
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 56, in forward
weight = extract_weight_to_half(quant_w, scale_w, weight_bit_width)
File "C:\Users\HP/.cache\huggingface\modules\transformers_modules\model\quantization.py", line 276, in extract_weight_to_half
func = kernels.int4WeightExtractionHalf
AttributeError: 'NoneType' object has no attribute 'int4WeightExtractionHalf'
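For what it's worth, my reading of the two stacked failures above (an interpretation, not a confirmed fix): while looking for nvcuda.dll, cpm_kernels walks every entry of PATH with os.listdir(), and one PATH entry on this machine points at a file (powershell.exe), so lookup_dll raises NotADirectoryError (WinError 267, 目录名称无效 = "the directory name is invalid"). quantization.py catches that, logs a warning, and leaves its kernels object as None, which is what the final AttributeError dereferences. A defensive version of the scan could simply skip non-directory PATH entries; the sketch below keeps the names from cpm_kernels\library\base.py in the traceback, but the body is my guess, not the library's actual code:

```python
import os

def lookup_dll(prefix: str):
    """Sketch: find a DLL on PATH, tolerating PATH entries that are files."""
    for path in os.environ.get("PATH", "").split(os.pathsep):
        # Skip empty entries and entries that are files (e.g. powershell.exe),
        # which is exactly what raises NotADirectoryError in the log above.
        if not path or not os.path.isdir(path):
            continue
        for name in os.listdir(path):
            if name.startswith(prefix) and name.lower().endswith(".dll"):
                return os.path.join(path, name)
    return None
```

The secondary "--- Logging error ---" block is only a reporting bug on top of this: `logger.warning("Failed to load cpm_kernels:", exception)` passes the exception as a %-format argument without a placeholder, hence the TypeError; `logger.warning("Failed to load cpm_kernels: %s", exception)` would log it cleanly.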
Expected Behavior
No response
Steps To Reproduce
Run cli_demo.py and enter "你好" at the prompt; the errors above are raised.
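A script form of these steps, as a minimal repro sketch (it reuses the local model path and the stream_chat call visible in the traceback above):

```python
from transformers import AutoModel, AutoTokenizer

model_path = r"D:\ChatGLM-6B-int4\ChatGLM-6B-main\model"  # local int4 checkpoint

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().cuda()

# The first reply triggers the quantized linear layer and fails with
# AttributeError: 'NoneType' object has no attribute 'int4WeightExtractionHalf'
# because the cpm_kernels import failed during model loading.
for response, history in model.stream_chat(tokenizer, "你好", history=[]):
    print(response)
```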
Environment
- OS: Windows 11
- Python: 3.10
- Transformers: 4.27.1
- PyTorch: 2.3.1+cu118
- CUDA Support (`python -c "import torch; print(torch.cuda.is_available())"`): True
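For completeness, a quick check (a small sketch, not part of the repro) that lists PATH entries which are files rather than directories, the condition that trips cpm_kernels here:

```python
import os

# Print every PATH entry that exists but is not a directory; on this machine
# it should show C:\Windows\SysWOW64\WindowsPowerShell\v1.0\powershell.exe.
for entry in os.environ.get("PATH", "").split(os.pathsep):
    if entry and os.path.exists(entry) and not os.path.isdir(entry):
        print("non-directory PATH entry:", entry)
```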
Anything else?
No response