[2023-12-06 16:26:08,981] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Traceback (most recent call last):
File "/data-ext/codeshell/demo.py", line 8, in
model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True).to(device)
File "/data-ext/anaconda3/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to
return super().to(*args, *kwargs)
File "/data-ext/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1145, in to
return self._apply(convert)
File "/data-ext/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 797, in _apply
module._apply(fn)
File "/data-ext/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 797, in _apply
module._apply(fn)
File "/data-ext/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 797, in _apply
module._apply(fn)
[Previous line repeated 2 more times]
File "/data-ext/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 820, in _apply
param_applied = fn(param)
File "/data-ext/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1143, in convert
return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
File "/root/.cache/huggingface/modules/transformers_modules/CodeShell-7B-Chat-int4/quantizer.py", line 40, in Params4bitTo
new_param = Params4bit(self.to(device=device, dtype=dtype, non_blocking=non_blocking),
File "/root/.cache/huggingface/modules/transformers_modules/CodeShell-7B-Chat-int4/quantizer.py", line 40, in Params4bitTo
new_param = Params4bit(self.to(device=device, dtype=dtype, non_blocking=non_blocking),
File "/root/.cache/huggingface/modules/transformers_modules/CodeShell-7B-Chat-int4/quantizer.py", line 40, in Params4bitTo
new_param = Params4bit(self.to(device=device, dtype=dtype, non_blocking=non_blocking),
[Previous line repeated 986 more times]
File "/root/.cache/huggingface/modules/transformers_modules/CodeShell-7B-Chat-int4/quantizer.py", line 31, in Params4bitTo
device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(args, **kwargs)
RecursionError: maximum recursion depth exceeded while calling a Python object
代码如下
import time import torch from transformers import AutoModelForCausalLM, AutoTokenizer from transformers import GenerationConfig
model_dir = "./CodeShell-7B-Chat-int4" device = torch.device('cpu') model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True).to(device) tokenizer = AutoTokenizer.from_pretrained(model_dir)
history = [] query = """""" response = model.chat(query, history, tokenizer) print(response)
报错:
[2023-12-06 16:26:08,981] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect) Traceback (most recent call last): File "/data-ext/codeshell/demo.py", line 8, in
model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True).to(device)
File "/data-ext/anaconda3/lib/python3.9/site-packages/transformers/modeling_utils.py", line 2271, in to
return super().to(*args, *kwargs)
File "/data-ext/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1145, in to
return self._apply(convert)
File "/data-ext/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 797, in _apply
module._apply(fn)
File "/data-ext/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 797, in _apply
module._apply(fn)
File "/data-ext/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 797, in _apply
module._apply(fn)
[Previous line repeated 2 more times]
File "/data-ext/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 820, in _apply
param_applied = fn(param)
File "/data-ext/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1143, in convert
return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
File "/root/.cache/huggingface/modules/transformers_modules/CodeShell-7B-Chat-int4/quantizer.py", line 40, in Params4bitTo
new_param = Params4bit(self.to(device=device, dtype=dtype, non_blocking=non_blocking),
File "/root/.cache/huggingface/modules/transformers_modules/CodeShell-7B-Chat-int4/quantizer.py", line 40, in Params4bitTo
new_param = Params4bit(self.to(device=device, dtype=dtype, non_blocking=non_blocking),
File "/root/.cache/huggingface/modules/transformers_modules/CodeShell-7B-Chat-int4/quantizer.py", line 40, in Params4bitTo
new_param = Params4bit(self.to(device=device, dtype=dtype, non_blocking=non_blocking),
[Previous line repeated 986 more times]
File "/root/.cache/huggingface/modules/transformers_modules/CodeShell-7B-Chat-int4/quantizer.py", line 31, in Params4bitTo
device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(args, **kwargs)
RecursionError: maximum recursion depth exceeded while calling a Python object