[Open] ImGoodBai opened this issue 1 year ago
Check your local baichuan-7B/tokenizer.model file; replace it with https://huggingface.co/baichuan-inc/baichuan-7B/resolve/main/tokenizer.model and the error goes away.
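Below is a minimal sketch of one way to apply that fix and verify the result before rerunning demo.py. It assumes a local clone at ./baichuan-7B and uses huggingface_hub to fetch the file; the local path and the use of huggingface_hub are assumptions, not part of the original answer.

    # Sketch: re-fetch tokenizer.model and confirm SentencePiece can parse it.
    # Assumes `pip install huggingface_hub sentencepiece` and a local clone
    # at ./baichuan-7B (hypothetical path, adjust to your setup).
    import shutil
    from huggingface_hub import hf_hub_download
    import sentencepiece as spm

    # Download the known-good file from the Hub (same file as the URL above).
    fresh = hf_hub_download(repo_id="baichuan-inc/baichuan-7B", filename="tokenizer.model")
    shutil.copy(fresh, "baichuan-7B/tokenizer.model")

    # Verify the file loads before rerunning demo.py.
    sp = spm.SentencePieceProcessor()
    sp.Load("baichuan-7B/tokenizer.model")
    print(sp.GetPieceSize())  # should print the vocab size instead of raising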
Required prerequisites
Questions
$ python demo.py
Traceback (most recent call last):
  File "/data/good/baichuan-7B/demo.py", line 3, in <module>
    tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/baichuan-7B", trust_remote_code=True)
  File "/home/good/anaconda3/envs/baichuan/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py", line 693, in from_pretrained
    return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
  File "/home/good/anaconda3/envs/baichuan/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1812, in from_pretrained
    return cls._from_pretrained(
  File "/home/good/anaconda3/envs/baichuan/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1975, in _from_pretrained
    tokenizer = cls(*init_inputs, **init_kwargs)
  File "/data/good/.cache/huggingface/modules/transformers_modules/baichuan-7B/tokenization_baichuan.py", line 89, in __init__
    self.sp_model.Load(vocab_file)
  File "/home/good/anaconda3/envs/baichuan/lib/python3.10/site-packages/sentencepiece/__init__.py", line 905, in Load
    return self.LoadFromFile(model_file)
  File "/home/good/anaconda3/envs/baichuan/lib/python3.10/site-packages/sentencepiece/__init__.py", line 310, in LoadFromFile
    return _sentencepiece.SentencePieceProcessor_LoadFromFile(self, arg)
RuntimeError: Internal: src/sentencepiece_processor.cc(1101) [model_proto->ParseFromArray(serialized.data(), serialized.size())]
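For context: this RuntimeError means SentencePiece could not parse tokenizer.model as a serialized model protobuf, i.e. the file on disk is corrupt or incomplete, which is consistent with the fix above. One common cause with git-cloned checkpoints is a Git LFS pointer stub sitting where the real binary should be. A quick hedged check (the local path is an assumption):

    # Diagnostic sketch: a real SentencePiece model is a multi-MB binary;
    # a Git LFS pointer stub is a ~130-byte text file starting with
    # "version https://git-lfs.github.com/spec/v1".
    import os

    path = "baichuan-7B/tokenizer.model"  # hypothetical local clone path
    print(os.path.getsize(path))  # a size of a few hundred bytes means a stub
    with open(path, "rb") as f:
        head = f.read(32)
    if head.startswith(b"version https://git-lfs"):
        print("LFS pointer stub: run `git lfs pull` or download the real file")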
$ cat demo.py
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/baichuan-7B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("baichuan-inc/baichuan-7B", device_map="auto", trust_remote_code=True)
inputs = tokenizer('登鹳雀楼->王之涣\n夜雨寄北->', return_tensors='pt')
inputs = inputs.to('cuda:0')
pred = model.generate(**inputs, max_new_tokens=64, repetition_penalty=1.1)
print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))
Checklist