OpenMOSS / MOSS

An open-source tool-augmented conversational language model from Fudan University
https://txsun1997.github.io/blogs/moss.html
Apache License 2.0

moss-moon-003-sft Jupyter test fails #376

Open YoloZyk opened 5 months ago

YoloZyk commented 5 months ago

The moss-moon-003-sft-plugin model published on ModelScope fails to deploy with the provided Jupyter test example, both on my own server and on a ModelScope-hosted instance. The error is `AttributeError: 'MossTokenizer' object has no attribute 'encoder'`. The GPU is an A40; I don't know whether that matters.

```
AttributeError                            Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/modelscope/utils/registry.py:212, in build_from_cfg(cfg, registry, group_key, default_args)
    211 else:
--> 212     return obj_cls(**args)
    213 except Exception as e:
    214     # Normal TypeError does not print class name.

File ~/.cache/modelscope/modelscope_modules/moss-moon-003-sft-plugin/ms_wrapper.py:21, in mossmoon003sftpluginTextGenerationPipeline.__init__(self, model, *args, **kwargs)
     16 def __init__(
     17     self,
     18     model: Union[Model, str],
     19     *args,
     20     **kwargs):
---> 21     model = mossmoon003sftpluginTextGeneration(model) if isinstance(model, str) else model
     22     super().__init__(model=model, **kwargs)

File ~/.cache/modelscope/modelscope_modules/moss-moon-003-sft-plugin/ms_wrapper.py:43, in mossmoon003sftpluginTextGeneration.__init__(self, model_dir, *args, **kwargs)
     42 # loading tokenizer
---> 43 self.tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
     44 self.model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True).half()

File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:774, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
    773     tokenizer_class.register_for_auto_class()
--> 774     return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
    775 elif config_tokenizer_class is not None:

File /opt/conda/lib/python3.10/site-packages/modelscope/utils/hf_util.py:52, in patch_tokenizer_base.<locals>.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
     51 model_dir = pretrained_model_name_or_path
---> 52 return ori_from_pretrained(cls, model_dir, *model_args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2028, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
   2026     logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2028 return cls._from_pretrained(
   2029     resolved_vocab_files,
   2030     pretrained_model_name_or_path,
   2031     init_configuration,
   2032     *init_inputs,
   2033     token=token,
   2034     cache_dir=cache_dir,
   2035     local_files_only=local_files_only,
   2036     _commit_hash=commit_hash,
   2037     _is_local=is_local,
   2038     **kwargs,
   2039 )

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2260, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
   2259 try:
-> 2260     tokenizer = cls(*init_inputs, **init_kwargs)
   2261 except OSError:

File ~/.cache/huggingface/modules/transformers_modules/moss-moon-003-sft-plugin/tokenization_moss.py:149, in MossTokenizer.__init__(self, vocab_file, merges_file, errors, unk_token, bos_token, eos_token, pad_token, add_prefix_space, add_bos_token, **kwargs)
    148 pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
--> 149 super().__init__(
    150     errors=errors,
    151     unk_token=unk_token,
    152     bos_token=bos_token,
    153     eos_token=eos_token,
    154     pad_token=pad_token,
    155     add_prefix_space=add_prefix_space,
    156     add_bos_token=add_bos_token,
    157     **kwargs,
    158 )
    159 self.add_bos_token = add_bos_token

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils.py:367, in PreTrainedTokenizer.__init__(self, **kwargs)
    365 # 4. If some of the special tokens are not part of the vocab, we add them, at the end.
    366 # the order of addition is the same as self.SPECIAL_TOKENS_ATTRIBUTES following `tokenizers`
--> 367 self._add_tokens(
    368     [token for token in self.all_special_tokens_extended if token not in self._added_tokens_encoder],
    369     special_tokens=True,
    370 )
    372 self._decode_use_source_tokenizer = False

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils.py:467, in PreTrainedTokenizer._add_tokens(self, new_tokens, special_tokens)
    466 # TODO this is fairly slow to improve!
--> 467 current_vocab = self.get_vocab().copy()
    468 new_idx = len(current_vocab)  # only call this once, len gives the last index + 1

File ~/.cache/huggingface/modules/transformers_modules/moss-moon-003-sft-plugin/tokenization_moss.py:182, in MossTokenizer.get_vocab(self)
    181 def get_vocab(self):
--> 182     return dict(self.encoder, **self.added_tokens_encoder)

AttributeError: 'MossTokenizer' object has no attribute 'encoder'
```

Later I found that moss-moon-003-sft has the same problem. Does anyone know what's going on?
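Judging from the traceback, this looks like an initialization-order clash between the repo's custom `tokenization_moss.py` and newer `transformers` releases: `PreTrainedTokenizer.__init__` now registers special tokens via `self._add_tokens(...)`, which calls the subclass's `get_vocab()`, before `MossTokenizer.__init__` has assigned `self.encoder`. A minimal sketch of that pattern (toy classes for illustration, not the real library code):

```python
# Toy reproduction of the init-order bug in the traceback above
# (hypothetical classes; names only echo the real ones).

class PreTrainedTokenizerSketch:
    def __init__(self, **kwargs):
        # Newer transformers releases add special tokens during base-class
        # __init__, which calls the subclass's get_vocab() hook too early.
        self._add_tokens(["<pad>"], special_tokens=True)

    def _add_tokens(self, new_tokens, special_tokens=False):
        current_vocab = self.get_vocab().copy()  # subclass hook

class MossTokenizerSketch(PreTrainedTokenizerSketch):
    def __init__(self):
        super().__init__()       # get_vocab() runs in here...
        self.encoder = {"a": 0}  # ...but encoder is only assigned here

    def get_vocab(self):
        return dict(self.encoder)

MossTokenizerSketch()
# AttributeError: 'MossTokenizerSketch' object has no attribute 'encoder'
```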

Xuange923 commented 3 months ago

Hi, have you solved this problem?

xu-song commented 2 months ago

Adding revision="refs/pr/6" fixes it:

```python
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("fnlp/moss-moon-003-sft", trust_remote_code=True, revision="refs/pr/6")
print(tokenizer.encode("good job"))
```
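For reference, `refs/pr/6` is a revision on the `fnlp/moss-moon-003-sft` Hub repo (a pull request updating the remote tokenizer code), so when loading the model as well it seems safest to pin it to the same revision; downgrading `transformers` to the version the MOSS repo was developed against should also sidestep the init-order change. A sketch of the full load under those assumptions (untested; dtype and device settings mirror the `ms_wrapper.py` frame in the traceback above):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "fnlp/moss-moon-003-sft"
revision = "refs/pr/6"  # Hub PR carrying the updated tokenization_moss.py

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, revision=revision)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,          # keep model and tokenizer on the same revision
    torch_dtype=torch.float16,  # assumption: fp16, matching .half() in ms_wrapper.py
    device_map="auto",
)
print(tokenizer.encode("good job"))
```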