tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen25-7b-mono", trust_remote_code=True)
# or
tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen25-7b-multi", trust_remote_code=True)
gives an error:
Traceback (most recent call last):
File "C:\teamscale\teamscale\server\com.teamscale.service\src\main\resources\com\teamscale\service\testimpact\embeddings_prioritization\ml\code_gen_embedder.py", line 4, in <module>
tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen25-7b-multi", trust_remote_code=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Alessandro\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\models\auto\tokenization_auto.py", line 905, in from_pretrained
return tokenizer_class.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Alessandro\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\tokenization_utils_base.py", line 2213, in from_pretrained
return cls._from_pretrained(
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Alessandro\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\tokenization_utils_base.py", line 2447, in _from_pretrained
tokenizer = cls(*init_inputs, **init_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Alessandro\.cache\huggingface\modules\transformers_modules\Salesforce\codegen25-7b-multi\0bdf3f45a09e4f53b333393205db1388634a0e2e\tokenization_codegen25.py", line 136, in __init__
super().__init__(
File "C:\Users\Alessandro\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\tokenization_utils.py", line 435, in __init__
super().__init__(**kwargs)
File "C:\Users\Alessandro\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\tokenization_utils_base.py", line 1592, in __init__
raise AttributeError(f"{key} conflicts with the method {key} in {self.__class__.__name__}")
AttributeError: add_special_tokens conflicts with the method add_special_tokens in CodeGen25Tokenizer
I have installed tiktoken==0.8.0 because installing tiktoken==0.4.0 via pip fails.
The code from https://huggingface.co/Salesforce/codegen25-7b-multi_P#causal-sampling-code-autocompletion and https://github.com/salesforce/CodeGen/tree/main/codegen25#sampling currently does not work. Creating the tokenizer as in the snippet at the top of this report (with either the `mono` or the `multi` checkpoint)
gives an error:
I have installed
tiktoken==0.8.0
as installation of tiktoken==0.4.0
via pip fails.