File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:431, in get_class_from_dynamic_module(class_reference, pretrained_model_name_or_path, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only, repo_type, code_revision, **kwargs)
429 code_revision = revision
430 # And lastly we get the class inside our newly created module
--> 431 final_module = get_cached_module_file(
432 repo_id,
433 module_file + ".py",
434 cache_dir=cache_dir,
435 force_download=force_download,
436 resume_download=resume_download,
437 proxies=proxies,
438 use_auth_token=use_auth_token,
439 revision=code_revision,
440 local_files_only=local_files_only,
441 repo_type=repo_type,
442 )
443 return get_class_in_module(class_name, final_module.replace(".py", ""))
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:268, in get_cached_module_file(pretrained_model_name_or_path, module_file, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only, repo_type, _commit_hash)
265 raise
267 # Check we have all the requirements in our environment
--> 268 modules_needed = check_imports(resolved_module_file)
270 # Now we move the module inside our cached dynamic modules.
271 full_submodule = TRANSFORMERS_DYNAMIC_MODULE_NAME + os.path.sep + submodule
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:142, in check_imports(filename)
138 def check_imports(filename):
139 """
140 Check if the current Python environment contains all the libraries that are imported in a file.
141 """
--> 142 imports = get_imports(filename)
143 missing_packages = []
144 for imp in imports:
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:124, in get_imports(filename)
120 """
121 Extracts all the libraries that are imported in a file.
122 """
123 with open(filename, "r", encoding="utf-8") as f:
--> 124 content = f.read()
126 # filter out try/except block so in custom code we can have try/except imports
127 content = re.sub(r"\s*try\s*:\s*.*?\s*except\s*.*?:", "", content, flags=re.MULTILINE | re.DOTALL)
File ~/.conda/envs/py38/lib/python3.8/codecs.py:322, in BufferedIncrementalDecoder.decode(self, input, final)
319 def decode(self, input, final=False):
320 # decode input (taking the buffer into account)
321 data = self.buffer + input
--> 322 (result, consumed) = self._buffer_decode(data, self.errors, final)
323 # keep undecoded input until the next call
324 self.buffer = data[consumed:]
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xce in position 5811: invalid continuation byte
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:431, in get_class_from_dynamic_module(class_reference, pretrained_model_name_or_path, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only, repo_type, code_revision, **kwargs)
429 code_revision = revision
430 # And lastly we get the class inside our newly created module
--> 431 final_module = get_cached_module_file(
432 repo_id,
433 module_file + ".py",
434 cache_dir=cache_dir,
435 force_download=force_download,
436 resume_download=resume_download,
437 proxies=proxies,
438 use_auth_token=use_auth_token,
439 revision=code_revision,
440 local_files_only=local_files_only,
441 repo_type=repo_type,
442 )
443 return get_class_in_module(class_name, final_module.replace(".py", ""))
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:268, in get_cached_module_file(pretrained_model_name_or_path, module_file, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only, repo_type, _commit_hash)
265 raise
267 # Check we have all the requirements in our environment
--> 268 modules_needed = check_imports(resolved_module_file)
270 # Now we move the module inside our cached dynamic modules.
271 full_submodule = TRANSFORMERS_DYNAMIC_MODULE_NAME + os.path.sep + submodule
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:142, in check_imports(filename)
138 def check_imports(filename):
139 """
140 Check if the current Python environment contains all the libraries that are imported in a file.
141 """
--> 142 imports = get_imports(filename)
143 missing_packages = []
144 for imp in imports:
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:124, in get_imports(filename)
120 """
121 Extracts all the libraries that are imported in a file.
122 """
123 with open(filename, "r", encoding="utf-8") as f:
--> 124 content = f.read()
126 # filter out try/except block so in custom code we can have try/except imports
127 content = re.sub(r"\s*try\s*:\s*.*?\s*except\s*.*?:", "", content, flags=re.MULTILINE | re.DOTALL)
File ~/.conda/envs/py38/lib/python3.8/codecs.py:322, in BufferedIncrementalDecoder.decode(self, input, final)
319 def decode(self, input, final=False):
320 # decode input (taking the buffer into account)
321 data = self.buffer + input
--> 322 (result, consumed) = self._buffer_decode(data, self.errors, final)
323 # keep undecoded input until the next call
324 self.buffer = data[consumed:]
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xce in position 5811: invalid continuation byte
Environment
- OS:
- Python:
- Transformers:
- PyTorch:
- CUDA Support (`python -c "import torch; print(torch.cuda.is_available())"`) :
Is there an existing issue for this?
Current Behavior
UnicodeDecodeError Traceback (most recent call last) Cell In[3], line 7 2 from transformers import AutoModel,AutoTokenizer,AutoConfig,DataCollatorForSeq2Seq 5 config = AutoConfig.from_pretrained(cfg.model_name_or_path, trust_remote_code=True) ----> 7 tokenizer = AutoTokenizer.from_pretrained( 8 cfg.model_name_or_path, trust_remote_code=True) 10 model = AutoModel.from_pretrained(cfg.model_name_or_path,config=config, 11 trust_remote_code=True).half() 13 #先量化瘦身
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/models/auto/tokenization_auto.py:676, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs) 674 else: 675 class_ref = tokenizer_auto_map[0] --> 676 tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs) 677 _ = kwargs.pop("code_revision", None) 678 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:431, in get_class_from_dynamic_module(class_reference, pretrained_model_name_or_path, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only, repo_type, code_revision, **kwargs) 429 code_revision = revision 430 # And lastly we get the class inside our newly created module --> 431 final_module = get_cached_module_file( 432 repo_id, 433 module_file + ".py", 434 cache_dir=cache_dir, 435 force_download=force_download, 436 resume_download=resume_download, 437 proxies=proxies, 438 use_auth_token=use_auth_token, 439 revision=code_revision, 440 local_files_only=local_files_only, 441 repo_type=repo_type, 442 ) 443 return get_class_in_module(class_name, final_module.replace(".py", ""))
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:268, in get_cached_module_file(pretrained_model_name_or_path, module_file, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only, repo_type, _commit_hash) 265 raise 267 # Check we have all the requirements in our environment --> 268 modules_needed = check_imports(resolved_module_file) 270 # Now we move the module inside our cached dynamic modules. 271 full_submodule = TRANSFORMERS_DYNAMIC_MODULE_NAME + os.path.sep + submodule
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:142, in check_imports(filename) 138 def check_imports(filename): 139 """ 140 Check if the current Python environment contains all the libraries that are imported in a file. 141 """ --> 142 imports = get_imports(filename) 143 missing_packages = [] 144 for imp in imports:
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:124, in get_imports(filename) 120 """ 121 Extracts all the libraries that are imported in a file. 122 """ 123 with open(filename, "r", encoding="utf-8") as f: --> 124 content = f.read() 126 # filter out try/except block so in custom code we can have try/except imports 127 content = re.sub(r"\s*try\s*:\s*.*?\s*except\s*.*?:", "", content, flags=re.MULTILINE | re.DOTALL)
File ~/.conda/envs/py38/lib/python3.8/codecs.py:322, in BufferedIncrementalDecoder.decode(self, input, final) 319 def decode(self, input, final=False): 320 # decode input (taking the buffer into account) 321 data = self.buffer + input --> 322 (result, consumed) = self._buffer_decode(data, self.errors, final) 323 # keep undecoded input until the next call 324 self.buffer = data[consumed:]
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xce in position 5811: invalid continuation byte
Expected Behavior
No response
Steps To Reproduce
UnicodeDecodeError Traceback (most recent call last) Cell In[3], line 7 2 from transformers import AutoModel,AutoTokenizer,AutoConfig,DataCollatorForSeq2Seq 5 config = AutoConfig.from_pretrained(cfg.model_name_or_path, trust_remote_code=True) ----> 7 tokenizer = AutoTokenizer.from_pretrained( 8 cfg.model_name_or_path, trust_remote_code=True) 10 model = AutoModel.from_pretrained(cfg.model_name_or_path,config=config, 11 trust_remote_code=True).half() 13 #先量化瘦身
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/models/auto/tokenization_auto.py:676, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs) 674 else: 675 class_ref = tokenizer_auto_map[0] --> 676 tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs) 677 _ = kwargs.pop("code_revision", None) 678 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:431, in get_class_from_dynamic_module(class_reference, pretrained_model_name_or_path, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only, repo_type, code_revision, **kwargs) 429 code_revision = revision 430 # And lastly we get the class inside our newly created module --> 431 final_module = get_cached_module_file( 432 repo_id, 433 module_file + ".py", 434 cache_dir=cache_dir, 435 force_download=force_download, 436 resume_download=resume_download, 437 proxies=proxies, 438 use_auth_token=use_auth_token, 439 revision=code_revision, 440 local_files_only=local_files_only, 441 repo_type=repo_type, 442 ) 443 return get_class_in_module(class_name, final_module.replace(".py", ""))
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:268, in get_cached_module_file(pretrained_model_name_or_path, module_file, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only, repo_type, _commit_hash) 265 raise 267 # Check we have all the requirements in our environment --> 268 modules_needed = check_imports(resolved_module_file) 270 # Now we move the module inside our cached dynamic modules. 271 full_submodule = TRANSFORMERS_DYNAMIC_MODULE_NAME + os.path.sep + submodule
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:142, in check_imports(filename) 138 def check_imports(filename): 139 """ 140 Check if the current Python environment contains all the libraries that are imported in a file. 141 """ --> 142 imports = get_imports(filename) 143 missing_packages = [] 144 for imp in imports:
File ~/.conda/envs/py38/lib/python3.8/site-packages/transformers/dynamic_module_utils.py:124, in get_imports(filename) 120 """ 121 Extracts all the libraries that are imported in a file. 122 """ 123 with open(filename, "r", encoding="utf-8") as f: --> 124 content = f.read() 126 # filter out try/except block so in custom code we can have try/except imports 127 content = re.sub(r"\s*try\s*:\s*.*?\s*except\s*.*?:", "", content, flags=re.MULTILINE | re.DOTALL)
File ~/.conda/envs/py38/lib/python3.8/codecs.py:322, in BufferedIncrementalDecoder.decode(self, input, final) 319 def decode(self, input, final=False): 320 # decode input (taking the buffer into account) 321 data = self.buffer + input --> 322 (result, consumed) = self._buffer_decode(data, self.errors, final) 323 # keep undecoded input until the next call 324 self.buffer = data[consumed:]
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xce in position 5811: invalid continuation byte
Environment
Anything else?
No response