Open fraser-langton opened 1 month ago
found the culprit using this script
from pathlib import Path
def check_files_for_charmap_error(glob_pattern):
# Use glob to filter files based on the provided pattern in the current directory
for file_path in Path().glob(glob_pattern): # Use the glob pattern to find matching files
if file_path.is_file(): # Ensure it's a file
if ".venv" in file_path.parts: # Exclude files in .venv directory
continue
try:
with file_path.open(encoding="cp1252") as f: # Windows default encoding
f.read()
except UnicodeDecodeError as e:
print(f"Error decoding {file_path}: {e}")
except Exception as e:
print(f"An error occurred with {file_path}: {e}")
if __name__ == "__main__":
glob_pattern = "**/*.py"
check_files_for_charmap_error(glob_pattern)
And this single char was the culprit: Í
Thanks for the bug report. While it's a bit difficult to get a file in that state, Import Linter should handle it better - I think, by emitting an error that it could not decode that particular file.
To reproduce, we need create a file in a Python codebase that has an Import Linter contract (I used Import Linter itself). This explicitly tells Python what file encoding to expect.
# -*- coding: cp1252 -*-
Then open up a Python REPL to write the invalid byte to the end of that file:
>>> with open("src/importlinter/foo.py", mode="ab") as file:
... file.write(b"\x8d")
Finally, run lint-imports
.
$ lint-imports
=============
Import Linter
=============
'charmap' codec can't decode byte 0x8d in position 25: character maps to <undefined>
The source of the problem can be seen by running the command in debug mode:
$ lint-imports --debug
...
File ".../grimp/application/usecases.py", line 133, in _scan_packages
direct_imports = import_scanner.scan_for_imports(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../grimp/adaptors/importscanner.py", line 36, in scan_for_imports
module_contents = self._read_module_contents(module_filename)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../grimp/adaptors/importscanner.py", line 109, in _read_module_contents
return self.file_system.read(module_filename)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../grimp/adaptors/filesystem.py", line 33, in read
return file.read()
^^^^^^^^^^^
File ".../python3.12/encodings/cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 25: character maps to <undefined>
I think the fix would be to catch the UnicodeDecodeError
within scan_for_imports
and resurface it as a more helpful error, including the filename.
Yep I think just emitting the filename will work :)
@seddonym I think I misled you a bit! It was the config that had emojis in it 😂
I also didn't know about the --debug flag I was just using --verbose
Traceback (most recent call last):
File "C:\Users\fraser\AppData\Roaming\uv\python\cpython-3.8.20-windows-x86_64-none\lib\runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Users\fraser\AppData\Roaming\uv\python\cpython-3.8.20-windows-x86_64-none\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "C:\Users\fraser\Render\render-services\.venv\Scripts\lint-imports.exe\__main__.py", line 7, in <module>
File "C:\Users\fraser\Render\render-services\.venv\lib\site-packages\click\core.py", line 1120, in __call__
return self.main(*args, **kwargs)
File "C:\Users\fraser\Render\render-services\.venv\lib\site-packages\click\core.py", line 1041, in main
rv = self.invoke(ctx)
File "C:\Users\fraser\Render\render-services\.venv\lib\site-packages\click\core.py", line 1395, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "C:\Users\fraser\Render\render-services\.venv\lib\site-packages\click\core.py", line 750, in invoke
return __callback(*args, **kwargs)
File "C:\Users\fraser\Render\render-services\.venv\lib\site-packages\importlinter\cli.py", line 52, in lint_imports_command
exit_code = lint_imports(
File "C:\Users\fraser\Render\render-services\.venv\lib\site-packages\importlinter\cli.py", line 99, in lint_imports
passed = use_cases.lint_imports(
File "C:\Users\fraser\Render\render-services\.venv\lib\site-packages\importlinter\application\use_cases.py", line 57, in lint_imports
raise e
File "C:\Users\fraser\Render\render-services\.venv\lib\site-packages\importlinter\application\use_cases.py", line 52, in lint_imports
user_options = read_user_options(config_filename=config_filename)
File "C:\Users\fraser\Render\render-services\.venv\lib\site-packages\importlinter\application\use_cases.py", line 87, in read_user_options
options = reader.read_options(config_filename=config_filename)
File "C:\Users\fraser\Render\render-services\.venv\lib\site-packages\importlinter\adapters\user_options.py", line 32, in read_options
options = self._read_config_filename(config_filename)
File "C:\Users\fraser\Render\render-services\.venv\lib\site-packages\importlinter\adapters\user_options.py", line 53, in _read_config_filename
file_contents = settings.FILE_SYSTEM.read(config_filename)
File "C:\Users\fraser\Render\render-services\.venv\lib\site-packages\importlinter\adapters\filesystem.py", line 16, in read
return file.read()
File "C:\Users\fraser\AppData\Roaming\uv\python\cpython-3.8.20-windows-x86_64-none\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 622: character maps to <undefined>
``
Ah right, thanks! Well, there are obviously a few places where it would be good to surface encoding problems in a friendlier way.
@dandiggas has kindly offered to look at this one.
Can't offer too much more info unfortunately, I literally opened my laptop up in the morning and the same command won't run, quite baffling