Open david1ibarra opened 11 months ago
Hi @david1ibarra
Can you try to run this before executing geNomad?
PYTHONIOENCODING="utf-8"
LANG="en_US.UTF-8"
Also, what do you mean by truncated output? Can you provide an example?
I tried running what you provided before executing geNomad but it did not work. By truncated output I refer to something like this:
Traceback (most recent call last):
File "
If you rename the input to remove spaces and parentheses, does it work?
I tried this:
import subprocess
Specify the correct paths
input_file_path = "C:\Users\DavidIbarra\OneDrive - Cemvita Factory Inc\Desktop\E. coli K12 MG1655 (NC_000913).fasta.gz" output_directory = "genomad_output" database_path = "C:\Users\DavidIbarra\Downloads\genomad_db_v1.2.tar.gz"
Remove spaces and parentheses from the input file name
input_file_name = os.path.basename(input_file_path) input_file_name_without_spaces_parentheses = input_filename.replace(" ", "").replace("(", "").replace(")", "")
Construct the new input file path with the modified filename
new_input_file_path = os.path.join(os.path.dirname(input_file_path), input_file_name_without_spaces_parentheses)
Rename the input file
os.rename(input_file_path, new_input_file_path)
Create the output directory if it doesn't exist
if not os.path.exists(output_directory): ... os.makedirs(output_directory) ...
Construct the command with the updated input file path
command = f'set PYTHONIOENCODING=utf-8 && genomad end-to-end --cleanup --splits 8 "{new_input_file_path}" {output_directory} {database_path}'
Run the command in the shell
subprocess.run(command, shell=True) ╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ Executing geNomad annotate (v1.7.0). This will perform gene calling in the input sequences and annotate the predicted │ │ proteins with geNomad's markers. │ │ ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── │ │ Outputs: │ │ genomad_output\E._coli_K12_MG1655_NC_000913_annotate │ │ ├── E._coli_K12_MG1655_NC_000913_annotate.json (execution parameters) │ │ ├── E._coli_K12_MG1655_NC_000913_genes.tsv (gene annotation data) │ │ ├── E._coli_K12_MG1655_NC_000913_taxonomy.tsv (taxonomic assignment) │ │ ├── E._coli_K12_MG1655_NC_000913_mmseqs2.tsv (MMseqs2 output file) │ │ └── E._coli_K12_MG1655_NC_000913_proteins.faa (protein FASTA file) │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ Traceback (most recent call last): File "
", line 198, in _run_module_as_main File " ", line 88, in _run_code File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Scripts\genomad.exe__main.py", line 7, in File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\core.py", line 1157, in call return self.main(*args, kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\rich_click\rich_group.py", line 21, in main rv = super().main(args, standalone_mode=False, kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\core.py", line 1078, in main rv = self.invoke(ctx) ^^^^^^^^^^^^^^^^ File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\core.py", line 1688, in invoke return _process_result(sub_ctx.command.invoke(sub_ctx)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\core.py", line 1434, in invoke return ctx.invoke(self.callback, ctx.params) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\core.py", line 783, in invoke return __callback(args, kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\decorators.py", line 33, in new_func return f(get_current_context(), *args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\genomad\cli.py", line 1240, in end_to_end ctx.invoke( File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\core.py", line 783, in invoke return callback(*args, kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\genomad\cli.py", line 441, 
in annotate genomad.annotate.main( File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\genomad\modules\annotate.py", line 82, in main utils.display_header( File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\genomad\utils.py", line 286, in display_header console.print( File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\genomad\utils.py", line 96, in print self.write_print(*args, *kwargs) File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\genomad\utils.py", line 80, in write_print self.writer_console.print(args, kwargs) File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\rich\console.py", line 1673, in print with self: File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\rich\console.py", line 865, in exit self._exit_buffer() File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\rich\console.py", line 823, in _exit_buffer self._check_buffer() File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\rich\console.py", line 2039, in _check_buffer write(text) File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\encodings\cp1252.py", line 19, in encode return codecs.charmap_encode(input,self.errors,encoding_table)[0] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UnicodeEncodeError: 'charmap' codec can't encode characters in position 381-498: character maps toYou may need to add PYTHONIOENCODING=utf-8 to your environment CompletedProcess(args='set PYTHONIOENCODING=utf-8 && genomad end-to-end --cleanup --splits 8 "C:\Users\DavidIbarra\OneDrive - Cemvita Factory Inc\Desktop\E._coli_K12_MG1655_NC_000913.fasta.gz" genomad_output C:\Users\DavidIbarra\Downloads\genomad_db_v1.2.tar.gz', returncode=1)
I am encountering unexpected errors and incomplete output during the execution of the geNomad tool. The error messages indicate potential character encoding issues, and the output seems to be truncated or incomplete, making it difficult to interpret the results accurately. I have also tried setting the PYTHONIOENCODING environment variable to utf-8 to address potential character encoding problems.
Issue Details Error Description: When attempting to execute the geNomad tool, I receive an error related to character encoding ('charmap' codec) and difficulties in encoding specific characters. The error messages include:
UnicodeEncodeError: 'charmap' codec can't encode characters in position ...: character maps to <undefined>
These errors suggest that the tool might face challenges in handling file paths with special characters or in environments with non-UTF-8 character encoding.
Output Truncation:
Additionally, even in successful executions, the output seems to be truncated or cut off, making it challenging to obtain comprehensive and complete information from the tool's output.
Steps to Reproduce: Install the geNomad tool. Execute the tool with a command that involves file paths with special characters, or on a system with a non-UTF-8 character encoding environment. Expected Behavior: The tool should execute smoothly without encountering character encoding errors. The output should be complete and display all relevant information without truncation. Additional Information:
I have attempted to resolve the character encoding issue by setting the PYTHONIOENCODING environment variable to utf-8, but the error persists.
Executing geNomad annotate (v1.7.0). This will perform gene calling in the input sequences and annotate the predicted │ │ proteins with geNomad's markers. │ │ ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── │ │ Outputs: │ │ genomad_output\E. coli K12 MG1655 (NC_000913)_annotate │ │ ├── E. coli K12 MG1655 (NC_000913)_annotate.json (execution parameters) │ │ ├── E. coli K12 MG1655 (NC_000913)_genes.tsv (gene annotation data) │ │ ├── E. coli K12 MG1655 (NC_000913)_taxonomy.tsv (taxonomic assignment) │ │ ├── E. coli K12 MG1655 (NC_000913)_mmseqs2.tsv (MMseqs2 output file) │ │ └── E. coli K12 MG1655 (NC_000913)_proteins.faa (protein FASTA file) │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ Traceback (most recent call last): File "", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Scripts\genomad.exe\__main__.py", line 7, in <module>
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\core.py", line 1157, in __call__
return self.main(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\rich_click\rich_group.py", line 21, in main
rv = super().main(*args, standalone_mode=False, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\core.py", line 1078, in main
rv = self.invoke(ctx)
^^^^^^^^^^^^^^^^
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\core.py", line 1434, in invoke
return ctx.invoke(self.callback, ctx.params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\core.py", line 783, in invoke
return __callback(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\decorators.py", line 33, in new_func
return f(get_current_context(), *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\genomad\cli.py", line 1240, in end_to_end
ctx.invoke(
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\click\core.py", line 783, in invoke
return __callback(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\genomad\cli.py", line 441, in annotate
genomad.annotate.main(
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\genomad\modules\annotate.py", line 82, in main
utils.display_header(
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\genomad\utils.py", line 286, in display_header
console.print(
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\genomad\utils.py", line 96, in print
self.write_print(*args, **kwargs)
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\genomad\utils.py", line 80, in write_print
self.writer_console.print(*args, **kwargs)
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\rich\console.py", line 1673, in print
with self:
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\rich\console.py", line 865, in __exit__
self._exit_buffer()
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\rich\console.py", line 823, in _exit_buffer
self._check_buffer()
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\site-packages\rich\console.py", line 2039, in _check_buffer
write(text)
File "C:\Users\DavidIbarra\AppData\Local\Programs\Python\Python311\Lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode characters in position 381-498: character maps to <undefined>
You may need to add PYTHONIOENCODING=utf-8 to your environment