Hello. I have many HTML files with paragraphs written in two languages, Romanian and Chinese. I want to translate only one of the languages and skip the other. The GoogleTranslator line below, even with source='auto', also translates the Romanian text, even where the paragraphs are already in Romanian. I want to quickly skip the Romanian paragraphs and translate only the Chinese paragraphs.
The deep-translator library also translates the Romanian text, but in practice it makes no sense to translate it again. The library must detect and quickly skip paragraphs that are already written in Romanian.
from deep_translator import GoogleTranslator
# Shared translator instance: source='auto' leaves source-language detection
# to the translation backend; target='ro' requests Romanian output.
translator = GoogleTranslator(source='auto', target='ro')
# Code-point ranges of Chinese (CJK) ideographs used by detect_language.
_CJK_IDEOGRAPH_RANGES = (
    (0x3400, 0x4DBF),    # CJK Unified Ideographs Extension A
    (0x4E00, 0x9FFF),    # CJK Unified Ideographs
    (0xF900, 0xFAFF),    # CJK Compatibility Ideographs
    (0x20000, 0x2FA1F),  # Extensions B and later + Compatibility Supplement
)


# Function for detecting the language of a text
def detect_language(text, translator):
    """Classify *text* as ``'chinese'`` or ``'romanian'`` without a network call.

    ``GoogleTranslator`` from deep-translator has no ``detect`` method, so
    the language is decided locally: the files being processed contain only
    Romanian and Chinese, and Chinese is written with CJK ideographs that
    never occur in Romanian text.  Scanning the characters is instantaneous,
    which lets the caller skip Romanian paragraphs without contacting the
    translation service at all.

    Args:
        text: The text content of one tag.
        translator: Unused; kept so existing callers that pass the
            translator keep working.

    Returns:
        ``'chinese'`` if *text* contains at least one CJK ideograph,
        ``'romanian'`` if it contains none (this includes empty or
        whitespace-only text, which needs no translation either), or
        ``None`` if *text* is not a string and nothing can be detected.
    """
    if not isinstance(text, str):
        return None

    for char in text:
        code_point = ord(char)
        # One ideograph is enough: a mixed paragraph still needs translating.
        if any(low <= code_point <= high for low, high in _CJK_IDEOGRAPH_RANGES):
            return 'chinese'
    return 'romanian'
def translate_html_tags(file_path, translator, subfolder_path, ignored_files, translated_files):
    """Translate the non-Romanian tags of one HTML file into a ``*_ro.html`` copy.

    The file is read, passed through ``apply_regex_before_translation``, and
    then every match of every pattern in ``tags_to_translate`` is inspected.
    Each pattern is expected to expose three groups: opening tag, inner
    text, closing tag.  Tags whose inner text ``detect_language`` reports as
    ``'romanian'`` are left untouched; all others have their inner text
    replaced by the result of ``translate_in_parts``.

    Args:
        file_path: Path of the HTML file to process.
        translator: Translator object forwarded to ``detect_language`` and
            ``translate_in_parts``.
        subfolder_path: Directory in which the ``*_ro.html`` file is written.
        ignored_files: List that receives the name of a skipped non-HTML file.
        translated_files: List that receives the path of the written file.

    Returns:
        The path of the translated file, or ``None`` if *file_path* is not
        an ``.html`` file and was ignored.

    Side effects:
        Resets and updates the module-level ``translated_tags_count``.
    """
    global translated_tags_count
    translated_tags_count = 0  # Reset the global counter for each file

    file_name = os.path.basename(file_path)
    if not file_name.endswith('.html'):
        ignored_files.append(file_name)
        print(f"Ignored file: {file_name}")
        return None

    print(f"Translating file: {file_name}")  # Print the file being translated
    local_tag_count = 0  # Local counter for tags within this file
    translated_file_path = os.path.join(subfolder_path, file_name.rsplit('.', 1)[0] + '_ro.html')

    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    content = apply_regex_before_translation(content)

    for tag_regex in tags_to_translate:
        # Rebuild the document from slices so that each match is replaced
        # exactly where it was found.  A global str.replace would also
        # rewrite identical text elsewhere, including text that was
        # already translated.
        pieces = []
        cursor = 0
        for match in re.finditer(tag_regex, content, re.DOTALL):
            local_tag_count += 1  # Increment local tag counter
            tag_start = match.group(1)
            tag_content = match.group(2)
            tag_end = match.group(3)
            if tag_content is None:
                # The text group did not take part in this match: nothing
                # to translate and no span to splice.
                continue
            # Check the tag's language
            if detect_language(tag_content, translator) != 'romanian':
                print(f"Translating tag {local_tag_count}: {tag_start}...{tag_end}")
                translated_content = translate_in_parts(tag_content, translator)
                # Swap only the inner-text span; the tags themselves and
                # everything around them are copied through unchanged.
                pieces.append(content[cursor:match.start(2)])
                # str() tolerates a non-string result from translate_in_parts.
                pieces.append(str(translated_content))
                cursor = match.end(2)
                translated_tags_count += 1
            else:
                print(f"Skipping Romanian tag {local_tag_count}: {tag_start}...{tag_end}")
        pieces.append(content[cursor:])
        content = ''.join(pieces)

    # Save the translated content
    with open(translated_file_path, 'w', encoding='utf-8') as translated_file:
        translated_file.write(content)
    translated_files.append(translated_file_path)
    # Hand the output path back so the caller can use the call's result.
    return translated_file_path
The Error:
Traceback (most recent call last):
File "E:\Carte\BB\17 - Site Leadership\alte\Ionel Balauta\Aryeht\Task 1 - Traduce tot site-ul\Doar Google Web\Andreea\Meditatii\2023\OANA\2. Translate OANA Website.py", line 319, in <module>
translated_file = translate_html_tags(
^^^^^^^^^^^^^^^^^^^^
File "E:\Carte\BB\17 - Site Leadership\alte\Ionel Balauta\Aryeht\Task 1 - Traduce tot site-ul\Doar Google Web\Andreea\Meditatii\2023\OANA\2. Translate OANA Website.py", line 183, in translate_html_tags
if detect_language(tag_content, translator) != 'romanian':
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "E:\Carte\BB\17 - Site Leadership\alte\Ionel Balauta\Aryeht\Task 1 - Traduce tot site-ul\Doar Google Web\Andreea\Meditatii\2023\OANA\2. Translate OANA Website.py", line 147, in detect_language
detected_language = translator.detect(text)
^^^^^^^^^^^^^^^^^
AttributeError: 'GoogleTranslator' object has no attribute 'detect'
>>>
Hello. I have many HTML files with paragraphs written in two languages, Romanian and Chinese. I want to translate only one of the languages and skip the other. The GoogleTranslator line below, even with source='auto', also translates the Romanian text, even where the paragraphs are already in Romanian. I want to quickly skip the Romanian paragraphs and translate only the Chinese paragraphs.
The deep-translator library also translates the Romanian text, but in practice it makes no sense to translate it again. The library must detect and quickly skip paragraphs that are already written in Romanian.
Basically, I want to save translation time.
translator = GoogleTranslator(source='auto', target='ro')
The Error: