AndyTheFactory / RO-Diacritics

Python package for Romanian diacritics restoration
MIT License
4 stars 0 forks source link

cannot import name 'RomanianDiacritics' from 'ro_diacritics' #2

Open me-suzy opened 2 months ago

me-suzy commented 2 months ago

multumesc pentru libraria ro_diacritics . Sper sa fie buna in Python, dar am o eroare:

*** Remote Interpreter Reinitialized ***
C:\Users\necul\AppData\Local\Programs\Python\Python312\Lib\site-packages\torchtext\vocab\__init__.py:4: UserWarning: 
/!\ IMPORTANT WARNING ABOUT TORCHTEXT STATUS /!\ 
Torchtext is deprecated and the last released version will be 0.18 (this one). You can silence this warning by calling the following at the beginnign of your scripts: `import torchtext; torchtext.disable_torchtext_deprecation_warning()`
  warnings.warn(torchtext._TORCHTEXT_DEPRECATION_MSG)
C:\Users\necul\AppData\Local\Programs\Python\Python312\Lib\site-packages\torchtext\utils.py:4: UserWarning: 
/!\ IMPORTANT WARNING ABOUT TORCHTEXT STATUS /!\ 
Torchtext is deprecated and the last released version will be 0.18 (this one). You can silence this warning by calling the following at the beginnign of your scripts: `import torchtext; torchtext.disable_torchtext_deprecation_warning()`
  warnings.warn(torchtext._TORCHTEXT_DEPRECATION_MSG)
Traceback (most recent call last):
  File "<module1>", line 3, in <module>
ImportError: cannot import name 'RomanianDiacritics' from 'ro_diacritics' (C:\Users\necul\AppData\Local\Programs\Python\Python312\Lib\site-packages\ro_diacritics\__init__.py)
>>> 

Iata codul meu Python original:

import os
import re
from ro_diacritics import RomanianDiacritics

# Inițializăm obiectul RomanianDiacritics
diacritics = RomanianDiacritics()

def process_text(text):
    # Adaugă diacritice
    text_with_diacritice = diacritics.add_diacritics(text)

    # Aici puteți adăuga reguli simple de corectare gramaticală
    # De exemplu, corectarea unor greșeli comune:
    text_with_diacritice = text_with_diacritice.replace(" sa ", " să ")
    text_with_diacritice = text_with_diacritice.replace(" pe care ", " pe care ")

    return text_with_diacritice

def process_html_file(file_path, output_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Procesează conținutul din h1
    h1_pattern = r'(<h1 class="den_articol" itemprop="name">)(.*?)(</h1>)'
    content = re.sub(h1_pattern, lambda m: m.group(1) + process_text(m.group(2)) + m.group(3), content)

    # Procesează conținutul din p cu clasa text_obisnuit2
    p_pattern = r'(<p class="text_obisnuit2">)(.*?)(</p>)'
    content = re.sub(p_pattern, lambda m: m.group(1) + process_text(m.group(2)) + m.group(3), content)

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(content)

    print(f"Fișier procesat și salvat: {output_path}")

# Directorul sursă și destinație
source_dir = r"g:\De pus pe FTP 2\66"
output_dir = os.path.join(source_dir, "Output")
os.makedirs(output_dir, exist_ok=True)

# Procesează toate fișierele HTML din director
for filename in os.listdir(source_dir):
    if filename.endswith('.html'):
        file_path = os.path.join(source_dir, filename)
        output_path = os.path.join(output_dir, filename)
        print(f"Procesare fișier: {filename}")
        process_html_file(file_path, output_path)

print("Procesarea tuturor fișierelor a fost finalizată.")