VikParuchuri / surya

OCR, layout analysis, reading order, table recognition in 90+ languages
https://www.datalab.to
GNU General Public License v3.0
14.29k stars 889 forks source link

need the text in the image #204

Open alen978 opened 1 month ago

alen978 commented 1 month ago

I used AI to write the script below, but the resulting .txt file only contains bounding-box coordinates such as:

[36,10],[38,10],[37,13],[35,13]

The image contains the text "48". How can I get the final recognized result "48" instead of the coordinates?

////////// import os from surya.detection import batch_text_detection from surya.model.detection.model import load_model, load_processor from PIL import Image

def read_image(image_path):
    """Open the image at ``image_path`` with Pillow.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The object returned by ``Image.open``, or ``None`` when opening
        fails. On failure the error is printed rather than raised.
    """
    try:
        return Image.open(image_path)
    except Exception as e:
        # Deliberate best-effort: any failure is reported and signalled with
        # None so the caller can decide to skip this file.
        print(f"Error opening image {image_path}: {e}")
        return None

def save_text_to_file(image_path, text):
    """Write ``text`` to a ``.txt`` file located next to ``image_path``.

    The output path is ``image_path`` with its extension replaced by
    ``.txt`` (or with ``.txt`` appended when there is no extension). An
    existing file at that path is overwritten.

    Args:
        image_path: Path of the image the text belongs to.
        text: Content to write, encoded as UTF-8.
    """
    # splitext only inspects the final path component, so a dot in a parent
    # directory name (e.g. "scans.v2/page") is never mistaken for the
    # file extension, which a plain rsplit('.', 1) on the whole path would do.
    base_path, _ = os.path.splitext(image_path)
    txt_file_path = f"{base_path}.txt"

    with open(txt_file_path, 'w', encoding='utf-8') as f:
        f.write(text)

def main(image_dir):
    """Run text detection on every ``.png`` file directly inside ``image_dir``.

    For each image, the polygon of every detected box is written, one per
    line, to a ``.txt`` file next to the image. Images that cannot be opened
    are skipped, and any other per-file error is printed so the remaining
    files are still processed.

    Note that only detection is run here, so the output files contain
    polygon coordinates, not the characters inside the boxes.

    Args:
        image_dir: Directory to scan for ``.png`` files (not recursive).
    """
    # Load model and processor once; both are reused for every image.
    model, processor = load_model(), load_processor()

    for filename in os.listdir(image_dir):
        if not filename.endswith(".png"):
            continue

        image_path = os.path.join(image_dir, filename)

        try:
            img = read_image(image_path)

            if img is None:
                print(f"Skipping {image_path}")
                continue

            predictions = batch_text_detection([img], model=model, processor=processor)[0].bboxes

            # Here you would typically extract the actual text instead of
            # polygon coordinates.
            # NOTE(review): presumably a separate recognition (OCR) step is
            # needed to get the text itself - confirm against the surya docs.
            # str(...)[1:-1] drops the outer brackets of the polygon's repr.
            detected_texts = "".join(
                str(bbox.polygon)[1:-1].replace(" ", "") + "\n"
                for bbox in predictions
            )

            save_text_to_file(image_path, detected_texts)

            print(f"Processed {filename}, saved text to {image_path.rsplit('.', 1)[0]}.txt")

        except Exception as e:
            # Best-effort batch: report the failure and move on to the next file.
            print(f"Error processing {image_path}: {e}")

# Example usage: process every .png image in the folder once and save the
# results to .txt files next to the images.
# The guard keeps the batch from running when this file is imported.
if __name__ == "__main__":
    main(r"D:\Microsoft VS Code\py\social\ocr")