python-openxml / python-docx

Create and modify Word documents with Python
MIT License
4.55k stars 1.12k forks source link

How to make words that already exist in the document bold #1237

Closed bwsoftw closed 1 year ago

bwsoftw commented 1 year ago

I'm having trouble taking existing words in the document and modifying them with the library. The times I managed to leave it in bold, it was in the paragraph where it contained the words that I wanted to leave in bold, instead of leaving just the word. Here is my code:

`` import os import shutil from docx import Document import requests from senhaapi import API_KEY import json from docxtpl import DocxTemplate import docx from docx.shared import Pt import re from docx.oxml.ns import qn from docx.oxml import OxmlElement from docx.enum.text import WD_PARAGRAPH_ALIGNMENT from docx.shared import RGBColor

ans = read_multiple_choice( "Escolha a matéria da prova que será submetida", [{"label": "Português", "value": "portugues"}, {"label": "Matemática", "value": "matematica"}, {"label": "Geografia", "value": "geografia"}, {"label": "História", "value": "historia"}, {"label": "Física", "value": "fisica"}, {"label": "Química", "value": "quimica"}, {"label": "Literatura", "value": "Literatura"}, {"label": "Inglês", "value": "ingles"}, {"label": "Espanhol", "value": "espanhol"}, ], )

if ans == "portugues": print("Processing the file for Português") # Verifique se chegou a esta parte file_response = read_file("Enviar") file_name = file_response.name

# Verificar se o arquivo tem a extensão .docx
if not file_name.endswith(".docx"):
    display("A prova enviada deve estar no formato .docx", size='medium')
else:
    # Restante do seu código para processar o arquivo .docx
    script_dir = os.getcwd()
    destination_dir = os.path.join(script_dir, "foo/bar")
    os.makedirs(destination_dir, exist_ok=True)
    original_file_path = os.path.join(destination_dir, file_name)
    with open(original_file_path, "wb") as destination_file:
        shutil.copyfileobj(file_response.file, destination_file)

    # Abrir o documento com python-docx
    document = Document(original_file_path)

    texto_a_adicionar = "Prova Adaptada"

    for paragraph in document.paragraphs:
        if not paragraph.text.strip():  # Verificar se o parágrafo está vazio
            run = paragraph.add_run(texto_a_adicionar)
            font = run.font
            font.size = Pt(8)
            break  # Parar após adicionar o texto

    for paragraph in document.paragraphs:
        for run in paragraph.runs:
            run.font.name = 'Arial'
            run.font.size = Pt(14)  # Tamanho da fonte em pontos
    questoes = {}
    questao_atual = None

    # Percorra os parágrafos do documento
    for paragraph in document.paragraphs:
        text = paragraph.text.strip()

        # Verifique se o parágrafo começa com um número seguido por um parêntese
        if text and text[0].isdigit() and text[1:2] == ")":
            # Armazene a questão anterior, se houver
            if questao_atual is not None:
                questoes[questao_numero] = questao_atual.strip()

            # Inicialize a nova questão
            questao_numero = int(text.split(")", 1)[0])
            questao_atual = text.split(")", 1)[1]
        else:
            # Continue a construir a questão atual
            if questao_atual is not None:
                questao_atual += " " + text

    # Armazene a última questão
    if questao_atual is not None:
        questoes[questao_numero] = questao_atual.strip()

    keywords = []

    # Process the questions to extract keywords
    for question_number, question_content in questoes.items():
        # Split the question content into words
        words = re.findall(r'\w+', question_content)
        keywords.extend(words)  # Add words to the keywords list

    api_message = f"Verificar dentro de {', '.join(keywords)} quais são as palavras chaves ou verbos de comando, não mude o tempo verbal das palavras-chaves ou verbos de comando. Só me mostra na resposta apenas o que eu pedi, sem texto antes."

    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
    link = "https://api.openai.com/v1/chat/completions"

    id_modelo = "gpt-3.5-turbo"

    body_api = {
       "model": id_modelo,
       "temperature": 0.3,
       "messages": [{"role": "user", "content": api_message}]
    }

    body_api = json.dumps(body_api)

    request = requests.post(link, headers=headers, data=body_api)
    response = request.json()
    message = response["choices"][0]["message"]["content"]
    print(request)
    print(request.text)

    # Converta a mensagem em minúsculas para facilitar a comparação
    message_lower = message.lower()

    # Crie uma lista para armazenar as palavras-chave encontradas no documento
    palavras_chave_encontradas = []

    # Percorra as palavras-chave e verifique se elas estão presentes na mensagem
    for keyword in keywords:
        keyword_pattern = rf'\b{re.escape(keyword.lower())}\b'  # Usamos \b para marcar limites de palavras completas
        if re.search(keyword_pattern, message_lower):
            palavras_chave_encontradas.append(keyword)

    # Imprima as palavras-chave encontradas
    print("Palavras-chave encontradas no documento:", palavras_chave_encontradas)

    def apply_bold_to_keywords(text, keywords):
        modified_text = text
        for keyword in keywords:
            keyword_pattern = rf'\b{re.escape(keyword.lower())}\b'
            modified_text = re.sub(keyword_pattern, lambda match: f'<b>{match.group()}</b>', modified_text,
                                   flags=re.IGNORECASE)
        return modified_text

    document_to_save = Document()

    for paragraph in document.paragraphs:
        modified_paragraph = document_to_save.add_paragraph()

        runs = []
        current_run_text = ''

        for char in paragraph.text:
            if char == '<':
                if current_run_text.strip():
                    runs.append(current_run_text)
                    current_run_text = ''
                current_run_text += char
            elif char == '>':
                current_run_text += char
                runs.append(current_run_text)
                current_run_text = ''
            else:
                current_run_text += char

        if current_run_text.strip():
            runs.append(current_run_text)

        for run_text in runs:
            if run_text.startswith('<') and run_text.endswith('>'):
                new_run = modified_paragraph.add_run()
                new_run.text = run_text
                new_run.bold = True
            else:
                new_run = modified_paragraph.add_run()
                new_run.text = run_text

    modified_file_path = os.path.join(destination_dir, "modified_" + file_name)
    document_to_save.save(modified_file_path)

else: display("Selecione uma opção válida", size='medium')

display, file_response and read_multiple_choice are from a library of my work that I'm using. and that part of the code where you use this is working perfectly.

Thanks for the help!

scanny commented 1 year ago

Have a look at this StackOverflow answer: https://stackoverflow.com/a/68507579/1902513

Basically, only a run can be made bold or not. A run is a sequence of characters that share the same character formatting. So if the word you want to bold is not in a run by itself, you need to make it so and then apply bold to that run. That's what the linked code does.