text and PDF? - Githubissues

giandre commented 3 months ago

This tool is a little gem. The output quality is simply awesome, and it works great with epubs. I am not a developer but I tried ChatGPT to help me add more functionality to your code but I failed... Have you considered adding text and PDF functionality as well? This will make the tool even better!

Gauff commented 3 months ago

Hello,

Thank you :-)

I lack time currently, but I keep your suggestion in mind.

The most time-consuming part would be the split-per-chapter phase, since there is no clear definition of a chapter for text or PDF files. It would rely on a set of heuristic rules, and thus would not be perfect. I already have several strategies in mind.

If we are lucky, the PDF file will have a set of metadata, but if you have already worked with PDFs, you know that a PDF can be surprising ;-)

Some background: Actually, I'm working on another project that I've not published yet. The difference is that it is a set of command line tools using the power of the pipe '|' operator in Linux. You can chain actions using it.

I've already developed:

tp: a text processing tool which is able to load numerous file types and extract text (currently audio and text). I'm adding support for the most common picture, audio, text, and office document formats. So there is an overlap with your request. FYI: tp does more: transcription, punctuation restoration, translation, and summarization.
tts: a tool which converts text to voice that you can either listen to or record to an MP3 file.

In your case, reading a text file into an MP3 would be: tp book.txt | tts --o book.mp3

giandre commented 3 months ago

Hi, I ended up using Claude 3 to help me with the code. I created a new PY file and left everything else the same. Now I can generate audio from a full text. I will try later to build the file upload (PDF or DOCX), but for now this is doing the trick for me. Below is the code "I" used :). Thanks again for your answer.

import os
import tempfile
import tkinter as tk
from tkinter import filedialog, font, scrolledtext, ttk

import customtkinter as ctk

import my_edge_tts

# Application window (a customtkinter CTk subclass) titled "Text to Speech Converter".
# NOTE(review): the defs that follow all take `self` and were presumably methods of
# this class; the class-level indentation appears to have been lost in the paste.
class TextToSpeechUI(ctk.CTk):

def __init__(self):
    """Build the window: text box, voice picker, three sliders, button and status label.

    Widgets are packed top to bottom inside ``self.main_frame`` in the order
    they are created here, so the statement order defines the layout.
    """
    super().__init__()
    self.title("Text to Speech Converter")
    self.geometry("900x700")
    ctk.set_appearance_mode("dark")
    ctk.set_default_color_theme("green")

    # Tk variables backing the widgets below.
    self.voice_var = tk.StringVar()
    self.output_file_path = tk.StringVar()
    self.playback_speed_percentage = tk.IntVar(value=100)
    self.volume_percentage = tk.IntVar(value=100)
    self.pitch_hz = tk.IntVar(value=0)

    # Container that every other widget is packed into.
    container = ctk.CTkFrame(self)
    container.pack(fill=tk.BOTH, expand=True, padx=20, pady=20)
    self.main_frame = container

    def add_slider(caption, variable, low, high):
        # One slider section: caption label, horizontal scale, live value label.
        heading = ctk.CTkLabel(container, text=caption)
        heading.pack(pady=(10, 5))
        scale = ttk.Scale(container, from_=low, to=high, orient=tk.HORIZONTAL, variable=variable, length=300)
        scale.pack(fill=tk.X, padx=20)
        readout = ctk.CTkLabel(container, textvariable=variable)
        readout.pack(pady=(0, 10))
        return heading, scale, readout

    # Text entry area.
    self.text_label = ctk.CTkLabel(container, text="Enter text:")
    self.text_label.pack(pady=(0, 5))
    self.text_input = scrolledtext.ScrolledText(container, height=10, width=80, wrap=tk.WORD)
    self.text_input.pack(fill=tk.BOTH, expand=True, pady=(0, 10))

    # Voice picker; typing in the box narrows the list via update_voice_list.
    self.voice_label = ctk.CTkLabel(container, text="Select voice:")
    self.voice_label.pack(pady=(10, 5))
    self.voice_options = sorted(my_edge_tts.load_voices())
    self.voice_combobox = ttk.Combobox(container, textvariable=self.voice_var, values=self.voice_options)
    self.voice_combobox.pack(pady=(0, 10), fill=tk.X)
    self.voice_combobox.bind('<KeyRelease>', self.update_voice_list)

    # ttk widgets do not follow the customtkinter theme, so colour the combobox by hand.
    self.style = ttk.Style()
    self.style.theme_use('clam')
    self.style.configure('TCombobox', fieldbackground='#2b2b2b', background='#2b2b2b', foreground='white')
    readonly_colours = (
        ('fieldbackground', '#2b2b2b'),
        ('selectbackground', '#2b2b2b'),
        ('selectforeground', 'white'),
    )
    for option, colour in readonly_colours:
        self.style.map('TCombobox', **{option: [('readonly', colour)]})

    # Speed, volume and pitch sliders share one layout.
    (
        self.playback_speed_label,
        self.playback_speed_slider,
        self.playback_speed_value_label,
    ) = add_slider("Playback Speed Percentage:", self.playback_speed_percentage, 0, 200)
    (
        self.volume_label,
        self.volume_slider,
        self.volume_value_label,
    ) = add_slider("Volume Percentage:", self.volume_percentage, 50, 200)
    (
        self.pitch_label,
        self.pitch_slider,
        self.pitch_value_label,
    ) = add_slider("Pitch Hz:", self.pitch_hz, -200, 200)

    # Action button and the status line written by _log.
    self.generate_button = ctk.CTkButton(container, text="Generate Audio", command=self._generate_audio)
    self.generate_button.pack(pady=10)
    self.log_label = ctk.CTkLabel(container, text="")
    self.log_label.pack(pady=10)

def create_toolbar(self):
    """Add a row of B / I / U formatting buttons to the main frame.

    NOTE(review): nothing in the visible code calls this method.
    """
    bar = ctk.CTkFrame(self.main_frame)
    bar.pack(fill=tk.X, pady=(0, 5))

    # (caption, handler) pairs, packed left to right in this order.
    actions = (
        ("B", self.toggle_bold),
        ("I", self.toggle_italic),
        ("U", self.toggle_underline),
    )
    for caption, handler in actions:
        button = ctk.CTkButton(bar, text=caption, width=30, command=handler)
        button.pack(side=tk.LEFT, padx=2)

def toggle_bold(self):
    """Toggle the "bold" tag on the current selection of the text box.

    If the first selected character already carries the tag, the tag is
    removed from the whole selection; otherwise it is added. Does nothing
    when no text is selected.
    """
    # "sel.first"/"sel.last" are only valid indices while a selection
    # exists; Tk raises an error for them otherwise, so bail out early.
    if not self.text_input.tag_ranges("sel"):
        return
    if "bold" in self.text_input.tag_names("sel.first"):
        self.text_input.tag_remove("bold", "sel.first", "sel.last")
    else:
        self.text_input.tag_add("bold", "sel.first", "sel.last")
    self.text_input.tag_configure("bold", font=font.Font(weight="bold"))

def toggle_italic(self):
    """Toggle the "italic" tag on the current selection of the text box.

    If the first selected character already carries the tag, the tag is
    removed from the whole selection; otherwise it is added. Does nothing
    when no text is selected.
    """
    # "sel.first"/"sel.last" are only valid indices while a selection
    # exists; Tk raises an error for them otherwise, so bail out early.
    if not self.text_input.tag_ranges("sel"):
        return
    if "italic" in self.text_input.tag_names("sel.first"):
        self.text_input.tag_remove("italic", "sel.first", "sel.last")
    else:
        self.text_input.tag_add("italic", "sel.first", "sel.last")
    self.text_input.tag_configure("italic", font=font.Font(slant="italic"))

def toggle_underline(self):
    """Toggle the "underline" tag on the current selection of the text box.

    If the first selected character already carries the tag, the tag is
    removed from the whole selection; otherwise it is added. Does nothing
    when no text is selected.
    """
    # "sel.first"/"sel.last" are only valid indices while a selection
    # exists; Tk raises an error for them otherwise, so bail out early.
    if not self.text_input.tag_ranges("sel"):
        return
    if "underline" in self.text_input.tag_names("sel.first"):
        self.text_input.tag_remove("underline", "sel.first", "sel.last")
    else:
        self.text_input.tag_add("underline", "sel.first", "sel.last")
    self.text_input.tag_configure("underline", underline=True)

def update_voice_list(self, event):
    """Narrow the voice dropdown to entries containing the typed text.

    Bound to key releases on the combobox. The match is a case-insensitive
    substring test against ``self.voice_options``; an empty entry restores
    the full list.
    """
    needle = event.widget.get().lower()
    if needle:
        matches = [voice for voice in self.voice_options if needle in voice.lower()]
    else:
        matches = self.voice_options
    self.voice_combobox['values'] = matches

def _log(self, text):
    """Show *text* in the status label, then call ``self.update()``."""
    status = self.log_label
    status.configure(text=text)
    # Callers log right before long-running work, so process the change now.
    self.update()

def _generate_audio(self):
    """Convert the text in the input box to an MP3 file chosen by the user.

    Checks that text and a voice are present, asks for a destination path,
    then passes the text to ``my_edge_tts.generate_mp3_file`` through a
    temporary UTF-8 text file together with the current speed, volume and
    pitch settings. Progress and errors are reported through ``self._log``;
    nothing is returned and no exception escapes from the generation step.
    """
    text = self.text_input.get("1.0", tk.END).strip()
    if not text:
        self._log("Please enter some text.")
        return

    voice = self.voice_var.get()
    if not voice:
        self._log("Please select a voice.")
        return

    output_file = filedialog.asksaveasfilename(defaultextension=".mp3", filetypes=[("MP3 files", "*.mp3")])
    if not output_file:
        # Dialog was cancelled.
        return

    self._log("Generating audio...")

    temp_file_path = None
    try:
        # Use a unique file in the system temp directory rather than a fixed
        # name in the working directory, which could overwrite a user's file
        # or fail when the directory is not writable. delete=False because the
        # file is reopened by path after this handle is closed.
        with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".txt", delete=False) as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(text)

        # Generate MP3 file
        my_edge_tts.generate_mp3_file(
            temp_file_path,
            output_file,
            voice,
            self.playback_speed_percentage.get(),
            self.volume_percentage.get(),
            self.pitch_hz.get(),
        )
    except Exception as e:
        # UI boundary: surface any failure in the status label.
        self._log(f"Error generating audio: {str(e)}")
    else:
        self._log(f"Audio generated successfully: {output_file}")
    finally:
        # Always clean up, including when generation raised.
        if temp_file_path is not None:
            try:
                os.remove(temp_file_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask
                # the success or error message already shown.
                pass

# Script entry point: open the window and run the Tk event loop.
if __name__ == "__main__":
    app = TextToSpeechUI()
    app.mainloop()

Gauff / EpubToAudioBookConverter

text and PDF? #1