Gauff / EpubToAudioBookConverter

Convert EPUB files to MP3 audio books with ease using this intuitive and user-friendly converter that allows you to select voices and playback settings with just a few clicks.
MIT License
7 stars 1 forks source link

text and PDF? #1

Closed giandre closed 1 month ago

giandre commented 3 months ago

This tool is a little gem. The output quality is simply awesome, and it works great with EPUBs. I am not a developer; I tried using ChatGPT to help me add more functionality to your code, but I failed... Have you considered adding text and PDF support as well? This would make the tool even better!

Gauff commented 3 months ago

Hello,

Thank you :-)

I lack time currently, but I keep your suggestion in mind.

The most time-consuming part would be the split-per-chapter phase, since there is no clear definition of a chapter for text or PDF files. It would rely on a set of heuristic rules and thus would not be perfect. I already have several strategies in mind.

If we are lucky, the PDF file will have a set of metadata, but if you have already worked with PDFs, you know that a PDF can be surprising ;-)


Some background: Actually, I'm working on another project that I've not published yet. The difference is that it is a set of command line tools using the power of the pipe '|' operator in Linux. You can chain actions using it.

I've already developed:

In your case, converting a text file to an MP3 would be: tp book.txt | tts --o book.mp3

giandre commented 3 months ago

Hi, I ended up using Claude 3 to help me with the code. I created a new PY file and left everything else the same. Now I can generate audio from a full text. I will try later to build the file upload (PDF or DOCX), but for now this is doing the trick for me. Below is the code "I" used :). Thanks again for your answer.

import os
import tempfile
import tkinter as tk
from tkinter import filedialog, font, scrolledtext, ttk

import customtkinter as ctk

import my_edge_tts

class TextToSpeechUI(ctk.CTk):

def __init__(self):
    """Build the converter window.

    Lays out, top to bottom inside one padded frame: a scrolling text box,
    a filterable voice combobox, three sliders (speed, volume, pitch) each
    with a live numeric readout, the "Generate Audio" button and a status
    label used by ``_log``.
    """
    super().__init__()
    self.title("Text to Speech Converter")
    self.geometry("900x700")
    ctk.set_appearance_mode("dark")
    ctk.set_default_color_theme("green")

    # Tk variables backing the widgets below.
    self.voice_var = tk.StringVar()
    self.output_file_path = tk.StringVar()
    self.playback_speed_percentage = tk.IntVar(value=100)
    self.volume_percentage = tk.IntVar(value=100)
    self.pitch_hz = tk.IntVar(value=0)

    # Single container frame that every widget is packed into.
    self.main_frame = ctk.CTkFrame(self)
    self.main_frame.pack(fill=tk.BOTH, expand=True, padx=20, pady=20)
    container = self.main_frame

    def add_slider(caption, low, high, variable):
        # Caption, horizontal scale and numeric readout share one layout;
        # only the caption text, range and bound variable differ.
        heading = ctk.CTkLabel(container, text=caption)
        heading.pack(pady=(10, 5))
        scale = ttk.Scale(
            container,
            from_=low,
            to=high,
            orient=tk.HORIZONTAL,
            variable=variable,
            length=300,
        )
        scale.pack(fill=tk.X, padx=20)
        readout = ctk.CTkLabel(container, textvariable=variable)
        readout.pack(pady=(0, 10))
        return heading, scale, readout

    # Text entry area.
    self.text_label = ctk.CTkLabel(container, text="Enter text:")
    self.text_label.pack(pady=(0, 5))
    self.text_input = scrolledtext.ScrolledText(
        container, height=10, width=80, wrap=tk.WORD
    )
    self.text_input.pack(fill=tk.BOTH, expand=True, pady=(0, 10))

    # Voice picker; the list is narrowed as the user types.
    self.voice_label = ctk.CTkLabel(container, text="Select voice:")
    self.voice_label.pack(pady=(10, 5))
    self.voice_options = sorted(my_edge_tts.load_voices())
    self.voice_combobox = ttk.Combobox(
        container, textvariable=self.voice_var, values=self.voice_options
    )
    self.voice_combobox.pack(pady=(0, 10), fill=tk.X)
    self.voice_combobox.bind('<KeyRelease>', self.update_voice_list)

    # ttk widgets ignore the customtkinter theme, so darken the combobox by hand.
    self.style = ttk.Style()
    self.style.theme_use('clam')
    self.style.configure(
        'TCombobox',
        fieldbackground='#2b2b2b',
        background='#2b2b2b',
        foreground='white',
    )
    for option, state_map in (
        ('fieldbackground', [('readonly', '#2b2b2b')]),
        ('selectbackground', [('readonly', '#2b2b2b')]),
        ('selectforeground', [('readonly', 'white')]),
    ):
        self.style.map('TCombobox', **{option: state_map})

    # Speed, volume and pitch controls.
    (
        self.playback_speed_label,
        self.playback_speed_slider,
        self.playback_speed_value_label,
    ) = add_slider("Playback Speed Percentage:", 0, 200, self.playback_speed_percentage)
    (
        self.volume_label,
        self.volume_slider,
        self.volume_value_label,
    ) = add_slider("Volume Percentage:", 50, 200, self.volume_percentage)
    (
        self.pitch_label,
        self.pitch_slider,
        self.pitch_value_label,
    ) = add_slider("Pitch Hz:", -200, 200, self.pitch_hz)

    # Action button and the status line written to by _log.
    self.generate_button = ctk.CTkButton(
        container, text="Generate Audio", command=self._generate_audio
    )
    self.generate_button.pack(pady=10)
    self.log_label = ctk.CTkLabel(container, text="")
    self.log_label.pack(pady=10)

def create_toolbar(self):
    """Add a row of B / I / U formatting buttons to the main frame.

    Each button is 30 wide, packed left to right, and wired to the
    matching ``toggle_*`` method.
    """
    bar = ctk.CTkFrame(self.main_frame)
    bar.pack(fill=tk.X, pady=(0, 5))

    # (button caption, click handler) in left-to-right display order.
    button_specs = (
        ("B", self.toggle_bold),
        ("I", self.toggle_italic),
        ("U", self.toggle_underline),
    )
    for caption, handler in button_specs:
        button = ctk.CTkButton(bar, text=caption, width=30, command=handler)
        button.pack(side=tk.LEFT, padx=2)

def toggle_bold(self):
    """Toggle the "bold" tag on the text currently selected in the input box.

    If the first selected character already carries the tag, it is removed
    from the whole selection; otherwise it is added. Does nothing when no
    text is selected.
    """
    # "sel.first"/"sel.last" only exist while something is selected; using
    # them on an empty selection makes Tk raise TclError, so bail out early.
    if not self.text_input.tag_ranges("sel"):
        return

    if "bold" in self.text_input.tag_names("sel.first"):
        self.text_input.tag_remove("bold", "sel.first", "sel.last")
    else:
        self.text_input.tag_add("bold", "sel.first", "sel.last")
    # (Re)declare how the tag is rendered.
    self.text_input.tag_configure("bold", font=font.Font(weight="bold"))

def toggle_italic(self):
    """Toggle the "italic" tag on the text currently selected in the input box.

    If the first selected character already carries the tag, it is removed
    from the whole selection; otherwise it is added. Does nothing when no
    text is selected.
    """
    # "sel.first"/"sel.last" only exist while something is selected; using
    # them on an empty selection makes Tk raise TclError, so bail out early.
    if not self.text_input.tag_ranges("sel"):
        return

    if "italic" in self.text_input.tag_names("sel.first"):
        self.text_input.tag_remove("italic", "sel.first", "sel.last")
    else:
        self.text_input.tag_add("italic", "sel.first", "sel.last")
    # (Re)declare how the tag is rendered.
    self.text_input.tag_configure("italic", font=font.Font(slant="italic"))

def toggle_underline(self):
    """Toggle the "underline" tag on the text currently selected in the input box.

    If the first selected character already carries the tag, it is removed
    from the whole selection; otherwise it is added. Does nothing when no
    text is selected.
    """
    # "sel.first"/"sel.last" only exist while something is selected; using
    # them on an empty selection makes Tk raise TclError, so bail out early.
    if not self.text_input.tag_ranges("sel"):
        return

    if "underline" in self.text_input.tag_names("sel.first"):
        self.text_input.tag_remove("underline", "sel.first", "sel.last")
    else:
        self.text_input.tag_add("underline", "sel.first", "sel.last")
    # (Re)declare how the tag is rendered.
    self.text_input.tag_configure("underline", underline=True)

def update_voice_list(self, event):
    """Narrow the voice drop-down to the entries matching what was typed.

    Reads the current text from the widget that fired *event* and keeps
    every voice whose name contains it, ignoring case. An empty entry
    restores the full list.
    """
    typed = event.widget.get()
    if typed != '':
        needle = typed.lower()
        matches = [voice for voice in self.voice_options if needle in voice.lower()]
    else:
        matches = self.voice_options
    self.voice_combobox['values'] = matches

def _log(self, text):
    """Show *text* in the status label at the bottom of the window."""
    status_label = self.log_label
    status_label.configure(text=text)
    # Process pending UI work right away; presumably so the message is
    # visible while a long-running callback is still executing.
    self.update()

def _generate_audio(self):
    """Convert the text in the input box to an MP3 file chosen by the user.

    Steps: check that text and a voice are present, ask for the output path
    with a save dialog, write the text to a temporary file, and pass that
    file to ``my_edge_tts.generate_mp3_file`` together with the current
    speed, volume and pitch values. Progress, validation problems and
    errors are reported through ``_log``; nothing is returned.
    """
    text = self.text_input.get("1.0", tk.END).strip()
    if not text:
        self._log("Please enter some text.")
        return

    voice = self.voice_var.get()
    if not voice:
        self._log("Please select a voice.")
        return

    output_file = filedialog.asksaveasfilename(defaultextension=".mp3", filetypes=[("MP3 files", "*.mp3")])
    if not output_file:
        # The save dialog was dismissed without choosing a file.
        return

    self._log("Generating audio...")

    temp_file_path = None
    try:
        try:
            # Use a uniquely named file in the system temp directory rather
            # than a fixed name in the current directory, which may be
            # read-only or clash with an existing file. delete=False keeps
            # the file on disk after the handle is closed so it can be
            # reopened by path.
            with tempfile.NamedTemporaryFile(
                "w", encoding="utf-8", suffix=".txt", delete=False
            ) as temp_file:
                temp_file_path = temp_file.name
                temp_file.write(text)

            # Generate MP3 file
            my_edge_tts.generate_mp3_file(
                temp_file_path,
                output_file,
                voice,
                self.playback_speed_percentage.get(),
                self.volume_percentage.get(),
                self.pitch_hz.get()
            )
        finally:
            # Always clean up, including when synthesis raised.
            if temp_file_path is not None and os.path.exists(temp_file_path):
                os.remove(temp_file_path)

        self._log(f"Audio generated successfully: {output_file}")
    except Exception as e:
        # Top-level UI boundary: surface any failure in the status line
        # instead of letting it vanish into the Tk callback machinery.
        self._log(f"Error generating audio: {e}")

# Start the UI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    app = TextToSpeechUI()
    app.mainloop()