SevaSk / ecoute

Ecoute is a live transcription tool that provides real-time transcripts for both the user's microphone input (You) and the user's speaker output (Speaker) in a textbox. It also uses OpenAI's GPT-3.5 to generate a suggested response for the user to say, based on the live transcription of the conversation.
https://github.com/SevaSk/ecoute
MIT License

Any way to save the speaker/you output as a txt file? #64

Closed. Evilander closed this issue 1 year ago.

Evilander commented 1 year ago

I added some code to main.py myself, but ended up just creating new problems.

Evilander commented 1 year ago

...also, I am not sure what usefulness or ideas this sparks, but this works for video game chat as well. I had it turned on while playing Call of Duty and, aside from mixing up my voice and my friends' voices, it did a wonderful job with the transcription.

I need to get creative and figure out how we can use this on a larger scale. Anyway, any bit of help is welcome.

Evilander commented 1 year ago

Never mind, solved it myself (with GPT-4, ha). Here is the modified main.py:

```python
import threading
from AudioTranscriber import AudioTranscriber
from GPTResponder import GPTResponder
import customtkinter as ctk
import AudioRecorder
import queue
import time
import torch
import sys
import TranscriberModels

def oprint(message):
    # Note: this helper is not called anywhere below, and `log` is never opened.
    global log
    print(message)
    log.write(message)

def write_in_textbox(textbox, text):
    textbox.delete("0.0", "end")
    textbox.insert("0.0", text)

def update_transcript_UI(transcriber, textbox, output_file):
    transcript_string = transcriber.get_transcript()
    write_in_textbox(textbox, transcript_string)
    output_file.write(transcript_string + "\n")  # append the current transcript snapshot to the file
    textbox.after(300, update_transcript_UI, transcriber, textbox, output_file)

def update_response_UI(responder, textbox, update_interval_slider_label, update_interval_slider, freeze_state):
    if not freeze_state[0]:
        response = responder.response

        textbox.configure(state="normal")
        write_in_textbox(textbox, response)
        textbox.configure(state="disabled")

        update_interval = int(update_interval_slider.get())
        responder.update_response_interval(update_interval)
        update_interval_slider_label.configure(text=f"Update interval: {update_interval} seconds")

    textbox.after(300, update_response_UI, responder, textbox, update_interval_slider_label, update_interval_slider, freeze_state)

def clear_context(transcriber, audio_queue):
    transcriber.clear_transcript_data()
    with audio_queue.mutex:
        audio_queue.queue.clear()

def create_ui_components(root):
    ctk.set_appearance_mode("dark")
    ctk.set_default_color_theme("dark-blue")
    root.title("Ecoute")
    root.configure(bg='#252422')
    root.geometry("1000x600")

    font_size = 20

    transcript_textbox = ctk.CTkTextbox(root, width=300, font=("Arial", font_size), text_color='#FFFCF2', wrap="word")
    transcript_textbox.grid(row=0, column=0, padx=10, pady=20, sticky="nsew")

    response_textbox = ctk.CTkTextbox(root, width=300, font=("Arial", font_size), text_color='#639cdc', wrap="word")
    response_textbox.grid(row=0, column=1, padx=10, pady=20, sticky="nsew")

    freeze_button = ctk.CTkButton(root, text="Freeze", command=None)
    freeze_button.grid(row=1, column=1, padx=10, pady=3, sticky="nsew")

    update_interval_slider_label = ctk.CTkLabel(root, text="", font=("Arial", 12), text_color="#FFFCF2")
    update_interval_slider_label.grid(row=2, column=1, padx=10, pady=3, sticky="nsew")

    update_interval_slider = ctk.CTkSlider(root, from_=1, to=10, width=300, height=20, number_of_steps=9)
    update_interval_slider.set(2)
    update_interval_slider.grid(row=3, column=1, padx=10, pady=10, sticky="nsew")

    return transcript_textbox, response_textbox, update_interval_slider, update_interval_slider_label, freeze_button

def main():
    root = ctk.CTk()
    transcript_textbox, response_textbox, update_interval_slider, update_interval_slider_label, freeze_button = create_ui_components(root)

    audio_queue = queue.Queue()

    user_audio_recorder = AudioRecorder.DefaultMicRecorder()
    user_audio_recorder.record_into_queue(audio_queue)

    time.sleep(2)

    speaker_audio_recorder = AudioRecorder.DefaultSpeakerRecorder()
    speaker_audio_recorder.record_into_queue(audio_queue)

    model = TranscriberModels.get_model('--api' in sys.argv)

    transcriber = AudioTranscriber(user_audio_recorder.source, speaker_audio_recorder.source, model)
    transcribe = threading.Thread(target=transcriber.transcribe_audio_queue, args=(audio_queue,))
    transcribe.daemon = True
    transcribe.start()

    responder = GPTResponder()
    respond = threading.Thread(target=responder.respond_to_transcriber, args=(transcriber,))
    respond.daemon = True
    respond.start()

    print("READY")

    root.grid_rowconfigure(0, weight=100)
    root.grid_rowconfigure(1, weight=1)
    root.grid_rowconfigure(2, weight=1)
    root.grid_rowconfigure(3, weight=1)
    root.grid_columnconfigure(0, weight=2)
    root.grid_columnconfigure(1, weight=1)

    # Add the clear transcript button to the UI
    clear_transcript_button = ctk.CTkButton(root, text="Clear Transcript", command=lambda: clear_context(transcriber, audio_queue))
    clear_transcript_button.grid(row=1, column=0, padx=10, pady=3, sticky="nsew")

    freeze_state = [False]  # using a list so the nested function can change its content

    def freeze_unfreeze():
        freeze_state[0] = not freeze_state[0]  # invert the freeze state
        freeze_button.configure(text="Unfreeze" if freeze_state[0] else "Freeze")

    freeze_button.configure(command=freeze_unfreeze)

    update_interval_slider_label.configure(text=f"Update interval: {update_interval_slider.get()} seconds")

    # Open the output file (hard-coded path)
    output_file = open("F:/Working AI Tools/Logs/output.txt", "w", encoding='utf-8')

    update_transcript_UI(transcriber, transcript_textbox, output_file)
    update_response_UI(responder, response_textbox, update_interval_slider_label, update_interval_slider, freeze_state)

    root.mainloop()

    # Close the output file once the window is closed
    output_file.close()

if __name__ == "__main__":
    main()
```
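
One note for anyone reusing this: update_transcript_UI() appends the full transcript snapshot to the file on every 300 ms refresh, so the file quickly accumulates repeated copies of the conversation. A lighter variation, just a sketch assuming the same transcriber.get_transcript() method used above (the save_transcript helper name is made up here), is to write the transcript once after root.mainloop() returns, i.e. when the window is closed:

```python
def save_transcript(transcriber, path="output.txt"):
    # Hypothetical helper: write the final transcript to disk in one go.
    with open(path, "w", encoding="utf-8") as f:
        f.write(transcriber.get_transcript())

# At the end of main(), instead of writing inside update_transcript_UI():
#     root.mainloop()
#     save_transcript(transcriber, "F:/Working AI Tools/Logs/output.txt")
```

The trade-off is that a crash mid-conversation leaves nothing on disk, whereas the approach above keeps a running (if repetitive) log.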

SevaSk commented 1 year ago

Nice solution!