Closed Evilander closed 1 year ago
...also - I'm not sure what usefulness or ideas, if any, this sparks - but this works for video game chat as well. I had it turned on while playing Call of Duty and, aside from mixing up my voice and my friends' voices, it did a wonderful job with the transcription.
I need to get creative and figure out how we can use this on a larger scale. Anyway - any bit of help is welcome.
Never mind. Solved it myself (with GPT-4, ha).
import queue
import sys
import threading
import time

import customtkinter as ctk
import torch

import AudioRecorder
import TranscriberModels
from AudioTranscriber import AudioTranscriber
from GPTResponder import GPTResponder
def oprint(message):
    """Echo ``message`` to stdout and write it to the module-level ``log``.

    ``log`` is not defined in this snippet; it must be bound at module scope
    to an object with a ``write`` method before the first call. Unlike
    ``print``, the write to ``log`` does not append a newline.

    Returns:
        An empty tuple.
    """
    print(message)
    # ``log`` is only read here, never rebound, so the plain global lookup
    # behaves the same as an explicit ``global`` declaration.
    log.write(message)
    return ()
def write_in_textbox(textbox, text):
    """Replace the whole content of ``textbox`` with ``text``.

    Args:
        textbox: widget exposing ``delete(start, end)`` and
            ``insert(index, text)``.
        text: the new content to show.
    """
    start = "0.0"
    textbox.delete(start, "end")
    textbox.insert(start, text)
def update_transcript_UI(transcriber, textbox, output_file, last_logged=None):
    """Refresh the transcript textbox and log the transcript when it changes.

    Re-schedules itself on ``textbox`` every 300 ms.

    Args:
        transcriber: object whose ``get_transcript()`` returns the current
            transcript as a string.
        textbox: widget that displays the transcript and provides ``after``.
        output_file: open, writable text file that receives the transcript.
        last_logged: transcript text written on the previous tick, or
            ``None`` on the first call. Callers normally omit it.
    """
    transcript_string = transcriber.get_transcript()
    write_in_textbox(textbox, transcript_string)

    # Only log when the transcript differs from the last tick; writing it
    # unconditionally appended an identical copy roughly three times a second.
    if transcript_string != last_logged:
        output_file.write(transcript_string + "\n")
        # Flush so the log on disk is current even if the app exits abruptly.
        output_file.flush()

    textbox.after(300, update_transcript_UI, transcriber, textbox, output_file, transcript_string)


def update_response_UI(responder, textbox, update_interval_slider_label, update_interval_slider, freeze_state):
    """Refresh the response textbox and apply the slider's update interval.

    Re-schedules itself on ``textbox`` every 300 ms. While
    ``freeze_state[0]`` is true nothing is updated; only the re-scheduling
    happens.

    Args:
        responder: object exposing ``response`` and
            ``update_response_interval(seconds)``.
        textbox: widget that displays the response and provides ``after``.
        update_interval_slider_label: label showing the current interval.
        update_interval_slider: slider whose ``get()`` yields the interval.
        freeze_state: one-element list; element 0 is the freeze flag.
    """
    # NOTE(review): indentation was lost in the pasted source; the slider
    # handling is assumed to sit inside the freeze guard together with the
    # textbox update - confirm against the original file.
    if not freeze_state[0]:
        response = responder.response

        # The textbox is left in the "disabled" state between updates, so it
        # is switched to "normal" only for the duration of the write.
        textbox.configure(state="normal")
        write_in_textbox(textbox, response)
        textbox.configure(state="disabled")

        update_interval = int(update_interval_slider.get())
        responder.update_response_interval(update_interval)
        update_interval_slider_label.configure(text=f"Update interval: {update_interval} seconds")

    textbox.after(300, update_response_UI, responder, textbox, update_interval_slider_label, update_interval_slider, freeze_state)
def clear_context(transcriber, audio_queue):
    """Reset the transcript and discard audio that is still queued.

    Args:
        transcriber: object exposing ``clear_transcript_data()``.
        audio_queue: ``queue.Queue`` holding pending audio items.
    """
    transcriber.clear_transcript_data()

    pending = audio_queue.queue
    # Hold the queue's own lock while emptying its underlying container.
    with audio_queue.mutex:
        pending.clear()
def create_ui_components(root):
    """Configure the main window and build its widgets.

    Args:
        root: the top-level window the widgets are attached to.

    Returns:
        A tuple ``(transcript_textbox, response_textbox,
        update_interval_slider, update_interval_slider_label,
        freeze_button)``.
    """
    ctk.set_appearance_mode("dark")
    ctk.set_default_color_theme("dark-blue")

    root.title("Ecoute")
    root.configure(bg='#252422')
    root.geometry("1000x600")

    def place(widget, row, column, pady):
        # Every widget shares the same horizontal padding and stretches to
        # fill its grid cell.
        widget.grid(row=row, column=column, padx=10, pady=pady, sticky="nsew")
        return widget

    text_font = ("Arial", 20)

    # Row 0: transcript on the left, response on the right.
    transcript_textbox = place(
        ctk.CTkTextbox(root, width=300, font=text_font, text_color='#FFFCF2', wrap="word"),
        row=0, column=0, pady=20,
    )
    response_textbox = place(
        ctk.CTkTextbox(root, width=300, font=text_font, text_color='#639cdc', wrap="word"),
        row=0, column=1, pady=20,
    )

    # Rows 1-3 of the right-hand column: freeze button, interval label, slider.
    # The button's command is left unset here; the caller wires it up.
    freeze_button = place(
        ctk.CTkButton(root, text="Freeze", command=None),
        row=1, column=1, pady=3,
    )
    update_interval_slider_label = place(
        ctk.CTkLabel(root, text="", font=("Arial", 12), text_color="#FFFCF2"),
        row=2, column=1, pady=3,
    )

    update_interval_slider = ctk.CTkSlider(root, from_=1, to=10, width=300, height=20, number_of_steps=9)
    update_interval_slider.set(2)
    place(update_interval_slider, row=3, column=1, pady=10)

    return transcript_textbox, response_textbox, update_interval_slider, update_interval_slider_label, freeze_button
def main(output_path="F:/Working AI Tools/Logs/output.txt"):
    """Build the UI, start recording/transcription/response threads, run the app.

    Args:
        output_path: file the transcript is logged to. It is opened in
            write mode, so an existing file is overwritten. Defaults to the
            path that was previously hard-coded.
    """
    root = ctk.CTk()
    (transcript_textbox, response_textbox, update_interval_slider,
     update_interval_slider_label, freeze_button) = create_ui_components(root)

    audio_queue = queue.Queue()

    user_audio_recorder = AudioRecorder.DefaultMicRecorder()
    user_audio_recorder.record_into_queue(audio_queue)

    # NOTE(review): two-second pause between starting the microphone and the
    # speaker recorder; the reason is not visible here - presumably it lets
    # the first recorder settle. Confirm before removing.
    time.sleep(2)

    speaker_audio_recorder = AudioRecorder.DefaultSpeakerRecorder()
    speaker_audio_recorder.record_into_queue(audio_queue)

    # The model choice depends on whether "--api" was passed on the command line.
    model = TranscriberModels.get_model('--api' in sys.argv)

    transcriber = AudioTranscriber(user_audio_recorder.source, speaker_audio_recorder.source, model)
    # Daemon threads so the workers do not keep the process alive once the
    # window is closed.
    transcribe = threading.Thread(target=transcriber.transcribe_audio_queue, args=(audio_queue,), daemon=True)
    transcribe.start()

    responder = GPTResponder()
    respond = threading.Thread(target=responder.respond_to_transcriber, args=(transcriber,), daemon=True)
    respond.start()

    print("READY")

    # Row 0 (the textboxes) takes almost all the vertical space; the
    # transcript column is twice as wide as the response column.
    root.grid_rowconfigure(0, weight=100)
    root.grid_rowconfigure(1, weight=1)
    root.grid_rowconfigure(2, weight=1)
    root.grid_rowconfigure(3, weight=1)
    root.grid_columnconfigure(0, weight=2)
    root.grid_columnconfigure(1, weight=1)

    # Add the clear transcript button to the UI
    clear_transcript_button = ctk.CTkButton(
        root, text="Clear Transcript", command=lambda: clear_context(transcriber, audio_queue)
    )
    clear_transcript_button.grid(row=1, column=0, padx=10, pady=3, sticky="nsew")

    freeze_state = [False]  # Using list to be able to change its content inside inner functions

    def freeze_unfreeze():
        freeze_state[0] = not freeze_state[0]  # Invert the freeze state
        freeze_button.configure(text="Unfreeze" if freeze_state[0] else "Freeze")

    freeze_button.configure(command=freeze_unfreeze)

    # int() keeps this initial label consistent with the periodic updates,
    # which also show the interval as a whole number.
    update_interval_slider_label.configure(text=f"Update interval: {int(update_interval_slider.get())} seconds")

    # The context manager closes the log file even if the UI loop raises.
    with open(output_path, "w", encoding='utf-8') as output_file:
        update_transcript_UI(transcriber, transcript_textbox, output_file)
        update_response_UI(responder, response_textbox, update_interval_slider_label, update_interval_slider, freeze_state)

        root.mainloop()
# Run the application only when this file is executed as a script.
if __name__ == "__main__":
    main()
Nice solution!
I added some code to the main.py but ended up just creating issues.