gkamradt / QuickAgent

255 stars 112 forks source link

error while not talking #9

Open MissCoco01 opened 2 months ago

MissCoco01 commented 2 months ago

Hi, I'm encountering an error when I run the code. If I don't start talking immediately after running the code, I get an error. Additionally, when I run the code and begin speaking, if I stay quiet for a short period, I encounter the same error.

RuntimeError: Task <Task pending name='Task-30' coro=<AsyncLiveClient.send() running at C:\Python\Python310\lib\site-packages\deepgram\clients\live\v1\async_client.py:179> cb=[_run_until_complete_cb() at C:\Python\Python310\lib\asyncio\base_events.py:184]> got Future attached to a different loop

Is it because of this line: `dg_connection = deepgram.listen.asynclive.v("1")`?

any help pls!!!

fluransco commented 1 month ago

same problem here

fluransco commented 1 month ago

finally fixed the problem

Note, however, that this version of the code does not do real-time streaming and does not use async:

import os
import subprocess
import time
import aiohttp
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
import pyaudio
import sounddevice as sd
import numpy as np
import requests
import shutil
from deepgram import (
    DeepgramClient,
    DeepgramClientOptions,
    LiveTranscriptionEvents,
    LiveOptions,
    Microphone,
    SpeakOptions,
)

from LiminaliaBot import deepgramSpeak
import wave

load_dotenv()

class TranscriptCollector:
    """Accumulate transcript fragments until a complete utterance is ready."""

    def __init__(self):
        # Must be the real constructor (``__init__``, not ``init``) so that
        # ``transcript_parts`` exists before the first ``add_part`` call.
        self.reset()

    def reset(self):
        """Discard every fragment collected so far."""
        self.transcript_parts = []

    def add_part(self, part):
        """Append one transcript fragment to the current utterance."""
        self.transcript_parts.append(part)

    def get_full_transcript(self):
        """Return the collected fragments joined by single spaces."""
        return ' '.join(self.transcript_parts)

# Module-level collector shared with the transcript callback defined inside
# get_transcript(); it is reset after each finalized utterance.
transcript_collector = TranscriptCollector()

def get_transcript():
    """Stream microphone audio to Deepgram and react to finished utterances.

    Opens a live transcription connection, feeds it from the microphone and
    blocks (polling once per second) until the microphone is no longer
    active. Every transcript fragment is added to ``transcript_collector``;
    when a result is flagged ``speech_final`` the joined utterance is printed
    and, if non-blank, passed to ``process_llm``.

    Any ``Exception`` is caught and reported with a printed message. The
    microphone and the connection are closed in ``finally`` blocks so they
    are released even if the wait loop is interrupted (e.g. Ctrl+C) or an
    error occurs after they were started.
    """
    try:
        config = DeepgramClientOptions(options={"keepalive": "true"})
        # NOTE(review): the API key is passed as an empty string; presumably
        # the SDK falls back to the DEEPGRAM_API_KEY environment variable --
        # confirm against the installed SDK version.
        deepgram: DeepgramClient = DeepgramClient("", config)

        dg_connection = deepgram.listen.live.v("1")

        def on_message(self, result, **kwargs):
            sentence = result.channel.alternatives[0].transcript
            # Interim and final fragments are both collected.
            transcript_collector.add_part(sentence)

            if not result.speech_final:
                return

            # This is the final part of the current sentence
            full_sentence = transcript_collector.get_full_transcript()
            print(f"speaker: {full_sentence}")

            # Only non-blank utterances are sent to the LLM.
            if full_sentence.strip():
                process_llm(full_sentence)

            # Reset the collector for the next sentence
            transcript_collector.reset()

        def on_error(self, error, **kwargs):
            print(f"\n\n{error}\n\n")

        dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
        dg_connection.on(LiveTranscriptionEvents.Error, on_error)

        options: LiveOptions = LiveOptions(
            model="nova-2",
            language="en-US",
            # Apply smart formatting to the output
            smart_format=True,
            # Raw audio format details
            encoding="linear16",
            channels=1,
            sample_rate=16000,
            # Time in milliseconds of silence to wait for before finalizing speech
            endpointing=1200,
        )

        dg_connection.start(options)
        try:
            # Open a microphone stream on the selected input device
            microphone = Microphone(dg_connection.send)
            try:
                # Start microphone
                microphone.start()
                print("listening...")

                while microphone.is_active():
                    time.sleep(1)
                print("idle zzZ")
            finally:
                # Wait for the microphone to close
                microphone.finish()
        finally:
            # Indicate that we've finished
            dg_connection.finish()

        print("Finished")

    except Exception as e:
        print(f"Could not open socket: {e}")
        return

def deepgramSpeak(text):
    """Synthesize *text* with Deepgram text-to-speech and play it back.

    The audio is saved to ``output.wav`` in the current working directory,
    played through PyAudio in 1024-frame chunks, and the file is then
    deleted. The wave reader, the output stream, the PyAudio instance and
    the temporary file are all released even when synthesis or playback
    fails. Any ``Exception`` is caught and printed rather than propagated.

    Args:
        text: The text to speak.
    """
    SPEAK_OPTIONS = {"text": text}
    filename = "output.wav"

    try:
        try:
            # Create a Deepgram client
            deepgram = DeepgramClient(api_key=os.getenv("DEEPGRAM_API_KEY"))

            # Configure the options
            options = SpeakOptions(
                model="aura-asteria-en",
                encoding="linear16",
                container="wav"
            )

            # Call the save method on the speak property
            deepgram.speak.v("1").save(filename, SPEAK_OPTIONS, options)

            # Play the audio using PyAudio
            with wave.open(filename, 'rb') as wf:
                p = pyaudio.PyAudio()
                try:
                    stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                                    channels=wf.getnchannels(),
                                    rate=wf.getframerate(),
                                    output=True)
                    try:
                        data = wf.readframes(1024)
                        while data:
                            stream.write(data)
                            data = wf.readframes(1024)
                        stream.stop_stream()
                    finally:
                        stream.close()
                finally:
                    p.terminate()
        finally:
            # Delete the WAV file, including after a failed synthesis/playback
            if os.path.exists(filename):
                os.remove(filename)

    except Exception as e:
        print(f"Exception: {e}")

def process_llm(transcript):
    """Send *transcript* to the Groq chat model, then print and speak the reply.

    Args:
        transcript: The user's utterance, substituted into the prompt.
    """
    template = ChatPromptTemplate.from_messages([
        (
            "human",
            "You are a conversational assistant named Limina. "
            "Use short, conversational responses as if you're having a live conversation. "
            "Your response should be under 20 words. "
            "Do not respond with any code, only conversation {transcript}",
        ),
    ])
    llm = ChatGroq(temperature=1, model_name="llama3-8b-8192")

    # Pipe the prompt into the model and run it once for this utterance.
    pipeline = template | llm
    reply = pipeline.invoke({"transcript": transcript}).content

    print(reply)
    deepgramSpeak(reply)

# Script entry point: start listening only when run directly, not on import.
if __name__ == "__main__":
    get_transcript()