If reporting a bug, please fill out the following:
Environment
pipecat-ai version: 0.0.48
python version: 3.10
OS: macOS/Ubuntu
Issue description
Provide a clear description of the issue.
The audio mixer (`audio_out_mixer`) does not work with the websocket transport. As soon as I enable it, it blocks my websocket connection and gets stuck while loading the audio. Furthermore, it causes the process to run out of RAM almost immediately, even for audio files smaller than 200 KB.
Repro steps
List the steps to reproduce the issue.
import asyncio
import os
import sys
from deepgram import LiveOptions
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask, PipelineParams
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.deepgram import DeepgramSTTService
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.network.websocket_server import WebsocketServerTransport, WebsocketServerParams
# Load settings from a .env file before any service reads its API key.
# NOTE(review): override=True presumably lets .env values replace variables
# already present in the environment — confirm against python-dotenv docs.
load_dotenv(override=True)
# Remove the handler with id 0 (presumably loguru's default sink) before adding
# our own, so the two steps must stay in this order.
logger.remove(0)
# Send log output to stderr at DEBUG level.
logger.add(sys.stderr, level="DEBUG")
# Sample rate used in main() for the transport's audio input/output and for
# Deepgram's LiveOptions (presumably in Hz).
DESIRED_SAMPLE_RATE = 8000
async def main():
    """Assemble the websocket voice bot used to reproduce the issue and run it.

    The pipeline reads audio from a websocket server transport, passes it
    through Deepgram STT, an OpenAI LLM and ElevenLabs TTS, and writes the
    result back to the transport. A looping ``SoundfileMixer`` is attached to
    the transport output via ``audio_out_mixer``.
    """
    # Background sound that gets mixed into the transport's output audio.
    mixer = SoundfileMixer(
        sound_files={"office": "assets/office-ambience.mp3"},
        default_sound="office",
        volume=2.0,
        loop=True,
    )

    # Websocket transport: audio in/out at the shared sample rate, VAD enabled,
    # and the mixer attached to the output.
    vad_analyzer = SileroVADAnalyzer(params=VADParams(start_secs=0.1))
    transport_params = WebsocketServerParams(
        audio_in_channels=1,
        audio_in_enabled=True,
        audio_in_sample_rate=DESIRED_SAMPLE_RATE,
        audio_out_sample_rate=DESIRED_SAMPLE_RATE,
        audio_out_enabled=True,
        add_wav_header=True,
        vad_enabled=True,
        vad_analyzer=vad_analyzer,
        vad_audio_passthrough=True,
        audio_out_mixer=mixer,
    )
    transport = WebsocketServerTransport(params=transport_params)

    # Services; API keys and the voice id are read from the environment.
    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

    deepgram_key = os.getenv("DEEPGRAM_API_KEY")
    live_options = LiveOptions(
        language="hi",
        model="nova-2",
        sample_rate=DESIRED_SAMPLE_RATE,
    )
    stt = DeepgramSTTService(api_key=deepgram_key, live_options=live_options)

    tts = ElevenLabsTTSService(
        api_key=os.getenv("ELEVENLABS_API_KEY"),
        voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
    )

    # Conversation state; the same list object is handed to the LLM context
    # and later appended to by the connection handler.
    messages = [{"role": "system", "content": "Hello, how can I help you?"}]
    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    processors = [
        transport.input(),  # Websocket input from client
        stt,  # Speech-To-Text
        context_aggregator.user(),
        llm,  # LLM
        tts,  # Text-To-Speech
        transport.output(),  # Websocket output to client
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        allow_interruptions=True,
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(pipeline, params=task_params)

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        # Kick off the conversation by asking the LLM to introduce itself.
        intro_prompt = {
            "role": "system",
            "content": "You are a female assistant, Please introduce yourself to the user.",
        }
        messages.append(intro_prompt)
        await task.queue_frames([LLMMessagesFrame(messages)])

    await PipelineRunner().run(task)
if __name__ == "__main__":
    # Script entry point: run the async main() on a new event loop.
    asyncio.run(main())
Expected behavior
The audio mixer and the websocket transport should work in parallel.
Actual behavior
The audio mixer blocks the websocket connection and causes the process to run out of memory.
Description
Is this reporting a bug or feature request? bug
If reporting a bug, please fill out the following:
Environment
Issue description
Provide a clear description of the issue. The audio mixer (`audio_out_mixer`) does not work with the websocket transport. As soon as I enable it, it blocks my websocket connection and gets stuck while loading the audio. Furthermore, it causes the process to run out of RAM almost immediately, even for audio files smaller than 200 KB.
Repro steps
List the steps to reproduce the issue.
Expected behavior
The audio mixer and the websocket transport should work in parallel.
Actual behavior
The audio mixer blocks the websocket connection and causes the process to run out of memory.
Logs