Hi, Deepgram developers. I love Deepgram API. It is really quick
I already checked your live demo using OpenAI
It's really amazing
So I tried to implement speech-to-speech with Deepgram API and OpenAI GPT-4o model
But it has long delay time to generate AI audio response.
This is my current code, but I am not sure what the issue is.
// Speech-to-speech round trip: record -> Deepgram STT -> GPT-4o -> Deepgram TTS -> play.
// NOTE(review): the three network calls run strictly one after another inside the
// FileReader onload callback, so total latency is STT + LLM + TTS added together —
// presumably this is the "long delay" being asked about; Deepgram's streaming
// (WebSocket) endpoints would let the stages overlap. TODO confirm with profiling.
const getResponse = async () => {
let transcript = "";
let ai_response = "";
try {
// Reads `file` as an ArrayBuffer, then runs the whole pipeline in the onload callback.
// NOTE(review): errors thrown inside onload are NOT caught by the outer try/catch —
// the callback runs after getResponse has already returned.
function readAudioFile(file) {
const reader = new FileReader();
// Headers for the Deepgram pre-recorded transcription request.
const headers = {
Authorization: `Token ${process.env.DEEPGRAM_API_KEY}`,
"Content-Type": "audio/wav",
};
reader.onload = async function (event) {
// Raw WAV bytes from the FileReader (ArrayBuffer).
const audioData = event.target.result;
// Step 1: speech-to-text via Deepgram /v1/listen.
// NOTE(review): mixing `await` with `.then()/.catch()` on the same call works
// but is confusing; a plain try/await would be clearer. On error, `transcript`
// silently stays "" and the pipeline continues anyway.
await axios
.post("https://api.deepgram.com/v1/listen", audioData, { headers: headers })
.then((response) => {
if (response.data) {
transcript = response.data.results.channels[0].alternatives[0].transcript;
}
}).catch((error) => {
console.error("Error while transcripting:", error); // Handle errors
});
// Step 2: get response from OpenAI GPT-4o (elided in this snippet; expected to
// set `ai_response` from `transcript`).
...
// Step 3: text-to-speech via Deepgram /v1/speak, then decode and play the audio.
const config = {
headers: {
Authorization: `Token ${process.env.DEEPGRAM_API_KEY}`,
"Content-Type": "application/json",
},
};
const data = {
text: ai_response,
};
// NOTE(review): this waits for the ENTIRE audio file before playback starts;
// Deepgram's TTS also supports streaming output, which would cut perceived delay.
const response = await fetch("https://api.deepgram.com/v1/speak?model=aura-zeus-en",
{
method: "POST",
headers: { ...config.headers },
body: JSON.stringify(data)
}
);
if (!response.ok) {
throw new Error(`HTTP error! Status: ${response.status}`);
}
// Create an AudioContext (webkit prefix for older Safari) and play the buffer.
const audioContext = new (window.AudioContext || window.webkitAudioContext)();
const source = audioContext.createBufferSource();
// Download the complete TTS audio payload as an ArrayBuffer.
const arrayBuffer = await response.arrayBuffer();
// decodeAudioData uses the legacy callback form; decoding errors are only logged.
audioContext.decodeAudioData(
arrayBuffer,
(buffer) => {
source.buffer = buffer;
source.connect(audioContext.destination);
source.start(0);
},
(e) => {
console.log("Error with decoding audio data" + e.err);
}
);
};
// Define what happens on error
reader.onerror = function (event) {
console.error(
"File could not be read! Code " + event.target.error.code
);
};
// Read the file as an ArrayBuffer (useful for binary files like audio)
reader.readAsArrayBuffer(file);
}
// `recordedBlob` comes from the recording code outside this snippet — TODO confirm shape.
const file = new File([recordedBlob.blob], "recording.wav", {
type: "audio/wav",
});
readAudioFile(file);
} catch (err) {
// Only catches synchronous errors from setup above, not async callback failures.
console.log(err);
}
};
@GoldenDragon0710 👋 Since this is a question, please post in Deepgram GitHub Discussions or on Discord, and someone from our Community can try to help you.
Hi, Deepgram developers. I love the Deepgram API — it is really quick. I already checked your live demo using OpenAI; it's really amazing. So I tried to implement speech-to-speech with the Deepgram API and the OpenAI GPT-4o model, but it has a long delay before generating the AI audio response.
This is my current code, but I am not sure what the issue is.