taowang1993 opened this issue 1 month ago
Hi, your plugin is great, and I'm very inspired by your work.
I also want to build a mini-app on top of edge-tts: a floating audio player that anyone can embed on their website, so visitors can have the page read aloud with Microsoft TTS.
Here is my code. It loads the voice list fine, but I can't get it to actually read the web page content, so I'm here to ask for help.
```html
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Floating TTS Audio Player</title>
  <style>
    #tts-player {
      position: fixed;
      bottom: 20px;
      left: 50%;
      transform: translateX(-50%);
      background-color: #f0f0f0;
      border-radius: 25px;
      padding: 10px 20px;
      display: flex;
      align-items: center;
      box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
    }
    #tts-player button {
      background: none;
      border: none;
      font-size: 24px;
      cursor: pointer;
      margin: 0 10px;
    }
    #tts-player select,
    #tts-player input {
      margin: 0 10px;
    }
  </style>
</head>
<body>
  <h1>Welcome to the TTS-enabled webpage</h1>
  <p>This is a sample paragraph that can be read aloud using the TTS player below.</p>

  <div id="tts-player">
    <button id="play-pause">▶️</button>
    <select id="voice-select"></select>
    <input type="range" id="speed-control" min="0.5" max="2" step="0.1" value="1">
    <span id="speed-value">1x</span>
  </div>

  <script>
    const BASE_URL = "speech.platform.bing.com/consumer/speech/synthesize/readaloud";
    const TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";
    const VOICES_URL = `https://${BASE_URL}/voices/list?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`;
    const SYNTH_URL = `wss://${BASE_URL}/edge/v1?TrustedClientToken=${TRUSTED_CLIENT_TOKEN}`;
    const BINARY_DELIM = "Path:audio\r\n";
    const VOICE_LANG_REGEX = /\w{2}-\w{2}/;

    const playPauseButton = document.getElementById('play-pause');
    const voiceSelect = document.getElementById('voice-select');
    const speedControl = document.getElementById('speed-control');
    const speedValue = document.getElementById('speed-value');

    let isPlaying = false;
    let audioContext;
    let sourceNode;
    let websocket;
    let reconnectAttempts = 0;
    let voices = [];

    // Initialize the Web Audio context (lazily, on first play)
    function initAudioContext() {
      audioContext = new (window.AudioContext || window.webkitAudioContext)();
    }

    // Group the raw voice list into language -> country -> friendly name
    function formatVoices(voices) {
      return voices.map(v => ({
        language: v.FriendlyName.match(/- ([a-zA-Z]+) \(/)[1],
        country: v.FriendlyName.match(/- .*\(([^)]+)\)/)[1],
        name: v.FriendlyName.match(/Microsoft (.+) Online/)[1],
        gender: v.Gender,
        shortName: v.ShortName,
      })).reduce((acc, voice) => {
        acc[voice.language] = acc[voice.language] || {};
        acc[voice.language][voice.country] = acc[voice.language][voice.country] || {};
        acc[voice.language][voice.country][voice.name] = {
          name: voice.name,
          shortName: voice.shortName
        };
        return acc;
      }, {});
    }

    // Fetch the available voices and fill the <select> with grouped options
    async function fetchVoices() {
      try {
        const response = await fetch(VOICES_URL);
        voices = await response.json();
        const formattedVoices = formatVoices(voices);
        for (const language in formattedVoices) {
          const optgroup = document.createElement('optgroup');
          optgroup.label = language;
          for (const country in formattedVoices[language]) {
            for (const name in formattedVoices[language][country]) {
              const voice = formattedVoices[language][country][name];
              const option = document.createElement('option');
              option.value = voice.shortName;
              option.textContent = `${country} - ${name}`;
              optgroup.appendChild(option);
            }
          }
          voiceSelect.appendChild(optgroup);
        }
        console.log('Voices loaded:', formattedVoices);
      } catch (error) {
        console.error('Error fetching voices:', error);
      }
    }

    // Connect to the synthesis WebSocket
    function connectWebSocket() {
      if (websocket && websocket.readyState === WebSocket.OPEN) {
        console.log('WebSocket is already connected');
        return;
      }
      const connectionId = Math.random().toString(36).substr(2, 10);
      websocket = new WebSocket(`${SYNTH_URL}&ConnectionId=${connectionId}`);

      websocket.onopen = () => {
        console.log('WebSocket connected');
        reconnectAttempts = 0;
      };

      websocket.onmessage = (event) => {
        if (typeof event.data === 'string') {
          const message = JSON.parse(event.data);
          console.log('Received message:', message);
        } else {
          // Binary frame: read it as text and take everything after "Path:audio\r\n"
          const reader = new FileReader();
          reader.onload = () => {
            const audioData = reader.result.split(BINARY_DELIM)[1];
            if (audioData) {
              playAudio(audioData);
            }
          };
          reader.readAsText(event.data);
        }
      };

      websocket.onerror = (error) => {
        console.error('WebSocket error:', error);
      };

      websocket.onclose = (event) => {
        console.log('WebSocket closed:', event);
        reconnectWithBackoff();
      };
    }

    // Exponential backoff, capped at 30 seconds
    function reconnectWithBackoff() {
      const backoffTime = Math.min(30000, (Math.pow(2, reconnectAttempts) - 1) * 1000);
      console.log(`Attempting to reconnect in ${backoffTime}ms...`);
      setTimeout(() => {
        connectWebSocket();
        reconnectAttempts++;
      }, backoffTime);
    }

    // Decode the (assumed base64) audio payload and play it
    function playAudio(audioData) {
      const arrayBuffer = new Uint8Array(
        atob(audioData).split('').map(char => char.charCodeAt(0))
      ).buffer;
      audioContext.decodeAudioData(arrayBuffer, (buffer) => {
        sourceNode = audioContext.createBufferSource();
        sourceNode.buffer = buffer;
        sourceNode.connect(audioContext.destination);
        sourceNode.playbackRate.value = parseFloat(speedControl.value);
        sourceNode.start(0);
      });
    }

    // Send a synthesis request for the given text
    function sendTTSRequest(text) {
      if (!websocket || websocket.readyState !== WebSocket.OPEN) {
        console.error('WebSocket is not ready. Reconnecting...');
        connectWebSocket();
        setTimeout(() => sendTTSRequest(text), 1000); // Retry after 1 second
        return;
      }
      const selectedVoice = voices.find(v => v.ShortName === voiceSelect.value);
      const voiceLocale = selectedVoice.Locale.match(VOICE_LANG_REGEX)[0];
      const request = {
        context: {
          synthesis: {
            audio: {
              metadataOptions: {
                sentenceBoundaryEnabled: "false",
                wordBoundaryEnabled: "false"
              },
              outputFormat: "audio-24khz-48kbitrate-mono-mp3"
            }
          }
        },
        ssml: `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='${voiceLocale}'><voice name='${selectedVoice.ShortName}'><prosody rate='${speedControl.value}'>${text}</prosody></voice></speak>`
      };
      websocket.send(JSON.stringify(request));
    }

    // Toggle play/pause: stop playback, or read the whole page body aloud
    function togglePlayPause() {
      if (!audioContext) {
        initAudioContext();
      }
      if (!websocket || websocket.readyState !== WebSocket.OPEN) {
        connectWebSocket();
        setTimeout(togglePlayPause, 1000); // Retry after 1 second
        return;
      }
      if (isPlaying) {
        if (sourceNode) {
          sourceNode.stop();
        }
        playPauseButton.textContent = '▶️';
      } else {
        const pageText = document.body.innerText;
        sendTTSRequest(pageText);
        playPauseButton.textContent = '⏸️';
      }
      isPlaying = !isPlaying;
    }

    // Event listeners
    playPauseButton.addEventListener('click', togglePlayPause);
    speedControl.addEventListener('input', () => {
      speedValue.textContent = `${speedControl.value}x`;
      if (sourceNode) {
        sourceNode.playbackRate.value = parseFloat(speedControl.value);
      }
    });

    // Initialize
    fetchVoices();
    connectWebSocket();
  </script>
</body>
</html>
```
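Update: after poking through a few open-source Edge TTS clients, I suspect my bug is the `websocket.send(JSON.stringify(request))` call. The service doesn't seem to accept a single JSON payload; other clients send two *text* frames with `Path` headers (a `speech.config` frame, then an `ssml` frame), and the audio appears to come back as raw MP3 bytes in binary frames rather than base64 text, so my `FileReader`/`atob` handling would also be wrong. Below is my untested sketch of what the send/receive side might look like; `sendTTSRequestFramed` and `handleMessage` are my guesses, reusing the globals from the code above, and the header layout is copied from what I found in those clients, so please correct me if it's off:

```javascript
// Untested sketch, based on how other open-source Edge TTS clients frame
// their messages. Reuses the globals from my code above (websocket, voices,
// voiceSelect, speedControl, audioContext, sourceNode). sendTTSRequestFramed
// would replace the JSON.stringify() send in sendTTSRequest, and
// handleMessage would replace the onmessage body inside connectWebSocket().
const audioChunks = []; // raw MP3 byte chunks collected for the current turn

function sendTTSRequestFramed(text) {
  const timestamp = new Date().toISOString();
  // crypto.randomUUID() needs a secure context (https or localhost)
  const requestId = crypto.randomUUID().replace(/-/g, '');
  const selectedVoice = voices.find(v => v.ShortName === voiceSelect.value);

  // 1) A "speech.config" text frame that selects the output format.
  websocket.send(
    `X-Timestamp:${timestamp}\r\n` +
    `Content-Type:application/json; charset=utf-8\r\n` +
    `Path:speech.config\r\n\r\n` +
    JSON.stringify({
      context: { synthesis: { audio: {
        metadataoptions: {
          sentenceBoundaryEnabled: 'false',
          wordBoundaryEnabled: 'false'
        },
        outputFormat: 'audio-24khz-48kbitrate-mono-mp3'
      } } }
    })
  );

  // 2) The SSML itself as an "ssml" text frame.
  // (text should be XML-escaped first; innerText can contain & and <.)
  websocket.send(
    `X-RequestId:${requestId}\r\n` +
    `Content-Type:application/ssml+xml\r\n` +
    `X-Timestamp:${timestamp}\r\n` +
    `Path:ssml\r\n\r\n` +
    `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' ` +
    `xml:lang='${selectedVoice.Locale}'>` +
    `<voice name='${selectedVoice.ShortName}'>${text}</voice></speak>`
  );
}

// Audio seems to arrive as raw MP3 bytes spread across many binary frames;
// a text frame containing "Path:turn.end" marks the end of the turn.
async function handleMessage(event) {
  if (typeof event.data === 'string') {
    if (event.data.includes('Path:turn.end')) {
      const arrayBuffer = await new Blob(audioChunks).arrayBuffer();
      audioChunks.length = 0;
      audioContext.decodeAudioData(arrayBuffer, (buffer) => {
        sourceNode = audioContext.createBufferSource();
        sourceNode.buffer = buffer;
        sourceNode.playbackRate.value = parseFloat(speedControl.value);
        sourceNode.connect(audioContext.destination);
        sourceNode.start(0);
      });
    }
  } else {
    // Binary frame: 2-byte big-endian header length, then the header text,
    // then the audio bytes themselves.
    const bytes = new Uint8Array(await event.data.arrayBuffer());
    const headerLength = (bytes[0] << 8) | bytes[1];
    audioChunks.push(bytes.slice(2 + headerLength));
  }
}
```

If this framing is roughly right, buffering chunks until `turn.end` would also replace my current per-frame `playAudio` call, since decoding each fragment separately can't work for a stream.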