ibm-early-programs / watson-speech-sockets

Example showing how to make use of Watson Speech To Text with Web Sockets in Node-RED
Apache License 2.0
1 stars 1 forks source link

watson-speech-sockets

Example showing how to make use of Watson Speech To Text with Web Sockets in Node-RED

Usage

This html and flow show how to use the Watson Speech to Text Node-RED Node in Web Socket mode. The usage is very similar to how to use the Watson Speech to Text service in Web Socket mode. The format of the actions and data is identical. The significant difference is that a token is not needed as the node takes care of that.

The Echo flow shows how to extend the Speech to Text flow with the response going through Watson Text to Speech and being played back in the browser.

When running in socket streaming mode the Node expects to be listening and writing to Node-RED websocket nodes. If you use the html as is, then the socket is on /ws/stt

node-red-node-watson

The feature that this flow uses was released in version 0.6.3 of the node-red-node-watson. As a consequence you will need at least that version.

html

The sample html and javascript is a cut-down to the essentials only of the standard HTML5 audio capture and web socket examples.

Sample flow - Transcriber

[{"id":"f6e79.e431d187","type":"websocket in","z":"3161c0d5.473bd","name":"STT In","server":"6c11fd94.70e4c4","client":"","x":136,"y":256,"wires":[["ed31c194.d3413","5764654b.ca1cdc"]]},{"id":"5764654b.ca1cdc","type":"watson-speech-to-text","z":"3161c0d5.473bd","name":"STT with Sockets","alternatives":1,"speakerlabels":true,"smartformatting":true,"lang":"en-US","langhidden":"en-US","langcustom":"NoCustomisationSetting","langcustomhidden":"NoCustomisationSetting","band":"NarrowbandModel","bandhidden":"NarrowbandModel","password":"","payload-response":true,"streaming-mode":true,"default-endpoint":true,"service-endpoint":"https://stream.watsonplatform.net/speech-to-text/api","x":345.5,"y":257,"wires":[["bc9b0276.805e5","6f94bbcc.9939a4"]]},{"id":"6f94bbcc.9939a4","type":"websocket out","z":"3161c0d5.473bd","name":"","server":"6c11fd94.70e4c4","client":"","x":561,"y":258,"wires":[]},{"id":"ed31c194.d3413","type":"debug","z":"3161c0d5.473bd","name":"Socket Input","active":true,"console":"false","complete":"payload","x":302,"y":335,"wires":[]},{"id":"bc9b0276.805e5","type":"debug","z":"3161c0d5.473bd","name":"STT Output","active":true,"console":"false","complete":"true","x":543,"y":335,"wires":[]},{"id":"b3ee6346.b3ca7","type":"http in","z":"3161c0d5.473bd","name":"","url":"/transcribe","method":"get","upload":false,"swaggerDoc":"","x":134,"y":151,"wires":[["7c2be8da.909278"]]},{"id":"7c2be8da.909278","type":"http request","z":"3161c0d5.473bd","name":"","method":"GET","ret":"txt","url":"https://raw.githubusercontent.com/ibm-early-programs/watson-speech-sockets/master/transcriber.html","tls":"","x":324,"y":152,"wires":[["dc20d5ea.29e6c8"]]},{"id":"dc20d5ea.29e6c8","type":"function","z":"3161c0d5.473bd","name":"Reset msg.headers","func":"msg.headers = {};\nreturn msg;","outputs":1,"noerr":0,"x":521.5,"y":153,"wires":[["a05eb481.1bd6e8"]]},{"id":"a05eb481.1bd6e8","type":"http response","z":"3161c0d5.473bd","name":"","statusCode":"","headers":{},"x":674.5,"y":74,"wires":[]},{"id":"6c11fd94.70e4c4","type":"websocket-listener","z":"","path":"/ws/stt","wholemsg":"false"}]

Sample flow - Echo

[{"id":"e3f8d541.21f6c8","type":"websocket in","z":"e37ede3b.4935d","name":"STT In","server":"7b210ab2.624a64","client":"","x":90,"y":240,"wires":[["985378ec.5cfde8","36abfdc1.152a02"]]},{"id":"c9b4d0.74788b3","type":"websocket out","z":"e37ede3b.4935d","name":"","server":"7b210ab2.624a64","client":"","x":540,"y":380,"wires":[]},{"id":"1bbe858b.7adcda","type":"debug","z":"e37ede3b.4935d","name":"STT Output","active":false,"console":"false","complete":"true","x":470,"y":240,"wires":[]},{"id":"36abfdc1.152a02","type":"watson-speech-to-text","z":"e37ede3b.4935d","name":"","alternatives":1,"speakerlabels":true,"smartformatting":true,"lang":"en-US","langhidden":"en-US","langcustom":"NoCustomisationSetting","langcustomhidden":"NoCustomisationSetting","band":"NarrowbandModel","bandhidden":"NarrowbandModel","password":"","payload-response":true,"streaming-mode":true,"default-endpoint":true,"service-endpoint":"https://stream.watsonplatform.net/speech-to-text/api","x":260,"y":240,"wires":[["1bbe858b.7adcda","b3d54733.4240a8"]]},{"id":"985378ec.5cfde8","type":"debug","z":"e37ede3b.4935d","name":"Socket Input","active":false,"console":"false","complete":"payload","x":250,"y":180,"wires":[]},{"id":"52b0767d.eadd88","type":"watson-text-to-speech","z":"e37ede3b.4935d","name":"","lang":"en-GB","langhidden":"en-GB","langcustom":"NoCustomisationSetting","langcustomhidden":"","voice":"en-GB_KateVoice","voicehidden":"","format":"audio/wav","password":"","payload-response":true,"default-endpoint":true,"service-endpoint":"https://stream.watsonplatform.net/text-to-speech/api","x":520,"y":320,"wires":[["c9b4d0.74788b3"]]},{"id":"b3d54733.4240a8","type":"function","z":"e37ede3b.4935d","name":"Check for transcription","func":"if (msg.payload && msg.payload.results && \n      msg.payload.results[0].final) {\n    var newMsg = {payload: msg.payload.results[0].alternatives[0].transcript};  \n      return [newMsg, msg];  \n}\nreturn [null, msg];","outputs":"2","noerr":0,"x":300,"y":320,"wires":[["52b0767d.eadd88","cd61028f.b3131"],["c9b4d0.74788b3"]]},{"id":"cd61028f.b3131","type":"debug","z":"e37ede3b.4935d","name":"TTS Input","active":false,"tosidebar":true,"console":false,"tostatus":false,"complete":"true","x":500,"y":280,"wires":[]},{"id":"d9476d53.f1aee","type":"http request","z":"e37ede3b.4935d","name":"","method":"GET","ret":"txt","url":"https://raw.githubusercontent.com/ibm-early-programs/watson-speech-sockets/master/echo.html","tls":"","x":290,"y":120,"wires":[["27052279.05f52e"]]},{"id":"27052279.05f52e","type":"function","z":"e37ede3b.4935d","name":"Reset msg.headers","func":"msg.headers = {};\nreturn msg;","outputs":1,"noerr":0,"x":490,"y":120,"wires":[["9c92163e.0e0c98"]]},{"id":"9c92163e.0e0c98","type":"http response","z":"e37ede3b.4935d","name":"","statusCode":"","headers":{},"x":690,"y":120,"wires":[]},{"id":"384d1111.7ac97e","type":"http in","z":"e37ede3b.4935d","name":"","url":"/transcribe10","method":"get","upload":false,"swaggerDoc":"","x":110,"y":120,"wires":[["d9476d53.f1aee"]]},{"id":"7b210ab2.624a64","type":"websocket-listener","z":"","path":"/ws/stt10","wholemsg":"false"}]

Sample flow - Translator

[{"id":"e3f8d541.21f6c8","type":"websocket in","z":"e37ede3b.4935d","name":"STT In","server":"7b210ab2.624a64","client":"","x":70,"y":220,"wires":[["f90ef260.af3a8"]]},{"id":"c9b4d0.74788b3","type":"websocket out","z":"e37ede3b.4935d","name":"","server":"7b210ab2.624a64","client":"","x":560,"y":220,"wires":[]},{"id":"36abfdc1.152a02","type":"watson-speech-to-text","z":"e37ede3b.4935d","name":"","alternatives":1,"speakerlabels":false,"smartformatting":false,"lang":"en-US","langhidden":"en-US","langcustom":"aabc52f0-dee3-11e6-ba16-9d7ea578c8c5","langcustomhidden":"NoCustomisationSetting","band":"BroadbandModel","bandhidden":"BroadbandModel","password":"","payload-response":true,"streaming-mode":true,"streaming-mute":true,"default-endpoint":true,"service-endpoint":"https://stream.watsonplatform.net/speech-to-text/api","x":220,"y":280,"wires":[["b3d54733.4240a8"]]},{"id":"52b0767d.eadd88","type":"watson-text-to-speech","z":"e37ede3b.4935d","name":"","lang":"en-GB","langhidden":"en-GB","langcustom":"NoCustomisationSetting","langcustomhidden":"","voice":"en-GB_KateVoice","voicehidden":"","format":"audio/wav","password":"","payload-response":true,"default-endpoint":true,"service-endpoint":"https://stream.watsonplatform.net/text-to-speech/api","x":760,"y":320,"wires":[["2ab850cc.48ede"]]},{"id":"b3d54733.4240a8","type":"function","z":"e37ede3b.4935d","name":"Check for transcription","func":"if (msg.payload && msg.payload.results && \n      msg.payload.results[0].final) {\n    var newMsg = {payload: msg.payload.results[0].alternatives[0].transcript};  \n      return [newMsg, msg];  \n}\nreturn [null, msg];","outputs":"2","noerr":0,"x":200,"y":340,"wires":[["53ea5c55.3914b4"],["6350195.73365e8"]]},{"id":"d9476d53.f1aee","type":"http request","z":"e37ede3b.4935d","name":"","method":"GET","ret":"txt","url":"https://raw.githubusercontent.com/ibm-early-programs/watson-speech-sockets/master/translation.html","tls":"","x":290,"y":80,"wires":[["27052279.05f52e"]]},{"id":"27052279.05f52e","type":"function","z":"e37ede3b.4935d","name":"Reset msg.headers","func":"msg.headers = {};\nreturn msg;","outputs":1,"noerr":0,"x":490,"y":80,"wires":[["9c92163e.0e0c98"]]},{"id":"9c92163e.0e0c98","type":"http response","z":"e37ede3b.4935d","name":"","statusCode":"","headers":{},"x":670,"y":80,"wires":[]},{"id":"384d1111.7ac97e","type":"http in","z":"e37ede3b.4935d","name":"","url":"/transcribe10","method":"get","upload":false,"swaggerDoc":"","x":110,"y":80,"wires":[["d9476d53.f1aee"]]},{"id":"51eb7de9.f26fc4","type":"function","z":"e37ede3b.4935d","name":"Set the Language","func":"if (msg.payload &&\n        msg.payload.language &&\n        msg.payload.mode) {\n    var store = 'inputSpoken';\n    if ('output' === msg.payload.mode) {\n        store = 'outputSpoken';  \n    } \n    global.set(store, msg.payload.language);  \n}\nreturn msg;","outputs":1,"noerr":0,"x":310,"y":120,"wires":[["76c64af3.d52384"]]},{"id":"492e9bf0.7b3b14","type":"http in","z":"e37ede3b.4935d","name":"","url":"/setlanguage","method":"post","upload":false,"swaggerDoc":"","x":110,"y":120,"wires":[["51eb7de9.f26fc4"]]},{"id":"76c64af3.d52384","type":"http response","z":"e37ede3b.4935d","name":"","statusCode":"","headers":{},"x":470,"y":120,"wires":[]},{"id":"224bfc51.21b294","type":"function","z":"e37ede3b.4935d","name":"Set Voice","func":"var outLang = global.get('outputSpoken') || null;\nif (outLang) {\n    msg.voice = outLang;\n}\n\nreturn msg;","outputs":1,"noerr":0,"x":760,"y":480,"wires":[["52b0767d.eadd88"]]},{"id":"f90ef260.af3a8","type":"function","z":"e37ede3b.4935d","name":"Set Input Language","func":"var inLang = global.get('inputSpoken') || null;\nif (inLang) {\n    msg.srclang = inLang;\n}\n\nreturn msg;","outputs":1,"noerr":0,"x":230,"y":240,"wires":[["36abfdc1.152a02"]]},{"id":"5b4e1fd5.f1e1c","type":"watson-translator","z":"e37ede3b.4935d","name":"","action":"translate","basemodel":"en-nl","domain":"news","srclang":"es","destlang":"en","password":"","custom":"","domainhidden":"news","srclanghidden":"es","destlanghidden":"en","basemodelhidden":"en-nl","customhidden":"","filetype":"forcedglossary","trainid":"","lgparams2":true,"neural":true,"default-endpoint":true,"service-endpoint":"https://gateway.watsonplatform.net/language-translator/api","x":170,"y":440,"wires":[["993de91e.91fd68"]]},{"id":"3eaba8fc.834d48","type":"watson-translator","z":"e37ede3b.4935d","name":"","action":"translate","basemodel":"es-en","domain":"news","srclang":"en","destlang":"es","password":"","custom":"4210a33e-53f4-4eb6-a08e-d417b03abfcf","domainhidden":"news","srclanghidden":"en","destlanghidden":"es","basemodelhidden":"en-nl","customhidden":"","filetype":"forcedglossary","trainid":"","lgparams2":true,"neural":false,"default-endpoint":true,"service-endpoint":"https://gateway.watsonplatform.net/language-translator/api","x":590,"y":540,"wires":[["224bfc51.21b294"]]},{"id":"53ea5c55.3914b4","type":"function","z":"e37ede3b.4935d","name":"Check For Translation of Input","func":"// Using English as the interim\nvar inLang = global.get('inputSpoken') || null;\nvar outLang = global.get('outputSpoken') || null;\nvar interimLang = 'en';\n\n// If the input language is different to the output language\n// then a translation is required. If the input language is \n// English then the first translation (into English) is\n// not needed\nif (inLang && outLang) {\n    msg.srclang = inLang.substr(0,2);\n    msg.destlang = outLang.substr(0,2);\n    if (msg.srclang !== msg.destlang) {\n        if (msg.srclang !== interimLang) {\n          msg.destlang = interimLang;\n          // Two step translation is needed with English in the middle\n          return [null, msg, null];  \n        }\n        // One step translation is needed, as English is the input\n        return [null, null, msg];\n    }\n}\n\n// No translation is needed, as the input language is the same\n// as the output\nreturn [msg, null, null];\n\n","outputs":3,"noerr":0,"x":470,"y":340,"wires":[["224bfc51.21b294"],["5b4e1fd5.f1e1c"],["993de91e.91fd68"]]},{"id":"27ac7c4b.550724","type":"link in","z":"e37ede3b.4935d","name":"ws-stt10","links":["6350195.73365e8","2ab850cc.48ede"],"x":435,"y":240,"wires":[["c9b4d0.74788b3"]]},{"id":"6350195.73365e8","type":"link out","z":"e37ede3b.4935d","name":"ws-out","links":["27ac7c4b.550724"],"x":235,"y":380,"wires":[]},{"id":"2ab850cc.48ede","type":"link out","z":"e37ede3b.4935d","name":"","links":["27ac7c4b.550724"],"x":855,"y":280,"wires":[]},{"id":"993de91e.91fd68","type":"function","z":"e37ede3b.4935d","name":"Check for Translation of Output","func":"// Using English as the interim\nvar inLang = 'en';\nvar outLang = global.get('outputSpoken') || null;\n\n// For the second translation the source is going to \n// be English. We should only be in this logic, if some\n// sort of translation is needed. \nif (inLang && outLang) {\n    msg.srclang = inLang.substr(0,2);\n    msg.destlang = outLang.substr(0,2);\n    if (msg.srclang !== msg.destlang) {\n        // The parameters for the second translation \n        // are now set.\n        return [msg, null];\n    }\n}\n\n// Both source and destination have been detected as english\n// so no further translation is needed.\nreturn [null, msg];","outputs":2,"noerr":0,"x":430,"y":460,"wires":[["3eaba8fc.834d48"],["224bfc51.21b294"]]},{"id":"7b210ab2.624a64","type":"websocket-listener","z":"","path":"/ws/stt10","wholemsg":"false"}]