Closed: luisrosasg closed this issue 4 years ago.
const { Transform } = require('stream');
const { EventEmitter } = require('events');
const config = require('config');
const { IamAuthenticator } = require('ibm-watson/auth');
const SpeechToTextV1 = require('ibm-watson/speech-to-text/v1');

const speechToText = new SpeechToTextV1({
  authenticator: new IamAuthenticator({
    apikey: '',
  }),
  url: '',
});
class WatsonConnector extends EventEmitter {
  constructor(id, log) {
    super();
    this.log = log.child({ id });
    this.recognizeStream = null;
    this.params = {
      objectMode: true,
      contentType: 'audio/l16;rate=16000',
      model: 'es-CL_NarrowbandModel',
      maxAlternatives: 1
    };
    this.audioInput = [];
    // Audio buffered from the previous recognize stream; the transform below
    // reads this on every chunk, so it must be initialised here or the first
    // chunk will throw.
    this.lastAudioInput = [];
    this.newStream = true;
    this.streamingLimit = 290000;
    this.resultEndTime = 0;
    this.isFinalEndTime = 0;
    this.finalRequestEndTime = 0;
    this.bridgingOffset = 0;
    this.lastTranscriptWasFinal = false;
  }
  _audioInputStreamTransform() {
    return new Transform({
      transform: (chunk, encoding, callback) => {
        if (this.newStream && this.lastAudioInput.length !== 0) {
          // Approximate math to calculate time of chunks
          const chunkTime = this.streamingLimit / this.lastAudioInput.length;
          if (chunkTime !== 0) {
            if (this.bridgingOffset < 0) {
              this.bridgingOffset = 0;
            }
            if (this.bridgingOffset > this.finalRequestEndTime) {
              this.bridgingOffset = this.finalRequestEndTime;
            }
            const chunksFromMS = Math.floor(
              (this.finalRequestEndTime - this.bridgingOffset) / chunkTime
            );
            this.bridgingOffset = Math.floor(
              (this.lastAudioInput.length - chunksFromMS) * chunkTime
            );
            // Replay the buffered audio from the previous stream so speech is
            // not lost across a restart
            for (let i = chunksFromMS; i < this.lastAudioInput.length; i++) {
              if (this.recognizeStream && this.recognizeStream.writable) {
                this.recognizeStream.write(this.lastAudioInput[i]);
              }
            }
          }
          this.newStream = false;
        }
        this.audioInput.push(chunk);
        if (this.recognizeStream && this.recognizeStream.writable) {
          this.recognizeStream.write(chunk);
        }
        callback();
      }
    });
  }
  _startRecognizeStream() {
    this.log.info('starting a new stream to Watson');
    // Clear current audioInput
    this.audioInput = [];
    // Initiate (Reinitiate) a recognize stream
    this.recognizeStream = speechToText.recognizeUsingWebSocket(this.params);
    this.recognizeStream.on('error', (err) => {
      this.log.info({ err }, 'Watson API request error');
    });
    this.recognizeStream.on('data', (data) => {
      this.log.info(JSON.stringify(data));
    });
    this.restarting = false;
  }
  start(stream) {
    this.log.info('starting recognition to Watson');
    this._startRecognizeStream();
    stream.pipe(this._audioInputStreamTransform());
  }
}

module.exports = WatsonConnector;
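For context, this is roughly how I wire the class up; the names below are placeholders (the class assumes a pino/bunyan-style logger because of the log.child({ id }) call, and the PassThrough stands in for the real RTP audio stream coming out of Asterisk):

// Hypothetical wiring of the class above, not part of the project itself.
const { PassThrough } = require('stream');
const pino = require('pino');
const WatsonConnector = require('./watson-connector'); // path is whatever you saved the class as

const log = pino();
const connector = new WatsonConnector('call-1234', log);

// Stand-in for the real audio source: a Readable stream of raw 16 kHz
// linear PCM chunks.
const audioSource = new PassThrough();
connector.start(audioSource);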
Audio is sent via RTP from one port to another, and if there's a NAT involved you'll lose the audio. How are you currently set up in terms of audio coming out of Asterisk, and how are you running this project? Both need to see one another without a NAT in the middle.
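If it helps while checking that, a quick way to confirm RTP is actually arriving on the box running this project is to stop the project and bind a throwaway UDP socket on the port you've pointed Asterisk at (10000 below is just an example) and count what comes in:

// Minimal UDP sniffer to confirm RTP packets reach this host/port.
// Run it with the project stopped, since only one process can bind the port.
const dgram = require('dgram');

const socket = dgram.createSocket('udp4');
let packets = 0;

socket.on('message', (msg, rinfo) => {
  packets += 1;
  if (packets % 50 === 1) {
    console.log(`packet ${packets}: ${msg.length} bytes from ${rinfo.address}:${rinfo.port}`);
  }
});

socket.bind(10000, () => console.log('listening on udp/10000'));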
I installed everything on the same server and it receives the stream, but my problem now is Watson: I pass the data to the WebSocket, but apparently it doesn't know when the audio ends, and it gives me a timeout error :(
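If the timeout is Watson's inactivity timeout (by default the service closes the WebSocket after roughly 30 seconds without detecting speech), two things worth trying are relaxing that timeout in the recognize params and ending the recognize stream when your audio source finishes, so the service knows the request is done. This is only a sketch against the ibm-watson Node SDK as I understand it; inactivityTimeout mirrors the WebSocket API's inactivity_timeout option, so double-check the exact parameter name against your SDK version:

// Sketch only: changes inside WatsonConnector, not tested.

// In the constructor, relax the service-side inactivity timeout.
// -1 is the documented value for disabling the "no speech" cutoff.
this.params = {
  objectMode: true,
  contentType: 'audio/l16;rate=16000',
  model: 'es-CL_NarrowbandModel',
  maxAlternatives: 1,
  inactivityTimeout: -1
};

// In start(), end the recognize stream once the source stream ends,
// instead of leaving the WebSocket open until it times out.
start(stream) {
  this._startRecognizeStream();
  stream.pipe(this._audioInputStreamTransform());
  stream.on('end', () => {
    if (this.recognizeStream) {
      this.recognizeStream.end();
    }
  });
}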
Hey @luisrosasg - sounds great! Can you send me an email - dan@nimblea.pe :) I'll have a think about how much effort it would be to do a Watson connector :)
This has been resolved. If anyone is interested in the resulting Watson connector, you'll need to talk to @luisrosasg as it's his intellectual property, or ask me and I can write you one without looking at the code I wrote for Luis (now deleted from my machine so that I can't take any form of inspiration from it).
Hi, I have a problem getting the audio. I made a small modification to support Watson STT, but when I call from the softphone the program recognizes the call and connects, yet it does not transmit any data.
I used the _audioInputStreamTransform that I found in another project of yours that uses Google STT (which is similar to Watson), but the audio never makes it into that flow.
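One way to narrow that down is to drop a counter into the transform so you can see whether any audio chunks reach it at all: if the log never fires, the audio is not making it from Asterisk into this process, and the problem is upstream of Watson. A minimal sketch, only adding logging to the existing transform:

_audioInputStreamTransform() {
  let chunkCount = 0; // debugging only: how many chunks have reached us
  return new Transform({
    transform: (chunk, encoding, callback) => {
      chunkCount += 1;
      if (chunkCount === 1 || chunkCount % 100 === 0) {
        this.log.info({ chunkCount, bytes: chunk.length }, 'audio chunk reached the transform');
      }
      // ... existing bridging/replay logic unchanged ...
      this.audioInput.push(chunk);
      if (this.recognizeStream && this.recognizeStream.writable) {
        this.recognizeStream.write(chunk);
      }
      callback();
    }
  });
}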