Looking for real time transcription for twilio stream with amazon transcribe

tamdilip commented 4 years ago

Could you help by publishing a sample or documentation on real-time transcription of Twilio streams with Amazon Transcribe? I haven't found any working solution after two days of searching.

pandey-sushil commented 4 years ago

@tamdilip Did you figure out how to use Twilio streams with Amazon Transcribe?

tamdilip commented 4 years ago

@tamdilip Did you figure out how to use Twilio streams with Amazon Transcribe?

@pandey-sushil Sadly, no ☹️! It was months ago. I was midway through trying this Lex solution - amazon-lex-conversational-interface-for-twilio - where we can at least get the transcribed text from the Lex intent response, and where they convert the Twilio media streams audio format into one that AWS supports. But I abandoned that partway through and finally ended up with Amazon Connect and this solution, with a few tweaks - ai-powered-speech-analytics-for-amazon-connect.

tamdilip commented 4 years ago

@tamdilip Did you figure out how to use Twilio streams with Amazon Transcribe?

@pandey-sushil They finally published an example here, using AWS-signed audio chunks as supported by Transcribe streaming - https://github.com/TwilioDevEd/talkin-cedric-node/blob/master/server.js

ajporterfield commented 1 year ago

I would love to see an example using @aws-sdk/client-transcribe-streaming. I tried to get that working today but all I got back from AWS was empty responses (ex: { TranscriptEvent: { Transcript: { Results: [] } } }). I wrote to a transform stream similar to the talkin-cedric-node example linked above.

// First attempt (the version that got empty Results back from AWS): a
// Transform that wraps each incoming chunk in a WaveFile, runs fromMuLaw()
// on it, and pushes the resulting samples downstream as a Buffer.
new stream.Transform({
  transform(chunk, encoding, done) {
    const wav = new WaveFile();
    // NOTE(review): if `chunk` is already a Buffer here, Buffer.from(buffer, 'base64')
    // just copies it and performs no base64 decoding, so the base64 text itself
    // would be treated as audio samples. Likely why this version failed; the
    // updated snippet below decodes before writing to the stream instead.
    wav.fromScratch(1, 8000, '8m', Buffer.from(chunk, 'base64'));
    // Presumably converts the 8-bit mu-law samples to linear PCM (see wavefile docs).
    wav.fromMuLaw();
    this.push(Buffer.from(wav.data.samples));
    done();
  }
});

UPDATE:

I was able to get this working this morning with a slight variation on the code snippet above.

// Working version: identical to the first attempt except that `chunk` is
// handed to the WaveFile as-is, with no Buffer.from(chunk, 'base64') here.
new stream.Transform({
  transform(chunk, encoding, done) {
    const wav = new WaveFile();
    // `chunk` is used directly as the sample data; the base64 decoding is
    // done by the caller before writing to this stream (see the write call below).
    wav.fromScratch(1, 8000, '8m', chunk);
    // Presumably converts the 8-bit mu-law samples to linear PCM (see wavefile docs).
    wav.fromMuLaw();
    this.push(Buffer.from(wav.data.samples));
    done();
  }
});

This is how I'm writing to the transform stream.

// Decode the base64 `payload` into raw bytes before it enters the transform
// (assumes `payload` is the base64 string from a Twilio media message — confirm against caller).
transformStream.write(Buffer.from(payload, 'base64'))

josephadd commented 1 year ago

@ajporterfield Could you please help me fix my code? Your help would mean so much to me. Currently, none of this code seems to work for me.

// Load environment variables from .env before anything reads process.env.
require('dotenv').config();

const express = require('express');
const expressWebSocket = require('express-ws');
const { engine } = require('express-handlebars');
const Transform = require('stream').Transform;
const websocketStream = require('websocket-stream/stream');
const WaveFile = require('wavefile').WaveFile;
const AWS = require('aws-sdk');
const TranscribeService = require('aws-sdk/clients/transcribeservice');
const TwilioClient = require('twilio');

// Set the AWS region and credentials from environment variables.
AWS.config.update({
  region: process.env.AWS_REGION,
  accessKeyId: process.env.AWS_ACCESS_KEY_ID,
  secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
});

// Create an S3 service object
const s3 = new AWS.S3();
// Create an Amazon Transcribe service object
const transcribeService = new AWS.TranscribeService();

const app = express();
// Extend express app with app.ws()
expressWebSocket(app, null, {
  perMessageDeflate: false,
});

// Render views from ./views with Handlebars (.hbs) templates.
app.engine('hbs', engine());
app.set('view engine', 'hbs');
app.set('views', './views');

// Make all the files in 'public' available
app.use(express.static('public'));

// Home page: renders the 'home' view with the configured Twilio number.
app.get('/', (request, response) => {
  response.render('home', { number: process.env.TWILIO_NUMBER, layout: false });
});

// Respond with Twilio instructions to begin the stream
app.post('/twiml', (request, response) => {
  response.setHeader('Content-Type', 'application/xml');
  response.render('twiml', { host: request.hostname, layout: false });
});

/**
 * Step 1: Create an S3 Bucket.
 *
 * Best-effort: a failure is logged with console.error and not rethrown, so
 * the returned promise always resolves.
 *
 * @param {string} bucketName - Name of the bucket to create.
 * @returns {Promise<void>}
 */
const createS3Bucket = async (bucketName) => {
  try {
    const params = {
      Bucket: bucketName,
      ACL: 'private', // Set the bucket access control
    };

    await s3.createBucket(params).promise();
    console.log(`S3 Bucket "${bucketName}" created successfully.`);
  } catch (error) {
    console.error('Error creating S3 bucket:', error);
  }
};

// Step 2: Upload the Audio File to the S3 Bucket const uploadAudioFileToS3 = async (bucketName, audioFilePath, key) => { try { const fileContent = require('fs').readFileSync(audioFilePath);

const params = {
  Bucket: bucketName,
  Key: key,
  Body: fileContent,
};

await s3.upload(params).promise();
console.log('Audio file uploaded to S3 successfully.');

} catch (error) { console.error('Error uploading audio file to S3:', error); } };

/**
 * WebSocket endpoint that receives the Twilio media stream for a call.
 *
 * For each connection it:
 *  - parses Twilio's JSON messages and extracts the base64 audio payload,
 *  - converts the mu-law audio with WaveFile (pcmStream),
 *  - kicks off the S3 / batch-transcription steps (bucket -> upload -> job),
 *  - attempts a streaming transcription for every audio chunk.
 */
app.ws('/media', (ws, req) => {
  // Settings for the batch (S3-based) transcription job.
  const bucketName = process.env.BUCKET_NAME;
  const audioFilePath = 'PATH_TO_AUDIO_FILE';
  const key = 'OBJECT_KEY_WITHIN_BUCKET';
  const jobName = 'geohilfe';
  const languageCode = 'en-GB'; // Specify the desired language code

  // Audio Stream coming from Twilio
  const mediaStream = websocketStream(ws);
  let callSid;

  // Parses each Twilio message; emits the decoded audio bytes of 'media' messages only.
  const audioStream = new Transform({
    transform: (chunk, encoding, callback) => {
      const msg = JSON.parse(chunk.toString('utf8'));
      if (msg.event === 'start') {
        callSid = msg.start.callSid;
        console.log(`Captured call ${callSid}`);
      }
      // Only process media messages
      if (msg.event !== 'media') return callback();
      // This is mulaw
      return callback(null, Buffer.from(msg.media.payload, 'base64'));
    },
  });

  // Wraps each mu-law chunk in a WaveFile and runs fromMuLaw() on it.
  const pcmStream = new Transform({
    transform: (chunk, encoding, callback) => {
      const wav = new WaveFile();
      wav.fromScratch(1, 8000, '8m', chunk);
      wav.fromMuLaw();
      return callback(null, Buffer.from(wav.data.samples));
    },
  });

  /**
   * Step 3: Create an Amazon Transcribe Job for an object already in S3.
   * Failures are logged, not rethrown.
   */
  const createTranscriptionJob = async (jobName, bucketName, key, languageCode) => {
    try {
      const params = {
        TranscriptionJobName: jobName,
        LanguageCode: languageCode,
        Media: {
          MediaFileUri: `s3://${bucketName}/${key}`,
        },
        OutputBucketName: bucketName, // Optional: Output results to the same bucket
      };

      await transcribeService.startTranscriptionJob(params).promise();
      console.log('Transcription job created successfully.');
    } catch (error) {
      console.error('Error creating transcription job:', error);
    }
  };

  /**
   * Step 4: Monitor the Transcription Job (single status check, no polling).
   * Failures are logged, not rethrown.
   */
  const monitorTranscriptionJob = async (jobName) => {
    try {
      const params = {
        TranscriptionJobName: jobName,
      };

      const response = await transcribeService.getTranscriptionJob(params).promise();
      // The status is a field of the TranscriptionJob object, not of the response itself.
      const { TranscriptionJob } = response;
      const { TranscriptionJobStatus } = TranscriptionJob;

      console.log(`Transcription job status: ${TranscriptionJobStatus}`);
      if (TranscriptionJobStatus === 'COMPLETED') {
        console.log('Transcription job completed successfully.');
        console.log('Transcription Results:', TranscriptionJob.Transcript.TranscriptFileUri);
      }
    } catch (error) {
      console.error('Error monitoring transcription job:', error);
    }
  };

  // Pipe our streams together
  // NOTE(review): nothing consumes pcmStream's output; the 'data' handler below
  // listens on audioStream, so it receives mu-law bytes, not the converted PCM.
  mediaStream.pipe(audioStream).pipe(pcmStream);

  mediaStream.on('close', () => {
    console.log('Media stream closed.');
    // Step 4: Monitor the Transcription Job
    monitorTranscriptionJob(jobName);
  });

  audioStream.on('data', async (data) => {
    // FIXME: `data` is a Buffer of raw audio, not text, so <Say> has nothing
    // meaningful to speak; this probably belongs where a transcript is available.
    const twiml = new TwilioClient.twiml.VoiceResponse();
    twiml.say(
      {
        voice: "alice",
        language: "en-GB",
      },
      data
    );
    twiml.pause({ length: 120 });
    // FIXME: `client` is never defined in this file; a Twilio REST client must
    // be created before this call can work.
    client.calls(callSid).update({
      twiml: twiml.toString(),
    });

    // NOTE(review): the aws-sdk v2 TranscribeService client does not appear to
    // expose startStreamTranscription; streaming transcription lives in
    // @aws-sdk/client-transcribe-streaming. This also starts a new request for
    // every audio chunk rather than one session per call. Confirm and rework.
    transcribeService.startStreamTranscription(
      {
        LanguageCode: 'en-GB',
        MediaEncoding: 'pcm',
        MediaSampleRateHertz: 8000,
        Media: {
          AudioStream: data,
        },
      },
      (err, response) => {
        if (err) {
          console.error('Error starting stream transcription:', err);
          return;
        }
        if (response && response.TranscriptResultStream) {
          response.TranscriptResultStream.on('data', (data) => {
            const event = data.TranscriptEvent;
            const results = (event && event.Transcript && event.Transcript.Results) || [];
            // Results is often empty (e.g. silence); skip instead of throwing on Results[0].
            if (results.length === 0) return;
            // The text is carried by the result's alternatives.
            const alternatives = results[0].Alternatives || [];
            if (alternatives.length === 0) return;
            const transcript = alternatives[0].Transcript;
            console.log(`Transcript: ${transcript}`);

            // Additional code to process and send the transcription as needed
          });
        }
      }
    );
  });

  // Steps 1-3 depend on each other (the upload needs the bucket, the job needs
  // the uploaded object), so run them in order rather than concurrently.
  // Each helper logs and swallows its own errors, so the chain does not reject.
  // Step 1: Create an S3 Bucket
  createS3Bucket(bucketName)
    // Step 2: Upload the Audio File to the S3 Bucket
    .then(() => uploadAudioFileToS3(bucketName, audioFilePath, key))
    // Step 3: Create an Amazon Transcribe Job
    .then(() => createTranscriptionJob(jobName, bucketName, key, languageCode));
});

// Production error handler
// No stacktraces leaked to the user
// The unused `next` parameter must stay: Express recognises error-handling
// middleware by its four-argument signature.
app.use(function (err, req, res, next) {
  console.trace(err);
  res.status(err.status || 500);
  res.send({
    message: err.message,
    error: {},
  });
});

// Start the HTTP server on port 4007 and log the port actually bound.
const listener = app.listen(4007, () => {
  console.log(`Your app is listening on port ${listener.address().port}`);
});

josephadd commented 1 year ago

If you have any good resources to help me implement real-time transcription with Amazon Transcribe and Twilio, that would be great. Thanks.

szl0144 commented 8 months ago

Same question here: does anyone know how to integrate Amazon Transcribe with Twilio?

twilio / media-streams

Looking for real time transcription for twilio stream with amazon transcribe #24