deepgram / deepgram-js-sdk

Official JavaScript SDK for Deepgram's automated speech recognition APIs.
https://developers.deepgram.com
MIT License
128 stars 46 forks source link

Error: Unknown TranscriptionSource type #40

Closed Seek4samurai closed 2 years ago

Seek4samurai commented 2 years ago

What is the current behavior?

I'm still facing this issue on my application.

What's happening that seems wrong? I created a blob URL in my application and passed it to the API. I specified mimetype: "audio/webm".

Steps to reproduce

My file from where I'm sending the request.

// Container element that the playback <audio> element is appended to.
const audio = document.getElementById("audio");
// <audio> element that receives the recorded clip as its <source>.
const mainAudio = document.createElement("audio");
// Element whose text is replaced with the backend's response.
const paragraph = document.getElementById("para");
// Chunks delivered by the MediaRecorder's dataavailable events.
var items = [];

/**
 * Records roughly five seconds of microphone audio, attaches a playback
 * control for it to the page, and posts the recording's object URL and MIME
 * type to the backend, writing the response into `paragraph`.
 *
 * NOTE(review): the posted `audioURL` is a `blob:` object URL; such URLs are
 * presumably only resolvable inside the browser that created them — confirm
 * the backend can actually read the audio from it.
 *
 * @returns {Promise<void>} Resolves once recording has been started (or
 *   microphone access failed); the upload happens later, in the recorder's
 *   dataavailable handler.
 */
const start = async () => {
  // Discard chunks from a previous run so recordings are not concatenated.
  items.length = 0;

  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (err) {
    // Permission denied or no input device: report it instead of leaving an
    // unhandled rejection.
    console.error("Could not access the microphone:", err);
    return;
  }

  const recorder = new MediaRecorder(stream);

  recorder.ondataavailable = async (e) => {
    items.push(e.data);
    // Chunks arrive every 100 ms; only the one delivered after stop() (state
    // "inactive") completes the recording.
    if (recorder.state !== "inactive") {
      return;
    }

    const blob = new Blob(items, { type: "audio/webm" });
    // Create the object URL once and reuse it for playback and for the POST.
    const audioURL = URL.createObjectURL(blob);

    // Build the <source> node directly rather than concatenating markup, and
    // advertise the same type the Blob was created with.
    const source = document.createElement("source");
    source.src = audioURL;
    source.type = "audio/webm";

    mainAudio.setAttribute("controls", "controls");
    mainAudio.textContent = ""; // remove any <source> from an earlier recording
    mainAudio.appendChild(source);
    audio.appendChild(mainAudio);

    try {
      const res = await axios.post("http://127.0.0.1:5000/response", {
        audioURL,
        mimeType: "audio/webm",
      });
      paragraph.innerText = res.data;
    } catch (err) {
      console.error("Transcription request failed:", err);
    }
  };
  recorder.start(100);
  console.log("started!");

  setTimeout(() => {
    recorder.stop();
  }, 5000);
};

This is my backend file where I imported DeepGram API

const express = require("express");
const dotenv = require("dotenv");
const badWords = require("./badwords.json");
const { Deepgram } = require("@deepgram/sdk");

// Run dotenv's configuration step before process.env.KEY is read below.
dotenv.config();

// Router that the /response route is registered on and that this module exports.
const router = express.Router();
// Word list taken from the `words` property of badwords.json.
const badWordsArray = badWords.words;
// API key handed to the Deepgram constructor.
const deepgramApiKey = process.env.KEY;

router.post("/response", async (req, res) => {
  // A Deepgram client is created for each incoming request.
  const deepgram = new Deepgram(deepgramApiKey);

  // Collects every transcribed word, in order.
  const transcriptionArray = [];

  // Reports whether any element of array1 is strictly equal to any element
  // of array2.
  function findCommonElement(array1, array2) {
    for (const left of array1) {
      for (const right of array2) {
        if (left === right) {
          return true;
        }
      }
    }
    return false;
  }

  // Both values come straight from the JSON body sent by the client.
  const { audioURL: audioStream, mimeType } = req.body;

  deepgram.transcription
    .preRecorded(
      { mimetype: mimeType },
      { url: audioStream },
      { punctuate: true, language: "en-GB" }
    )
    .then((transcription) => {
      // Only the first alternative of the first channel is inspected.
      const { words } = transcription.results.channels[0].alternatives[0];
      for (const { word } of words) {
        transcriptionArray.push(word);
      }
      res.send(transcription);
    })
    .catch((err) => {
      // Failures are logged only; no response is sent on this path.
      console.log(err);
    });
});

module.exports = router;

Expected behavior

Since I'm passing the direct URL of the audio file, I'd expect the API to transcribe it without trouble.

What would you expect to happen when following the steps above?

Error: Unknown TranscriptionSource type
at /media/seek4samurai/Files and all/Projects/tutorials/DeepGram/backend/node_modules/@deepgram/sdk/dist/transcription/preRecordedTranscription.js:98:27
at step (/media/seek4samurai/Files and all/Projects/tutorials/DeepGram/backend/node_modules/@deepgram/sdk/dist/transcription/preRecordedTranscription.js:44:23)
at Object.next (/media/seek4samurai/Files and all/Projects/tutorials/DeepGram/backend/node_modules/@deepgram/sdk/dist/transcription/preRecordedTranscription.js:25:53)
at /media/seek4samurai/Files and all/Projects/tutorials/DeepGram/backend/node_modules/@deepgram/sdk/dist/transcription/preRecordedTranscription.js:19:71
at new Promise (<anonymous>)
at __awaiter (/media/seek4samurai/Files and all/Projects/tutorials/DeepGram/backend/node_modules/@deepgram/sdk/dist/transcription/preRecordedTranscription.js:15:12)
at preRecordedTranscription (/media/seek4samurai/Files and all/Projects/tutorials/DeepGram/backend/node_modules/@deepgram/sdk/dist/transcription/preRecordedTranscription.js:78:84)
at Transcriber.<anonymous> (/media/seek4samurai/Files and all/Projects/tutorials/DeepGram/backend/node_modules/@deepgram/sdk/dist/transcription/index.js:56:106)
at step (/media/seek4samurai/Files and all/Projects/tutorials/DeepGram/backend/node_modules/@deepgram/sdk/dist/transcription/index.js:33:23)
at Object.next (/media/seek4samurai/Files and all/Projects/tutorials/DeepGram/backend/node_modules/@deepgram/sdk/dist/transcription/index.js:14:53)

Please tell us about your environment

We want to make sure the problem isn't specific to your operating system or programming language.

Other information

Anything else we should know? (e.g. detailed explanation, stack-traces, related issues, suggestions how to fix, links for us to have context, eg. stack overflow, codepen, etc)

I'm using an Express server for the backend, and I'm getting the audio input with the help of navigator.mediaDevices as described above. The URL looks something like this: blob:http://127.0.0.1:5500/58bf900d-a1c4-4e75-82e5-be16916c0834, and when opened, the audio starts playing. I guess this information should be enough; if you need any more, please let me know. Please tell me what I'm doing wrong. Help would be appreciated.

michaeljolley commented 2 years ago

So there are a couple of things to unpack here:

First, the URL you are trying to pass to Deepgram is a local URL. It isn't accessible by any service not on your local network so the API won't be able to retrieve the audio from it. The URL you pass to be transcribed needs to be publicly accessible on the internet.

Second, it appears you aren't submitting the request to the preRecorded function correctly. You are sending:

// Three arguments are passed: { mimetype } first, { url } second, options third.
deepgram.transcription
    .preRecorded(
      { mimetype: mimeType },
      { url: audioStream },
      { punctuate: true, language: "en-GB" }
    )

However, the signature for the preRecorded function is:

/**
* Transcribes prerecorded audio from a file or buffer
* @param source Url or Buffer of file to transcribe
* @param options Options to modify transcriptions
*/
async preRecorded(
  source: TranscriptionSource,
  options?: PrerecordedTranscriptionOptions
)

In your example, and assuming the URL was publicly accessible, that would look like:

// Two arguments: the source ({ url }) first, then the transcription options.
deepgram.transcription
    .preRecorded(
      { url: audioStream },
      { punctuate: true, language: "en-GB" }
    )

There is no parameter of the preRecorded function that takes an object with mimetype and url.

You could possibly convert the blob url to a buffer and then pass that to the preRecorded function. An example of how to convert it lives on this StackOverflow post. With that final buffer you could send:


// Download whatever the URL points at and convert the bytes into a Node Buffer.
// NOTE(review): untested; a `blob:` URL created in a browser may not be
// fetchable from the Node process — confirm before relying on this.
const fetch = require("node-fetch");
const response = await fetch(audioStream);
const blob = await response.blob();
const arrayBuffer = await blob.arrayBuffer();
const myBuffer = Buffer.from(arrayBuffer);

// Buffer sources carry their mimetype alongside the bytes in the same object.
deepgram.transcription
    .preRecorded(
      { buffer: myBuffer, mimetype: mimeType },
      { punctuate: true, language: "en-GB" }
    )

Full disclosure: I haven't tested converting that blob URL to a Node Buffer, but that code looks like it would work at first blush.

Hopefully, this helps get you started.

Seek4samurai commented 2 years ago

Hi, thanks for the response, I'm using it like this. This is my main.js where I send request.

// Post the blob's object URL (a string, not the audio bytes) and its MIME type.
const audioURL = URL.createObjectURL(blob);
const res = await axios.post("http://127.0.0.1:5000/response", {
          audioURL: audioURL,
          mimeType: "audio/webm",
});

This is my backend controller.js.

const audioStream = req.body.audioURL;
const mimeType = req.body.mimeType;
// audioStream is the URL string posted by the client, so this Buffer holds the
// characters of that URL (it begins with "blob:"), not the recorded audio.
const bufferData = Buffer.from(audioStream, "binary");
console.log(bufferData);

  deepgram.transcription
    .preRecorded(
      { buffer: bufferData, mimetype: mimeType },
      { punctuate: true, language: "en-GB" }
    )

When I log this bufferData on backend, I get this in console <Buffer 62 6c 6f 62 3a 68 74 74 70 3a 2f 2f 31 32 37 2e 30 2e 30 2e 31 3a 35 35 30 30 2f 39 36 37 65 33 31 35 36 2d 62 62 33 36 2d 34 37 31 35 2d 61 35 35 31 ... 13 more bytes> and this Error

DG: {"error":"Bad Request","reason":"Failed to process audio: corrupt or unsupported data.","transaction_key":"YGJIDrpWCZRSLfYsbEqItlwPr6pufIRKnvn6pLg+tSsl6jZvQR4B4o9YWMbT1qeAl2zxy2X2z0uipKP1hoL2oXNHTj5TpKi0J6j+Ss6AkyMCu9hFOf3xI3uueHW/2W+qjsCr8xibO/box7ISUhgFlQ63H+PeXD21t9wPHcBbOHVhClphWseoxWiV6NUsY61Hl6HOl02HjmjlmJczcpKenw4T4OpxBcRgpJLg1YwgovLp/JYmwhQQLlW7p8yvTvZcgNkYIZfbcOkzKuEwgxgDExjWoD2WbcgI/9hwuply+hspuN1v9NS54CaE/Ys=","request_id":"d7309293-7d99-486c-bf4e-1d5e4b851039"}

When I do console.log(typeof bufferData) it shows object, which should be fine since a Buffer is also a kind of object. Is this Buffer data right for the API? By the way, I guess another way to transfer the file could be this: https://developers.deepgram.com/blog/2021/11/sending-audio-files-to-expressjs-server/#html-and-js-using-a-formdata-object

Seek4samurai commented 2 years ago

Hi @MichaelJolley, now I'm passing exactly the Buffer data of the audio, but this is still giving the same issue:

App running or port 5000
<Buffer 6e 5a 1b 86 db 69 ff fd 76 ef 4d 35 e7 9d 34 ff 6f 38 d3 9e db f3 e7 3c d3 8f b8 eb 7d be 6d d7 78 fb 9f 1a 75 dd 1c e1 ef 1d>
DG: {"error":"Bad Request","reason":"Failed to process audio: corrupt or unsupported data.","transaction_key":"Tcyri0yOeMkWmLVUy5vi+T3hLUGlhRqa5mA7kNjGFTdOeSgwlA5yk8A2wUCRwPTGZeGFelCVjVsefFbUvFaE49JQkXArQdVTYLupjt89XVpf1j5NL+ZnLpBKamKz4izqKglkrZtp3pt7EJKRw0lOIvrqKLWZH/aYcE1UupjqB5rj7MvvFPG3VBI4gx9BnqVnf23+MHp7vmgJU5AhurDSkY4DZpab2TkzzuUPmtoOcsSGjYAIbPSfROK1Cav/BKVwEbMQqehP+O9ZkzyN7g==","request_id":"10239088-cf74-48b0-8bbc-eb8af3c7d3bb"}
router.post("/response", upload.none(), async (req, res) => {
  // Collects every transcribed word, in order.
  const transcriptionArray = [];

  // The `files` field of the parsed body is decoded as base64 text.
  const audioBlob = req.body.files;
  const audioBuffer = Buffer.from(audioBlob, "base64"); // Buffer data

  // Source object handed to Deepgram: raw bytes plus their declared type.
  const source = { buffer: audioBuffer, mimetype: "audio/wav" };

  await deepgram.transcription
    .preRecorded(source)
    .then((transcription) => {
      // Only the first alternative of the first channel is inspected.
      const { words } = transcription.results.channels[0].alternatives[0];
      for (const { word } of words) {
        transcriptionArray.push(word);
      }
      res.send(transcription);

      // Log whether any transcribed word appears in the bad-word list.
      const verdict = findCommonElement(transcriptionArray, badWordsArray)
        ? "Not safe!"
        : "Safe!";
      console.log(verdict);
    })
    .catch((err) => {
      // Failures are logged only; no response is sent on this path.
      console.log(err);
    });
});

Any suggestions? I guess that Buffer data is fine...

michaeljolley commented 2 years ago

I'm not certain but I don't think you want:

const audioBuffer = Buffer.from(audioBlob, "base64"); // Buffer data

That's sending a base64 encoded file to the API, which is not supported. I'd suggest using the express-fileupload npm package and trying something like the following:

const express = require("express");
const fileUpload = require('express-fileupload');
const dotenv = require("dotenv");
const { Deepgram } = require("@deepgram/sdk");

// Run dotenv's configuration step so values from the environment file are available.
dotenv.config();

const router = express.Router();
// Register express-fileupload on this router; the route below reads uploads from req.files.
router.use(fileUpload());

/**
 * POST /response — transcribes an uploaded audio file with Deepgram and
 * replies with the transcription.
 *
 * The file is read from the multipart field "sampleFile" via `req.files`.
 * Responds 400 when that field is missing and 500 when transcription fails.
 *
 * NOTE(review): `findCommonElement` and `badWordsArray` are not defined in
 * this snippet; they are presumably provided by the surrounding controller —
 * confirm they are in scope.
 */
router.post("/response", async (req, res) => {
  // The name of the input field (i.e. "sampleFile") is used to retrieve the uploaded file
  const sampleFile = req.files && req.files.sampleFile;
  if (!sampleFile) {
    // Covers both "no files at all" and "files under a different field name".
    return res.status(400).send('No files were uploaded.');
  }

  // NOTE(review): assumes the API key lives in process.env.KEY, as in the
  // reporter's controller — confirm.
  const deepgram = new Deepgram(process.env.KEY);

  try {
    const transcription = await deepgram.transcription.preRecorded({
      buffer: sampleFile.data,
      mimetype: sampleFile.mimetype,
    });

    // Only the first alternative of the first channel is inspected.
    const words = transcription.results.channels[0].alternatives[0].words;
    const transcriptionArray = words.map((wordData) => wordData.word);
    res.send(transcription);

    if (findCommonElement(transcriptionArray, badWordsArray)) {
      console.log("Not safe!");
    } else {
      console.log("Safe!");
    }
  } catch (err) {
    console.log(err);
    // Always answer the request; skip if the transcription was already sent
    // and the failure happened afterwards.
    if (!res.headersSent) {
      res.status(500).send("Transcription failed.");
    }
  }
});