jaxcore / bumblebee-hotword-node

BumbleBee Hotword for NodeJS
Apache License 2.0
14 stars 4 forks source link

Having issues actually detecting hotword #12

Open JackBlundellReevex opened 8 months ago

JackBlundellReevex commented 8 months ago

Hey there,

I'm trying to set up Bumblebee within a server that will listen to a Discord server for the 'bumblebee' hotword. My code is derived from https://github.com/SteTR/Emost-Bot/tree/master

So far I have got the tool integrated and emitting 'data', but it never fires the 'hotword' callback when I say "bumblebee".

I am not sure whether the stream going into it is in the wrong format, or whether I am doing something else wrong.

VoiceRecognitionService.js - Where Bumblebee is handled

/**
 * Feeds a voice stream to Bumblebee for hotword detection. When the hotword
 * fires, audio chunks emitted by Bumblebee are forwarded to a Google
 * streaming-recognition request for five seconds, and every transcript that
 * comes back is executed as a voice command.
 *
 * NOTE(review): `connection.joinConfig.guild` is stored and `.id` is read from
 * it in several log messages. @discordjs/voice join configs appear to expose
 * `guildId` rather than `guild` — confirm this property is populated.
 */
class VoiceRecognitionService {
  /**
   * @param hotword currently ignored: detection is pinned to "bumblebee" below
   * @param connection voice connection; `joinConfig.guild`, `client`,
   *   `channel.guild.id` and `disconnect()` are read from it
   * @param voiceReceiverStream audio stream handed to Bumblebee
   */
  constructor(hotword, connection, voiceReceiverStream) {
    // boolean to check if it's currently recording to google speech api
    this.recording = false;
    this._connection = connection;
    this._guild = connection.joinConfig.guild;
    this._inputFormat = {
      config: {
        encoding: "LINEAR16",
        audioChannelCount: 1,
        sampleRateHertz: 16000,
        languageCode: "en-US",
      },
    };
    this._transcribed = "";
    this.startBumblebee("bumblebee", voiceReceiverStream);
  }

  /**
   * Creates a Bumblebee instance, wires its events and starts it on the
   * given stream. On a Bumblebee error the detector is started again with the
   * same hotword and stream.
   *
   * @param hotword name of the hotword to register
   * @param voiceReceiverStream audio stream passed to `Bumblebee#start`
   */
  startBumblebee(hotword, voiceReceiverStream) {
    console.log("Strting bumblebee");
    // Setting up bumblebee for hotword detection
    this._bumblebee = new Bumblebee()
      .on("hotword", () => {
        // This is not being called
        console.log("Heard bumblebee");
        if (this.recording) {
          return;
        }
        this.startStream();
        this.recording = true;
        // TODO maybe separate logic of connections into different?

        // Close the recognition window after five seconds.
        setTimeout(() => {
          console.log(
            `Guild ${this._guild.id}: Disabled Google Stream from Listening`
          );
          this.recording = false;
          this.shutdownStream();
        }, 5000);
      })
      .on("data", (data) => {
        // This is being called...
        console.log("Data back!");
        if (!this.recording) {
          return;
        }
        if (this._currentStream.destroyed) {
          throw new Error(
            `Guild ${this._guild.id}: Stream was destroyed when attempting to send data from bumblebee to Google`
          );
        }
        this._currentStream.write(data);
      });

    console.log("Adding hotword", hotword);
    this._bumblebee.addHotword(hotword);
    this._bumblebee.setSensitivity(1);

    this._bumblebee.on("error", (error) => {
      console.log(`Guild ${this._guild.id}: Bumblebee Error: ${error}`);
      // Restart with BOTH arguments; passing only the stream would register
      // the stream as the hotword and start the detector without any input.
      this.startBumblebee(hotword, voiceReceiverStream);
    });
    console.log(
      "Starting bumblebee with stream!",
      voiceReceiverStream !== undefined
    );
    this._bumblebee.start({ stream: voiceReceiverStream });
  }

  /**
   * Opens a streaming-recognition request using `this._inputFormat` and keeps
   * it in `this._currentStream`. Each response that carries a transcript is
   * stored in `this._transcribed` and executed as a command.
   */
  startStream() {
    this._currentStream = client
      .streamingRecognize(this._inputFormat)
      .on("error", (error) =>
        console.error(`Guild ${this._guild.id}: Google API error ${error}`)
      )
      .on("data", (data) => {
        console.log("Got data!");
        // A response without results/alternatives has nothing to execute.
        const result = data && data.results && data.results[0];
        const alternative =
          result && result.alternatives && result.alternatives[0];
        if (!alternative) {
          return;
        }
        this._transcribed = alternative.transcript;
        console.log(
          `Guild ${this._guild.id}: Google API Transcribed: ${this._transcribed}`
        );
        // executeCommand is async: surface failures instead of leaving an
        // unhandled rejection.
        this.executeCommand(this._transcribed).catch((error) =>
          console.error(
            `Guild ${this._guild.id}: Command execution failed: ${error}`
          )
        );
      });
  }

  /** Ends the current Google recognition stream. */
  shutdownStream() {
    console.log(`Guild ${this._guild.id}: Shutting Down Google Stream`);
    this._currentStream.end();
  }

  /**
   * Executes command given the transcribed text.
   *
   * The transcript is echoed to the text channel ("..." when empty). Its
   * first word, lower-cased, selects the voice command and the remaining
   * words are passed as arguments. An empty or blank transcript is echoed
   * but not executed.
   *
   * @param transcribed
   * @returns {Promise<void>}
   */
  async executeCommand(transcribed) {
    const client = this._connection.client;
    const voiceState = client.voiceConnections.get(
      this._connection.channel.guild.id
    );

    voiceState.textChannel.send(
      `<@${voiceState.listeningTo.id}> said: "${
        transcribed ? transcribed : "..."
      }"`
    );

    // Trim first so leading/trailing spaces do not produce an empty command
    // name.
    const trimmed = transcribed ? transcribed.trim() : "";
    if (trimmed === "") {
      return;
    }

    const args = trimmed.split(/\s+/);
    const stringCommand = args.shift().toLowerCase();
    const command = client.voiceCommands.get(stringCommand);
    if (command === undefined) {
      console.log(
        `Guild ${this._guild.id}: ${stringCommand} command not available`
      );
      // Report the requested name; `command` is undefined in this branch.
      voiceState.textChannel.send(`${stringCommand} is not available`);
      return;
    }
    command.execute(client, this._connection.channel.guild, args);
  }

  /** Destroys the Bumblebee instance and disconnects the voice connection. */
  shutdown() {
    this._bumblebee.destroy();
    this._connection.disconnect();
  }
}

converter.js - Where we convert the discord stream into the appropriate format

const ffmpegPath = require("@ffmpeg-installer/ffmpeg").path;
const ffmpeg = require("fluent-ffmpeg");
ffmpeg.setFfmpegPath(ffmpegPath);
/**
 * Builds an ffmpeg pipeline that re-encodes an audio stream from one raw
 * format into another (e.g. 48000 Hz 2 channel to 16000 Hz 1 channel).
 *
 * NOTE(review): the default input flags declare 44100 Hz while the example
 * above mentions 48000 Hz — confirm the defaults match the stream callers
 * actually pass in.
 *
 * @param stream input audio stream
 * @param inputArray ffmpeg flags describing the input format
 * @param outputArray ffmpeg flags describing the output format
 * @param formatType REQUIRED. the format of the output (e.g. s16le)
 * @returns {stream.Writable|{end: boolean}|*} the stream returned by piping the ffmpeg command, carrying the converted audio
 */
function createConverter(
  stream,
  inputArray = ["-f s16le", "-ac 2", "-ar 44100"],
  outputArray = ["-ac 1", "-ar 16000"],
  formatType = "s16le"
) {
  // Describe the input side of the command first...
  const withInput = new ffmpeg().input(stream).inputOptions(inputArray);
  // ...then the output side.
  const withOutput = withInput.outputOptions(outputArray).format(formatType);
  // `end: false` is passed through to pipe() unchanged.
  const pipeOptions = { end: false };
  return withOutput.pipe(pipeOptions);
}

module.exports = { createConverter };

Connect.js - Where stream is converted and then fed to Voice

// Subscribe to the member's audio with a manual end behavior and log any
// error raised by the resulting stream.
// NOTE(review): receiver streams from @discordjs/voice appear to carry Opus
// packets, while createConverter's default input flags declare raw s16le PCM
// — confirm whether a decode step is needed before conversion.
const listenStream = await connection.receiver
        .subscribe(member.user.id, {
          end: {
            behavior: EndBehaviorType.Manual,
          },
        })
        .on("error", (error) => {
          console.log("audioReceiveStream error: ", error);
        });

      // Make voice streams for voice commands: convert the received audio
      // using createConverter's default input/output options.
      const voiceRecorderStream = createConverter(listenStream);

      // Debug aid: print every converted chunk.
      voiceRecorderStream.on('data',(data)=>{
        console.log(data)
      })
      // Hand the converted stream to the hotword / recognition service.
      const vr = new VoiceRecognitionService(
        hotword,
        connection,
        voiceRecorderStream
      );

Some of the logs that repeat during stream for insight:

Stream! <Buffer 1e 0f 85 0f ee f1 e6 05 ad f9 4c e8 6d f3 04 e6 0e e8 d9 23 94 0e c1 03 24 0b 77 2d b4 ef 8f ee a1 10 ba fd 05 15 49 0c c4 32 1f 16 03 03 6f 58 2a f5 ... 692 more bytes>
Emmiting data!
Data back!

I'm really not sure where I'm going wrong here, and any help would be appreciated.

JackJBlundell commented 8 months ago

Can any of the maintainers provide any insights? @jaxcore @benbucksch @dsteinman

Really want to use this tool - Super close & would really appreciate any help.

P.S. Sorry, I'm on my secondary account here!

dsteinman commented 8 months ago

Hi, sorry for being MIA. I don't have any active voice projects at the moment so this has been a low priority for me.

Unfortunately the Porcupine codebase is not exactly open source and the web assembly code is all obfuscated, including the hotwords, so it was always a gamble that this wrapper hack was going to work long term. They probably encoded an expiration date in web assembly.

I'll try to set this up again and see if an updated version of Porcupine can be copied in. If they didn't make any major changes to it, it might work as-is. If they've revamped their code a lot I'd maybe try to find another solution.

Have you looked around at other projects? Is there seriously still no other valid options?

JackJBlundell commented 8 months ago

> Hi, sorry for being MIA. I don't have any active voice projects at the moment so this has been a low priority for me.
>
> Unfortunately the Porcupine codebase is not exactly open source and the web assembly code is all obfuscated, including the hotwords, so it was always a gamble that this wrapper hack was going to work long term. They probably encoded an expiration date in web assembly.
>
> I'll try to set this up again and see if an updated version of Porcupine can be copied in. If they didn't make any major changes to it, it might work as-is. If they've revamped their code a lot I'd maybe try to find another solution.
>
> Have you looked around at other projects? Is there seriously still no other valid options?

Hey thanks so much for getting back to me! No problem at all.

Ah okay, I had thought I was doing something wrong because the package contains files for 1.9 of Porcupine - I will also look into this on my end to see if I can get something working so you are not alone!

I have seen some other alternatives. I wanted to try this one before moving on, as it looked like exactly what was needed. If I can't get it figured out I can always look for alternatives, but it would be awesome to get this in place, as seemingly I'm just missing one piece of the puzzle.

benbucksch commented 8 months ago

My 2 cents (I'm not a maintainer):

None of this is a response to a specific question that you have, but maybe one of these pointers gives you the hint you need to find the problem.