alphacep / vosk-api

Offline speech recognition API for Android, iOS, Raspberry Pi and servers with Python, Java, C# and Node
Apache License 2.0
7.35k stars 1.04k forks source link

Vosk node js does not recognize the words #1590

Open Falkys opened 3 weeks ago

Falkys commented 3 weeks ago

Code

// Streams microphone audio into a Vosk recognizer and prints the partial and
// final recognition results until the process is interrupted with Ctrl+C.
const vosk = require('vosk');

const fs = require("fs");
const mic = require("mic");

// Declared with `const` so they do not leak into the global object.
const MODEL_PATH = "./vosk";
const SAMPLE_RATE = 16000;

if (!fs.existsSync(MODEL_PATH)) {
    console.error("Please download the model from https://alphacephei.com/vosk/models and unpack as " + MODEL_PATH + " in the current folder.");
    // Exit with a non-zero status so callers can tell that startup failed.
    process.exit(1);
}

vosk.setLogLevel(0);
const model = new vosk.Model(MODEL_PATH);
const rec = new vosk.Recognizer({model: model, sampleRate: SAMPLE_RATE});

// The microphone is opened at the same rate the recognizer was created with.
const micInstance = mic({
    rate: String(SAMPLE_RATE),
    channels: '1',
    debug: false,
    device: 'default',
});

const micInputStream = micInstance.getAudioStream();

// Feed every captured chunk to the recognizer; print result() when
// acceptWaveform() reports true, otherwise print the current partial result.
micInputStream.on('data', (data) => {
    if (rec.acceptWaveform(data)) {
        console.log(rec.result());
    } else {
        console.log(rec.partialResult());
    }
});

// Report capture failures; without this listener a broken microphone
// pipeline only shows up as a stream of empty results.
micInputStream.on('error', (err) => {
    console.error(`Error in microphone input stream: ${err}`);
});

// Runs once the recording process has exited: print the last result and
// release the recognizer and the model.
micInputStream.on('audioProcessExitComplete', () => {
    console.log("Cleaning up");
    console.log(rec.finalResult());
    rec.free();
    model.free();
});

// Ctrl+C stops the microphone; cleanup happens in the handler above.
process.on('SIGINT', () => {
    console.log("\nStopping");
    micInstance.stop();
});

micInstance.start();

Console:

PS C:\projects\palladium> node test.js
LOG (VoskAPI:ReadDataFiles():model.cc:213) Decoding params beam=10 max-active=3000 lattice-beam=2
LOG (VoskAPI:ReadDataFiles():model.cc:216) Silence phones 1:2:3:4:5:6:7:8:9:10
LOG (VoskAPI:RemoveOrphanNodes():nnet-nnet.cc:948) Removed 0 orphan nodes.
LOG (VoskAPI:RemoveOrphanComponents():nnet-nnet.cc:847) Removing 0 orphan components.
LOG (VoskAPI:ReadDataFiles():model.cc:248) Loading i-vector extractor from ./vosk/ivector/final.ie
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:183) Computing derived variables for iVector extractor
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:204) Done.
LOG (VoskAPI:ReadDataFiles():model.cc:282) Loading HCL and G from ./vosk/graph/HCLr.fst ./vosk/graph/Gr.fst
LOG (VoskAPI:ReadDataFiles():model.cc:303) Loading winfo ./vosk/graph/phones/word_boundary.int
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ text: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ text: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ text: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ text: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ text: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ partial: '' }
{ text: '' }

Stopping
Cleaning up
{ text: '' }
Falkys commented 3 weeks ago

I tried the same model in Python and everything worked, but it does not work in Node.js.

nshmyrev commented 3 weeks ago

It's likely an issue with the mic module; you can check whether the data actually contains audio.