alphacep / vosk-api

Offline speech recognition API for Android, iOS, Raspberry Pi and servers with Python, Java, C# and Node
Apache License 2.0
7.57k stars 1.06k forks source link

The output result is messy code #1330

Closed liming1010 closed 1 year ago

liming1010 commented 1 year ago
public static void main(String[] argv) throws IOException, UnsupportedAudioFileException {
        LibVosk.setLogLevel(LogLevel.DEBUG);
        try (Model model = new Model("conf/vosk-model-small-cn-0.22");
        // try (Model model = new Model("conf/vosk-model-cn-0.22");
             InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream("conf/test2.wav")));
             Recognizer recognizer = new Recognizer(model, 16000)) {

            int nbytes;
            byte[] b = new byte[4096];
            while ((nbytes = ais.read(b)) >= 0) {
                if (recognizer.acceptWaveForm(b, nbytes)) {
                    System.out.println(recognizer.getResult());
                } else {
                    System.out.println(recognizer.getPartialResult());
                }
            }

            System.out.println(recognizer.getFinalResult());
        }
    }
LOG (VoskAPI:ReadDataFiles():model.cc:213) Decoding params beam=12 max-active=5000 lattice-beam=4
LOG (VoskAPI:ReadDataFiles():model.cc:216) Silence phones 1:2:3:4:5:6:7:8:9:10
LOG (VoskAPI:RemoveOrphanNodes():nnet-nnet.cc:948) Removed 0 orphan nodes.
LOG (VoskAPI:RemoveOrphanComponents():nnet-nnet.cc:847) Removing 0 orphan components.
LOG (VoskAPI:ReadDataFiles():model.cc:248) Loading i-vector extractor from conf/vosk-model-small-cn-0.22/ivector/final.ie
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:183) Computing derived variables for iVector extractor
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:204) Done.
LOG (VoskAPI:ReadDataFiles():model.cc:282) Loading HCL and G from conf/vosk-model-small-cn-0.22/graph/HCLr.fst conf/vosk-model-small-cn-0.22/graph/Gr.fst
LOG (VoskAPI:ReadDataFiles():model.cc:308) Loading winfo conf/vosk-model-small-cn-0.22/graph/phones/word_boundary.int
{
  "partial" : ""
}
{
  "partial" : ""
}
{
  "partial" : ""
}
{
  "partial" : ""
}
{
  "partial" : ""
}
{
  "partial" : ""
}
{
  "partial" : "鐢峰"
}
{
  "partial" : "鐢峰"
}
{
  "partial" : "鍗楁捣"
}
{
  "partial" : "鍗楁捣"
}
{
  "partial" : "鐢峰"
}
{
  "partial" : "鐢峰"
}
{
  "partial" : "鐢峰 鏁︿績"
}
{
  "partial" : "鐢峰 鏁︿績"
}
{
  "partial" : "鐢峰 鏁︿績"
}
{
  "partial" : "鐢峰 鏁︿績"
}
{
  "partial" : "鐢峰 鏁︿績 鍦�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 瀹� 鎶ラ亾"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 瀹� 鎶ラ亾"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 瀹� 鎶�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 瀹� 鎶�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鐫�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鐫�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鐫�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鐫�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶� 璇锋眰"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶� 璇锋眰"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶� 璇锋眰 鏈�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶� 璇锋眰 鏈�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶� 璇锋眰 鏈�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶� 璇锋眰 鏈�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶� 璇锋眰 鍜竴鍙�"
}
{
  "partial" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶� 璇锋眰 鍜竴鍙�"
}
{
  "text" : "鐢峰 鏁︿績 鍦� 鐨� 涓� 鏃� 鎶� 璧� 鑴氳毒澶� 璇� 鍜竴鍙�"
}
<dependencies>
        <dependency>
            <groupId>net.java.dev.jna</groupId>
            <artifactId>jna</artifactId>
            <version>5.12.1</version>
        </dependency>
        <dependency>
            <groupId>com.alphacephei</groupId>
            <artifactId>vosk</artifactId>
            <version>0.3.45</version>
        </dependency>
    </dependencies>

Do not know why? please

nshmyrev commented 1 year ago

It prints utf-8 values, maybe your terminal expects different encoding. Try to save to a file and read.

As for accuracy, also check input file format, it should be mono 16khz PCM wav. mp3 is not going to work.

liming1010 commented 1 year ago

wav file is single channel and 16000 Hz,No changes made after saving the file,The returned code is messy code QQ截图20230409114617

nshmyrev commented 1 year ago

Those zeros in the string look strange. Maybe you have some other library using JNA and it modified string encoding. See

https://javadoc.io/doc/net.java.dev.jna/jna/latest/com/sun/jna/Native.html#getDefaultStringEncoding--

maybe you can check/set string encoding. It should be utf-8. Like add -Djna.encoding=utf-8 in runtime options.

liming1010 commented 1 year ago

thanks try add System.setProperty("jna.encoding", "utf-8"); result is ok

nshmyrev commented 2 months ago

Same as https://github.com/alphacep/vosk-api/issues/1449