k2-fsa / sherpa-onnx

Speech-to-text, text-to-speech, and speaker recognition using next-gen Kaldi with onnxruntime without Internet connection. Support embedded systems, Android, iOS, Raspberry Pi, RISC-V, x86_64 servers, websocket server/client, C/C++, Python, Kotlin, C#, Go, NodeJS, Java, Swift, Dart, JavaScript, Flutter
https://k2-fsa.github.io/sherpa/onnx/index.html
Apache License 2.0
2.59k stars 295 forks source link

go-api-examples/vad-asr-paraformer bugs #730

Closed jingsupo closed 3 weeks ago

jingsupo commented 3 months ago

Example: sherpa-onnx/go-api-examples/vad-asr-paraformer

go version go1.22.0 windows/amd64

bug info:

2024/04/02 14:06:59.904411 Selected default input device: 麦克风阵列 (适用于数字麦克风的英特尔® 智音技术)
2024/04/02 14:07:00.273700 Started! Please speak
2024/04/02 14:07:18.407361 Detected speech
2024/04/02 14:07:19.597375 Duration: 1.03 seconds
Exception 0xc0000005 0x0 0x1fd95e5c000 0x7ff91c7a3520
PC=0x7ff91c7a3520
signal arrived during external code execution

runtime.cgocall(0xca8090, 0xc00006be10)
        D:/opt/Go/src/runtime/cgocall.go:157 +0x3e fp=0xc00006bde8 sp=0xc00006bdb0 pc=0xc0585e
github.com/k2-fsa/sherpa-onnx-go-windows._Cfunc_DecodeOfflineStream(0x1fd66019060, 0x1fd76a2e840)
        _cgo_gotypes.go:505 +0x4d fp=0xc00006be10 sp=0xc00006bde8 pc=0xc9eb2d
github.com/k2-fsa/sherpa-onnx-go-windows.(*OfflineRecognizer).Decode.func1(0x4060?, 0xc00000a610?)
        C:/Users/Administrator/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows@v1.9.15/sherpa_onnx.go:493 +0x73 fp=0xc00006be60 sp=0xc00006be10 pc=0xca0cd3
github.com/k2-fsa/sherpa-onnx-go-windows.(*OfflineRecognizer).Decode(0xc00000a0c0?, 0x0?)
        C:/Users/Administrator/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows@v1.9.15/sherpa_onnx.go:493 +0x13 fp=0xc00006be80 sp=0xc00006be60 pc=0xca0c33
main.decode(0xc00000a0c0, 0xc000180020, 0x0)
        D:/Desktop/sherpa-onnx-master/go-api-examples/vad-asr-paraformer/main.go:127 +0xd9 fp=0xc00006bfb8 sp=0xc00006be80 pc=0xca6e99
main.main.gowrap5()
        D:/Desktop/sherpa-onnx-master/go-api-examples/vad-asr-paraformer/main.go:112 +0x28 fp=0xc00006bfe0 sp=0xc00006bfb8 pc=0xca6b48
runtime.goexit({})
        D:/opt/Go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc00006bfe8 sp=0xc00006bfe0 pc=0xc65781
created by main.main in goroutine 1
        D:/Desktop/sherpa-onnx-master/go-api-examples/vad-asr-paraformer/main.go:112 +0x730

goroutine 1 gp=0xc000058000 m=0 mp=0xd8de00 [syscall]:
runtime.cgocall(0xca7930, 0xc000117cb0)
        D:/opt/Go/src/runtime/cgocall.go:157 +0x3e fp=0xc000117c88 sp=0xc000117c50 pc=0xc0585e
github.com/gordonklaus/portaudio._Cfunc_Pa_ReadStream(0x1fd65702de0, 0xc00012a000, 0x640)
        _cgo_gotypes.go:521 +0x55 fp=0xc000117cb0 sp=0xc000117c88 pc=0xc9b135
github.com/gordonklaus/portaudio.(*Stream).Read.func1(0xc000117d00?, 0xc00012a000, 0x640)
        C:/Users/Administrator/go/pkg/mod/github.com/gordonklaus/portaudio@v0.0.0-20230709114228-aafa478834f5/portaudio.go:1004 +0x6e fp=0xc000117ce8 sp=0xc000117cb0 pc=0xc9e14e
github.com/gordonklaus/portaudio.(*Stream).Read(0xc000132000)
        C:/Users/Administrator/go/pkg/mod/github.com/gordonklaus/portaudio@v0.0.0-20230709114228-aafa478834f5/portaudio.go:1004 +0x51 fp=0xc000117d10 sp=0xc000117ce8 pc=0xc9e091
main.main()
        D:/Desktop/sherpa-onnx-master/go-api-examples/vad-asr-paraformer/main.go:89 +0x610 fp=0xc000117f50 sp=0xc000117d10 pc=0xca6750
runtime.main()
        D:/opt/Go/src/runtime/proc.go:271 +0x28b fp=0xc000117fe0 sp=0xc000117f50 pc=0xc39bab
runtime.goexit({})
        D:/opt/Go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc000117fe8 sp=0xc000117fe0 pc=0xc65781

goroutine 2 gp=0xc000058700 m=nil [force gc (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
        D:/opt/Go/src/runtime/proc.go:402 +0xce fp=0xc00005bfa8 sp=0xc00005bf88 pc=0xc39fae
runtime.goparkunlock(...)
        D:/opt/Go/src/runtime/proc.go:408
runtime.forcegchelper()
        D:/opt/Go/src/runtime/proc.go:326 +0xb8 fp=0xc00005bfe0 sp=0xc00005bfa8 pc=0xc39e38
runtime.goexit({})
        D:/opt/Go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc00005bfe8 sp=0xc00005bfe0 pc=0xc65781
created by runtime.init.6 in goroutine 1
        D:/opt/Go/src/runtime/proc.go:314 +0x1a

goroutine 3 gp=0xc000058a80 m=nil [GC sweep wait]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
        D:/opt/Go/src/runtime/proc.go:402 +0xce fp=0xc00005df80 sp=0xc00005df60 pc=0xc39fae
runtime.goparkunlock(...)
        D:/opt/Go/src/runtime/proc.go:408
runtime.bgsweep(0xc000068000)
        D:/opt/Go/src/runtime/mgcsweep.go:278 +0x94 fp=0xc00005dfc8 sp=0xc00005df80 pc=0xc249b4
runtime.gcenable.gowrap1()
        D:/opt/Go/src/runtime/mgc.go:203 +0x25 fp=0xc00005dfe0 sp=0xc00005dfc8 pc=0xc192c5
runtime.goexit({})
        D:/opt/Go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc00005dfe8 sp=0xc00005dfe0 pc=0xc65781
created by runtime.gcenable in goroutine 1
        D:/opt/Go/src/runtime/mgc.go:203 +0x66

goroutine 4 gp=0xc000058c40 m=nil [GC scavenge wait]:
runtime.gopark(0xc000068000?, 0xd02970?, 0x1?, 0x0?, 0xc000058c40?)
        D:/opt/Go/src/runtime/proc.go:402 +0xce fp=0xc00006ff78 sp=0xc00006ff58 pc=0xc39fae
runtime.goparkunlock(...)
        D:/opt/Go/src/runtime/proc.go:408
runtime.(*scavengerState).park(0xd8d7a0)
        D:/opt/Go/src/runtime/mgcscavenge.go:425 +0x49 fp=0xc00006ffa8 sp=0xc00006ff78 pc=0xc223a9
runtime.bgscavenge(0xc000068000)
        D:/opt/Go/src/runtime/mgcscavenge.go:653 +0x3c fp=0xc00006ffc8 sp=0xc00006ffa8 pc=0xc2293c
runtime.gcenable.gowrap2()
        D:/opt/Go/src/runtime/mgc.go:204 +0x25 fp=0xc00006ffe0 sp=0xc00006ffc8 pc=0xc19265
runtime.goexit({})
        D:/opt/Go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc00006ffe8 sp=0xc00006ffe0 pc=0xc65781
created by runtime.gcenable in goroutine 1
        D:/opt/Go/src/runtime/mgc.go:204 +0xa5

goroutine 5 gp=0xc000059180 m=nil [finalizer wait]:
runtime.gopark(0xc00005fe48?, 0xc0f0c5?, 0xa8?, 0x1?, 0xc000058000?)
        D:/opt/Go/src/runtime/proc.go:402 +0xce fp=0xc00005fe20 sp=0xc00005fe00 pc=0xc39fae
runtime.runfinq()
        D:/opt/Go/src/runtime/mfinal.go:194 +0x107 fp=0xc00005ffe0 sp=0xc00005fe20 pc=0xc18347
runtime.goexit({})
        D:/opt/Go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc00005ffe8 sp=0xc00005ffe0 pc=0xc65781
created by runtime.createfing in goroutine 1
        D:/opt/Go/src/runtime/mfinal.go:164 +0x3d
rax     0x1fd7653258c
rbx     0x1fd76543ddc
rcx     0x1fd1e460000
rdx     0x1fd1e460000
rdi     0x386a0
rsi     0x1fd76549810
rbp     0x4e069ff480
rsp     0x4e069ff380
r8      0x1fd9553f020
r9      0x1
r10     0x1fd1e460000
r11     0x4e069ff250
r12     0x4e069ff630
r13     0x0
r14     0x1fd95e5c000
r15     0x3c31471d
rip     0x7ff91c7a3520
rflags  0x10293
cs      0x33
fs      0x53
gs      0x2b

csukuangfj commented 3 months ago

能够复现吗

jingsupo commented 3 months ago

可以啊，每一次都出这个错。会不会是我的 portaudio 相关依赖的问题？关键是从这个错误提示里看不出问题出在哪。

csukuangfj commented 3 months ago

非Windows能复现吗

jingsupo commented 3 months ago

非Windows没试过

jingsupo commented 3 months ago

我在阿里云的服务器上测试了 Linux 环境，由于没有音频设备，报了下面的错误：

2024/04/06 12:52:21.578184 Failed to get default input device: %v
no default input device

不过我在阿里云的服务器上测试了 non-streaming-decode-files，这个例子直接对音频文件进行语音识别，不需要音频设备，所以运行正常：

[root@iZ2zeatc350vkqmd1l53tjZ non-streaming-decode-files]# ./run-paraformer.sh 
2024/04/06 12:56:21.554292 Reading ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/3-sichuan.wav
2024/04/06 12:56:21.571568 Initializing recognizer (may take several seconds)
2024/04/06 12:56:26.031247 Recognizer created!
2024/04/06 12:56:26.031276 Start decoding!
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 0, 40 vs -1
2024/04/06 12:56:26.626338 Decoding done!
2024/04/06 12:56:26.626369 自己就是在那个在那个就是在情节里面就是感觉是演得特别好就是好像很真实一样你知道吧
2024/04/06 12:56:26.626392 Wave duration: 7.835 seconds

csukuangfj commented 3 months ago

那Windows上可以跑这个识别文件的例子么

jingsupo commented 3 weeks ago

今天又重新编译了,可以运行成功~

go version go1.22.2 windows/amd64

D:\a\sherpa-onnx\sherpa-onnx\sherpa-onnx\c-api\c-api.cc:SherpaOnnxCreateVoiceActivityDetector:812 VadModelConfig(silero_vad=SilerVadModelConfig(model="./silero_vad.onnx", threshold=0.5, min_silence_duration=0.5, min_speech_duration=0.25, window_size=512), sample_rate=16000, num_threads=1, provider="cpu", debug=True)
D:\a\sherpa-onnx\sherpa-onnx\sherpa-onnx\c-api\c-api.cc:CreateOfflineRecognizer:398 OfflineRecognizerConfig(feat_config=FeatureExtractorConfig(sampling_rate=16000, feature_dim=80, low_freq=20, high_freq=-400, dither=0), model_config=OfflineModelConfig(transducer=OfflineTransducerModelConfig(encoder_filename="", decoder_filename="", joiner_filename=""), paraformer=OfflineParaformerModelConfig(model="./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/model.int8.onnx"), nemo_ctc=OfflineNemoEncDecCtcModelConfig(model=""), whisper=OfflineWhisperModelConfig(encoder="", decoder="", language="", task="transcribe", tail_paddings=-1), tdnn=OfflineTdnnModelConfig(model=""), zipformer_ctc=OfflineZipformerCtcModelConfig(model=""), wenet_ctc=OfflineWenetCtcModelConfig(model=""), telespeech_ctc="", tokens="./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/tokens.txt", num_threads=2, debug=True, provider="cpu", model_type="", modeling_unit="", bpe_vocab=""), lm_config=OfflineLMConfig(model="", scale=1), ctc_fst_decoder_config=OfflineCtcFstDecoderConfig(graph="", max_active=3000), decoding_method="greedy_search", max_active_paths=4, hotwords_file="", hotwords_score=1.5, blank_penalty=0, rule_fsts="", rule_fars="")
2024/07/02 15:42:21.264935 Selected default input device: 麦克风阵列 (Realtek High Definition
2024/07/02 15:42:21.347389 Started! Please speak
2024/07/02 15:42:52.563583 Detected speech
2024/07/02 15:42:54.272830 Duration: 1.51 seconds
2024/07/02 15:42:54.348905 今天天气不错
2024/07/02 15:42:54.349189 Saved to seg-0-1.51-seconds-今天天气不错.wav
2024/07/02 15:42:54.349711 ----------

csukuangfj commented 3 weeks ago

太棒啦!可以关闭了么?

jingsupo commented 3 weeks ago

我来关闭吧^^