Closed — chenyangMl closed this issue 3 weeks ago
测试中文的唤醒词
git clone https://github.com/k2-fsa/sherpa-onnx
cd sherpa-onnx
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Debug ..
make -j6
运行测试命令
./sherpa-onnx-keyword-spotter --tokens=/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt --model-type=zipformer --encoder=/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx --decoder=/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx --joiner=/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx --provider=cpu --num-threads=2 --keywords-file=/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/keywords.txt /mnt/data/speech/output_1.wav
错误信息: KeywordSpotterConfig(feat_config=FeatureExtractorConfig(sampling_rate=16000, feature_dim=80, low_freq=20, high_freq=-400, dither=0), model_config=OnlineModelConfig(transducer=OnlineTransducerModelConfig(encoder="/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx", decoder="/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx", joiner="/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"), paraformer=OnlineParaformerModelConfig(encoder="", decoder=""), wenet_ctc=OnlineWenetCtcModelConfig(model="", chunk_size=16, num_left_chunks=4), zipformer2_ctc=OnlineZipformer2CtcModelConfig(model=""), nemo_ctc=OnlineNeMoCtcModelConfig(model=""), provider_config=ProviderConfig(device=0, provider="cpu", cuda_config=CudaConfig(cudnn_conv_algo_search=1), trt_config=TensorrtConfig(trt_max_workspace_size=2147483647, trt_max_partition_iterations=10, trt_min_subgraph_size=5, trt_fp16_enable="True", trt_detailed_build_log="False", trt_engine_cache_enable="True", trt_engine_cache_path=".", trt_timing_cache_enable="True", trt_timing_cache_path=".",trt_dump_subgraphs="False" )), tokens="/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt", num_threads=2, warm_up=0, debug=False, model_type="zipformer", modeling_unit="cjkchar", bpe_vocab=""), max_active_paths=4, num_trailing_blanks=1, keywords_score=1, keywords_threshold=0.25, keywords_file="/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/keywords.txt")
/mnt/work/projects/speech/sherpa-onnx/sherpa-onnx/csrc/online-zipformer-transducer-model.cc:InitEncoder:99 attention_dims does not exist in the metadata
请问这个问题如何定位解决?
把 --model-type=zipformer
改成
--model-type=zipformer2
或者干脆不提供 --model-type, 用默认值就行. 原因: 这个模型 (sherpa-onnx-kws-zipformer-wenetspeech-3.3M) 是 zipformer2 结构; 指定 zipformer 时加载代码会去 metadata 里找 attention_dims 字段, 而 zipformer2 模型的 metadata 中没有该字段, 于是报错.
--model-type
测试中文的唤醒词
git clone https://github.com/k2-fsa/sherpa-onnx
cd sherpa-onnx
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Debug ..
make -j6
运行测试命令
错误信息: KeywordSpotterConfig(feat_config=FeatureExtractorConfig(sampling_rate=16000, feature_dim=80, low_freq=20, high_freq=-400, dither=0), model_config=OnlineModelConfig(transducer=OnlineTransducerModelConfig(encoder="/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx", decoder="/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx", joiner="/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"), paraformer=OnlineParaformerModelConfig(encoder="", decoder=""), wenet_ctc=OnlineWenetCtcModelConfig(model="", chunk_size=16, num_left_chunks=4), zipformer2_ctc=OnlineZipformer2CtcModelConfig(model=""), nemo_ctc=OnlineNeMoCtcModelConfig(model=""), provider_config=ProviderConfig(device=0, provider="cpu", cuda_config=CudaConfig(cudnn_conv_algo_search=1), trt_config=TensorrtConfig(trt_max_workspace_size=2147483647, trt_max_partition_iterations=10, trt_min_subgraph_size=5, trt_fp16_enable="True", trt_detailed_build_log="False", trt_engine_cache_enable="True", trt_engine_cache_path=".", trt_timing_cache_enable="True", trt_timing_cache_path=".",trt_dump_subgraphs="False" )), tokens="/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt", num_threads=2, warm_up=0, debug=False, model_type="zipformer", modeling_unit="cjkchar", bpe_vocab=""), max_active_paths=4, num_trailing_blanks=1, keywords_score=1, keywords_threshold=0.25, keywords_file="/mnt/data/speech/keyword-spot/models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/keywords.txt")
/mnt/work/projects/speech/sherpa-onnx/sherpa-onnx/csrc/online-zipformer-transducer-model.cc:InitEncoder:99 attention_dims does not exist in the metadata
请问这个问题如何定位解决?