A Fundamental End-to-End Speech Recognition Toolkit and Open Source SOTA Pretrained Models, Supporting Speech Recognition, Voice Activity Detection, Text Post-processing etc.
I want to use the SenseVoice model for ASR and also distinguish speakers (speaker diarization), but it keeps throwing an error when I run it.
The model loading code is as follows:
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess

model = AutoModel(
    model="/data/asr/SenseVoice/iic/SenseVoiceSmall",
    vad_model="/data/asr/FunASR-main/model_zoo/fsmn-vad",
    punc_model="/data/asr/FunASR-main/model_zoo/ct-punc",
    vad_kwargs={"max_single_segment_time": 30000},
    device="cuda:0",
    disable_update=True,
    spk_model="/data/asr/FunASR-main/model_zoo/cam",  # speaker model for diarization
)

res = model.generate(
    input="/data/asr/FunASR-main/tests/voice.mp3",
    cache={},
    language="auto",  # "zh", "en", "yue", "ja", "ko", "nospeech"
    use_itn=True,
    batch_size_s=60,
    merge_vad=True,       # merge short VAD segments before recognition
    merge_length_s=15,
)
text = rich_transcription_postprocess(res[0]["text"])
print(text)
The error is as follows:
File "/data/asr/FunASR-main/tests/test.py", line 15, in
res = model.generate(
File "/data/asr/SenseVoice/venv/lib/python3.9/site-packages/funasr/auto/auto_model.py", line 263, in generate
return self.inference_with_vad(input, input_len=input_len, cfg)
File "/data/asr/SenseVoice/venv/lib/python3.9/site-packages/funasr/auto/auto_model.py", line 495, in inference_with_vad
punc_res = self.inference(
File "/data/asr/SenseVoice/venv/lib/python3.9/site-packages/funasr/auto/auto_model.py", line 302, in inference
res = model.inference(batch, **kwargs)
File "/data/asr/SenseVoice/venv/lib/python3.9/site-packages/funasr/models/bicif_paraformer/model.py", line 257, in inference
speech, speech_lengths = extract_fbank(
File "/data/asr/SenseVoice/venv/lib/python3.9/site-packages/funasr/utils/load_utils.py", line 170, in extract_fbank
data_len.asrend(data_i.shape[0])
AttributeError: 'str' object has no attribute 'shape'
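For context, the traceback shows that extract_fbank is handed a plain string rather than an audio array once the pipeline reaches the punc/spk stage. Below is a minimal isolation sketch (my addition, not part of the original report, and untested with these local model paths): the same pipeline with only the VAD model attached, mirroring the documented SenseVoiceSmall example, so that if this runs cleanly the failure can be narrowed down to the punc_model/spk_model stage.

# Isolation sketch (assumes the same local model paths as in the code above).
# Runs SenseVoiceSmall + VAD only, without punc_model/spk_model,
# to check whether the base pipeline works before adding diarization.
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess

model = AutoModel(
    model="/data/asr/SenseVoice/iic/SenseVoiceSmall",
    vad_model="/data/asr/FunASR-main/model_zoo/fsmn-vad",
    vad_kwargs={"max_single_segment_time": 30000},
    device="cuda:0",
    disable_update=True,
)

res = model.generate(
    input="/data/asr/FunASR-main/tests/voice.mp3",
    cache={},
    language="auto",
    use_itn=True,
    batch_size_s=60,
    merge_vad=True,
    merge_length_s=15,
)
print(rich_transcription_postprocess(res[0]["text"]))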