A Fundamental End-to-End Speech Recognition Toolkit and Open Source SOTA Pretrained Models, Supporting Speech Recognition, Voice Activity Detection, Text Post-processing etc.
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
# Reproduction script: build a ModelScope speaker-diarization pipeline (a SOND
# diarization model plus an x-vector speaker-verification model, per the model
# IDs below), run it on sample audio URLs, and print the result.
# initialize pipeline
inference_diar_pipline = pipeline(
mode="sond_demo",
num_workers=0,
task=Tasks.speaker_diarization,
diar_model_config="sond.yaml",
model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
# NOTE(review): modelscope's pipeline() names its revision parameter
# `model_revision`; `reversion` is not one of its named parameters, so it is
# presumably forwarded through **kwargs instead of pinning the model version.
# TODO confirm the intended spelling. This looks unrelated to the
# AttributeError reported for this script.
reversion="v1.0.5",
sv_model="damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch",
sv_model_revision="v1.2.2",
)
# input: a list of audio files in which the first item is the speech recording
# in which to detect speakers, and the following wav files are used to extract
# speaker embeddings.
audio_list = [
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav",
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav",
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk2.wav",
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk3.wav",
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk4.wav",
]
# Run the pipeline on the whole list (passed as `audio_in`) and print whatever
# it returns.
results = inference_diar_pipline(audio_in=audio_list)
print(results)
result:
AttributeError Traceback (most recent call last)
File [~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/utils/registry.py:212](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/ai/FunASR/~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/utils/registry.py:212), in build_from_cfg(cfg, registry, group_key, default_args)
211 else:
--> 212 return obj_cls(**args)
213 except Exception as e:
214 # Normal TypeError does not print class name.
File [~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/pipelines/audio/speaker_diarization_pipeline.py:75](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/ai/FunASR/~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/pipelines/audio/speaker_diarization_pipeline.py:75), in SpeakerDiarizationPipeline.__init__(self, model, sv_model, sv_model_revision, ngpu, **kwargs)
74 from funasr.bin import diar_inference_launch
---> 75 self.funasr_infer_modelscope = diar_inference_launch.inference_launch(
76 mode=self.cmd['mode'],
77 output_dir=self.cmd['output_dir'],
78 batch_size=self.cmd['batch_size'],
79 dtype=self.cmd['dtype'],
80 ngpu=self.cmd['ngpu'],
81 seed=self.cmd['seed'],
82 num_workers=self.cmd['num_workers'],
83 log_level=self.cmd['log_level'],
84 key_file=self.cmd['key_file'],
85 diar_train_config=self.cmd['diar_train_config'],
86 diar_model_file=self.cmd['diar_model_file'],
87 model_tag=self.cmd['model_tag'],
88 allow_variable_data_keys=self.cmd['allow_variable_data_keys'],
89 streaming=self.cmd['streaming'],
90 smooth_size=self.cmd['smooth_size'],
91 dur_threshold=self.cmd['dur_threshold'],
92 out_format=self.cmd['out_format'],
93 param_dict=self.cmd['param_dict'],
94 **kwargs,
95 )
File [/mnt/ai/FunASR/funasr/bin/diar_inference_launch.py:357](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/mnt/ai/FunASR/funasr/bin/diar_inference_launch.py:357), in inference_launch(mode, **kwargs)
356 kwargs["param_dict"] = param_dict
--> 357 return inference_sond(mode=mode, **kwargs)
358 elif mode == "eend-ola":
File [/mnt/ai/FunASR/funasr/bin/diar_inference_launch.py:95](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/mnt/ai/FunASR/funasr/bin/diar_inference_launch.py:95), in inference_sond(diar_train_config, diar_model_file, output_dir, batch_size, dtype, ngpu, seed, num_workers, log_level, key_file, model_tag, allow_variable_data_keys, streaming, smooth_size, dur_threshold, out_format, param_dict, mode, **kwargs)
94 logging.info("speech2xvector_kwargs: {}".format(speech2xvector_kwargs))
---> 95 speech2xvector = Speech2Xvector.from_pretrained(
96 model_tag=model_tag,
97 **speech2xvector_kwargs,
98 )
99 speech2xvector.sv_model.eval()
AttributeError: type object 'Speech2Xvector' has no attribute 'from_pretrained'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
Cell In[10], line 5
2 from modelscope.utils.constant import Tasks
4 # initialize pipeline
----> 5 inference_diar_pipline = pipeline(
6 mode="sond_demo",
7 num_workers=0,
8 task=Tasks.speaker_diarization,
9 diar_model_config="sond.yaml",
10 model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
11 reversion="v1.0.5",
12 sv_model="damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch",
13 sv_model_revision="v1.2.2",
14 )
16 # input: a list of audio in which the first item is a speech recording to detect speakers,
17 # and the following wav file are used to extract speaker embeddings.
18 audio_list = [
19 "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav",
20 "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav",
(...)
23 "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk4.wav",
24 ]
File [~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/pipelines/builder.py:140](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/ai/FunASR/~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/pipelines/builder.py:140), in pipeline(task, model, preprocessor, config_file, pipeline_name, framework, device, model_revision, **kwargs)
137 if preprocessor is not None:
138 cfg.preprocessor = preprocessor
--> 140 return build_pipeline(cfg, task_name=task)
File [~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/pipelines/builder.py:56](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/ai/FunASR/~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/pipelines/builder.py:56), in build_pipeline(cfg, task_name, default_args)
45 def build_pipeline(cfg: ConfigDict,
46 task_name: str = None,
47 default_args: dict = None):
48 """ build pipeline given model config dict.
49
50 Args:
(...)
54 default_args (dict, optional): Default initialization arguments.
55 """
---> 56 return build_from_cfg(
57 cfg, PIPELINES, group_key=task_name, default_args=default_args)
File [~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/utils/registry.py:215](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/ai/FunASR/~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/utils/registry.py:215), in build_from_cfg(cfg, registry, group_key, default_args)
212 return obj_cls(**args)
213 except Exception as e:
214 # Normal TypeError does not print class name.
--> 215 raise type(e)(f'{obj_cls.__name__}: {e}')
AttributeError: SpeakerDiarizationPipeline: type object 'Speech2Xvector' has no attribute 'from_pretrained'
run code from
code:
result: