modelscope / FunASR

A Fundamental End-to-End Speech Recognition Toolkit and Open Source SOTA Pretrained Models, Supporting Speech Recognition, Voice Activity Detection, Text Post-processing etc.
https://www.funasr.com
Other
6.06k stars 649 forks source link

SpeakerDiarizationPipeline: type object 'Speech2Xvector' has no attribute 'from_pretrained' #706

Closed lingfengchencn closed 1 year ago

lingfengchencn commented 1 year ago

I ran the quick-start code from:

https://alibaba-damo-academy.github.io/FunASR/en/modelscope_pipeline/sd_pipeline.html#quick-start

code:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# initialize pipeline
inference_diar_pipline = pipeline(
    mode="sond_demo",
    num_workers=0,
    task=Tasks.speaker_diarization,
    diar_model_config="sond.yaml",
    model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
    reversion="v1.0.5",
    sv_model="damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch",
    sv_model_revision="v1.2.2",
)

# input: a list of audio in which the first item is a speech recording to detect speakers, 
# and the following wav file are used to extract speaker embeddings.
audio_list = [
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav",
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav",
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk2.wav",
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk3.wav",
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk4.wav",
]

results = inference_diar_pipline(audio_in=audio_list)
print(results)

result:

AttributeError                            Traceback (most recent call last)
File [~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/utils/registry.py:212](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/ai/FunASR/~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/utils/registry.py:212), in build_from_cfg(cfg, registry, group_key, default_args)
    211     else:
--> 212         return obj_cls(**args)
    213 except Exception as e:
    214     # Normal TypeError does not print class name.

File [~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/pipelines/audio/speaker_diarization_pipeline.py:75](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/ai/FunASR/~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/pipelines/audio/speaker_diarization_pipeline.py:75), in SpeakerDiarizationPipeline.__init__(self, model, sv_model, sv_model_revision, ngpu, **kwargs)
     74 from funasr.bin import diar_inference_launch
---> 75 self.funasr_infer_modelscope = diar_inference_launch.inference_launch(
     76     mode=self.cmd['mode'],
     77     output_dir=self.cmd['output_dir'],
     78     batch_size=self.cmd['batch_size'],
     79     dtype=self.cmd['dtype'],
     80     ngpu=self.cmd['ngpu'],
     81     seed=self.cmd['seed'],
     82     num_workers=self.cmd['num_workers'],
     83     log_level=self.cmd['log_level'],
     84     key_file=self.cmd['key_file'],
     85     diar_train_config=self.cmd['diar_train_config'],
     86     diar_model_file=self.cmd['diar_model_file'],
     87     model_tag=self.cmd['model_tag'],
     88     allow_variable_data_keys=self.cmd['allow_variable_data_keys'],
     89     streaming=self.cmd['streaming'],
     90     smooth_size=self.cmd['smooth_size'],
     91     dur_threshold=self.cmd['dur_threshold'],
     92     out_format=self.cmd['out_format'],
     93     param_dict=self.cmd['param_dict'],
     94     **kwargs,
     95 )

File [/mnt/ai/FunASR/funasr/bin/diar_inference_launch.py:357](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/mnt/ai/FunASR/funasr/bin/diar_inference_launch.py:357), in inference_launch(mode, **kwargs)
    356         kwargs["param_dict"] = param_dict
--> 357     return inference_sond(mode=mode, **kwargs)
    358 elif mode == "eend-ola":

File [/mnt/ai/FunASR/funasr/bin/diar_inference_launch.py:95](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/mnt/ai/FunASR/funasr/bin/diar_inference_launch.py:95), in inference_sond(diar_train_config, diar_model_file, output_dir, batch_size, dtype, ngpu, seed, num_workers, log_level, key_file, model_tag, allow_variable_data_keys, streaming, smooth_size, dur_threshold, out_format, param_dict, mode, **kwargs)
     94 logging.info("speech2xvector_kwargs: {}".format(speech2xvector_kwargs))
---> 95 speech2xvector = Speech2Xvector.from_pretrained(
     96     model_tag=model_tag,
     97     **speech2xvector_kwargs,
     98 )
     99 speech2xvector.sv_model.eval()

AttributeError: type object 'Speech2Xvector' has no attribute 'from_pretrained'

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
Cell In[10], line 5
      2 from modelscope.utils.constant import Tasks
      4 # initialize pipeline
----> 5 inference_diar_pipline = pipeline(
      6     mode="sond_demo",
      7     num_workers=0,
      8     task=Tasks.speaker_diarization,
      9     diar_model_config="sond.yaml",
     10     model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
     11     reversion="v1.0.5",
     12     sv_model="damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch",
     13     sv_model_revision="v1.2.2",
     14 )
     16 # input: a list of audio in which the first item is a speech recording to detect speakers, 
     17 # and the following wav file are used to extract speaker embeddings.
     18 audio_list = [
     19     "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav",
     20     "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav",
   (...)
     23     "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk4.wav",
     24 ]

File [~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/pipelines/builder.py:140](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/ai/FunASR/~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/pipelines/builder.py:140), in pipeline(task, model, preprocessor, config_file, pipeline_name, framework, device, model_revision, **kwargs)
    137 if preprocessor is not None:
    138     cfg.preprocessor = preprocessor
--> 140 return build_pipeline(cfg, task_name=task)

File [~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/pipelines/builder.py:56](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/ai/FunASR/~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/pipelines/builder.py:56), in build_pipeline(cfg, task_name, default_args)
     45 def build_pipeline(cfg: ConfigDict,
     46                    task_name: str = None,
     47                    default_args: dict = None):
     48     """ build pipeline given model config dict.
     49 
     50     Args:
   (...)
     54         default_args (dict, optional): Default initialization arguments.
     55     """
---> 56     return build_from_cfg(
     57         cfg, PIPELINES, group_key=task_name, default_args=default_args)

File [~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/utils/registry.py:215](https://vscode-remote+ssh-002dremote-002b106-002e14-002e181-002e44.vscode-resource.vscode-cdn.net/ai/FunASR/~/miniconda3/envs/asr/lib/python3.10/site-packages/modelscope/utils/registry.py:215), in build_from_cfg(cfg, registry, group_key, default_args)
    212         return obj_cls(**args)
    213 except Exception as e:
    214     # Normal TypeError does not print class name.
--> 215     raise type(e)(f'{obj_cls.__name__}: {e}')

AttributeError: SpeakerDiarizationPipeline: type object 'Speech2Xvector' has no attribute 'from_pretrained'
jmwang66 commented 1 year ago

This has been fixed. Please pull the latest code and try again. PR link: https://github.com/alibaba-damo-academy/FunASR/pull/703