Closed gushuaialan1 closed 4 months ago
def infer_multilang(self, text, id, lang, sdp_ratio, noise, noisew, length, reference_audio=None, emotion=None,
                    text_prompt=None, style_text=None, style_weigth=0.7, **kwargs):
    """Run inference on text that is split into per-language segments.

    The text is partitioned with ``split_languages``, every segment is
    converted with ``self.get_text``, the per-segment results are trimmed at
    the inner segment boundaries and concatenated, and the concatenated
    tensors are handed to ``self._infer``.

    Args:
        text: Input text to split into language segments.
        id: Forwarded unchanged to ``self._infer`` (presumably a speaker
            id -- confirm against the caller).
        lang: Not read by this method; the split uses ``self.lang`` and each
            segment carries its own language tag.
        sdp_ratio: Forwarded unchanged to ``self._infer``.
        noise: Forwarded unchanged to ``self._infer``.
        noisew: Forwarded unchanged to ``self._infer``.
        length: Forwarded unchanged to ``self._infer``.
        reference_audio: Passed to ``self._get_emo`` or ``self._get_clap``
            when the model's ``emotion_embedding`` setting is 1 or 2.
        emotion: Passed to ``self._get_emo`` when ``emotion_embedding`` is 1.
        text_prompt: Passed to ``self._get_clap`` when ``emotion_embedding``
            is 2.
        style_text: Forwarded to ``self.get_text`` for every segment.
        style_weigth: Forwarded to ``self.get_text`` for every segment.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever ``self._infer`` returns for the concatenated inputs.
    """
    segments = split_languages(text, self.lang, expand_abbreviations=True, expand_hyphens=True)

    # Only emotion_embedding values 1 and 2 produce an emotion input;
    # any other value leaves it as None.
    emotion_mode = self.hps_ms.model.emotion_embedding
    if emotion_mode == 1:
        emo = self._get_emo(reference_audio, emotion).to(self.device).unsqueeze(0)
    elif emotion_mode == 2:
        emo = self._get_clap(reference_audio, text_prompt)
    else:
        emo = None

    final_index = len(segments) - 1
    phone_chunks, tone_chunks, lang_id_chunks = [], [], []
    # Per-language BERT features; a segment only contributes to a language
    # whose feature returned by get_text is not None.
    bert_chunks = {"zh": [], "ja": [], "en": []}

    for position, (segment_text, segment_lang) in enumerate(segments):
        seg_zh, seg_ja, seg_en, seg_phones, seg_tones, seg_lang_ids = self.get_text(
            segment_text, segment_lang, self.hps_ms, style_text, style_weigth)
        seg_berts = {"zh": seg_zh, "ja": seg_ja, "en": seg_en}

        # Every segment except the first drops its 3 leading entries, and
        # every segment except the last drops its 2 trailing entries.
        # NOTE(review): presumably these are boundary symbols emitted by
        # get_text -- confirm there.
        if position > 0:
            seg_phones, seg_tones, seg_lang_ids = seg_phones[3:], seg_tones[3:], seg_lang_ids[3:]
            seg_berts = {key: feat if feat is None else feat[:, 3:] for key, feat in seg_berts.items()}
        if position < final_index:
            seg_phones, seg_tones, seg_lang_ids = seg_phones[:-2], seg_tones[:-2], seg_lang_ids[:-2]
            seg_berts = {key: feat if feat is None else feat[:, :-2] for key, feat in seg_berts.items()}

        phone_chunks.append(seg_phones)
        tone_chunks.append(seg_tones)
        lang_id_chunks.append(seg_lang_ids)
        for key, feat in seg_berts.items():
            if feat is not None:
                bert_chunks[key].append(feat)

    # Check before concatenating: a language with no collected features gets
    # a single (1, 0) zero tensor so torch.cat is never given an empty list.
    # NOTE(review): assumes _infer accepts this empty placeholder -- confirm.
    zh_bert, ja_bert, en_bert = (
        torch.cat(bert_chunks[key] or [torch.zeros(1, 0)], dim=1) for key in ("zh", "ja", "en")
    )
    phones = torch.cat(phone_chunks, dim=0)
    tones = torch.cat(tone_chunks, dim=0)
    lang_ids = torch.cat(lang_id_chunks, dim=0)

    return self._infer(id, phones, tones, lang_ids, zh_bert, ja_bert, en_bert, sdp_ratio, noise,
                       noisew, length, emo)
修改了bert_vits2.py的infer_multilang函数后,正常了
另外再问大佬一个问题,大佬计划支持fishspeech吗?
感谢贡献!fishspeech再过段时间会更新的
运行环境
问题描述
使用中文特化版模型推理时,勾选流式推理会报错
问题复现步骤
2024-05-30 00:35:00 [INFO] [BERT-VITS2] text_prompt:Happy [in views.voice_bert_vits2_api:476]
2024-05-30 00:35:00 [INFO] 127.0.0.1 - - [30/May/2024 00:35:00] "GET /voice/bert-vits2?text=还没有下单的姐妹。咱们要赶紧去拍了。您越早下单。仓库越早给姐妹发货。是不是?今天早上刚摘的果子。仓库同步打包。等会都给大家发走。越早下单越新鲜。赶紧去拍。今天新号开播就是为了做数据的。一会数据做好了。不是这个价格。要后悔的哦。还没有点关注的姐姐可以把咱们上方关注点一下。亏钱就是想做做数据的。,&id=0&format=mp3&length=1.0&streaming=true HTTP/1.1" 500 - [in _internal._log:187]
2024-05-30 00:35:00 [ERROR] Error on request:
Traceback (most recent call last):
  File "D:\vsawg\py310\lib\site-packages\werkzeug\serving.py", line 364, in run_wsgi
    execute(self.server.app)
  File "D:\vsawg\py310\lib\site-packages\werkzeug\serving.py", line 327, in execute
    for data in application_iter:
  File "D:\vsawg\py310\lib\site-packages\werkzeug\wsgi.py", line 289, in __next__
    return self._next()
  File "D:\vsawg\py310\lib\site-packages\werkzeug\wrappers\response.py", line 32, in _iter_encoded
    for item in iterable:
  File "D:\vsawg\manager\TTSManager.py", line 436, in stream_bert_vits2_infer
    audio = infer_func(**state)
  File "D:\vsawg\bert_vits2\bert_vits2.py", line 395, in infer_multilang
    ja_bert = torch.cat(ja_bert, dim=1)
TypeError: expected Tensor as element 0 in argument 0, but got NoneType [in _internal._log:187]
2024-05-30 00:35:01 [INFO] [BERT-VITS2] id:0 format:mp3 lang:auto length:1.1 noise:0.33 noisew:0.4 sdp_ratio:0.2 segment_size:50 streaming:True [in views.voice_bert_vits2_api:468]