Open HZLong36 opened 2 weeks ago
api.py 第 167行
# Synthesize every non-empty text segment to its own temporary wav file and
# collect the resulting paths in out_list.
# torch is only needed to join the generated audio chunks; it is imported
# locally because the file's import block is not visible from this snippet.
import torch

for i, t in enumerate(text):
    # Nothing to synthesize for empty / whitespace-only segments.
    if not t.strip():
        continue
    tmp_name = f"{tmp_dir}/{time.time()}-{i}-{tts_type}.wav"
    print(f'{t=}\n{tmp_name=},\n{tts_type=}\n{params=}')
    if tts_type == 'tts':
        # Plain text-to-speech using the speaker given in params['role'].
        output = tts_model.inference_sft(t, params['role'], stream=False)
    elif tts_type == 'clone_eq':
        # Voice cloning in the same language as the reference audio.
        output = clone_model.inference_zero_shot(t, params['reference_text'], prompt_speech_16k)
    else:
        # Any other tts_type: cross-lingual cloning, with the target language
        # passed as a <|lang|> prefix on the text.
        output = clone_model.inference_cross_lingual(f'<|{params["lang"]}|>{t}', prompt_speech_16k)
    # The inference_* calls return a generator rather than a dict, so
    # output['tts_speech'] fails with "'generator' object is not subscriptable".
    # Collect every yielded chunk instead of only the first one, otherwise any
    # audio produced after the first chunk would be silently dropped.
    chunks = [chunk['tts_speech'] for chunk in output]
    if not chunks:
        # Generator produced no audio for this segment; there is nothing to save.
        print(f'no audio generated for segment {i}, skipped')
        continue
    # Assumes each chunk is a 2-D (channels, samples) tensor, the layout
    # torchaudio.save expects by default, so dim=1 joins along time -- TODO confirm.
    torchaudio.save(tmp_name, torch.cat(chunks, dim=1), 22050)
    out_list.append(tmp_name)
Console output and error with the original line:
tmp_name='G:/AI工具/CosyVoice/tmp/1729305816.023298-0-tts.wav',
tts_type='tts'
params={'text': '你好啊我的朋友', 'lang': 'zh', 'role': '中文女', 'reference_audio': None, 'reference_text': ''}
<generator object CosyVoice.inference_sft at 0x000001D8048250B0>
'generator' object is not subscriptable