with torch.no_grad():
if not skip_refine_text:
refined = self.instance._refine_text(
text,
self.instance.device,
params_refine_text,
)
text_tokens = refined.ids
text_tokens = [
i[i.less(self.instance.tokenizer_break_0_ids)] for i in text_tokens
]
text = self.get_tokenizer().batch_decode(text_tokens)
refined.destroy()
if refine_text_only:
yield text
return
if not smooth_decoding:
length = [0 for _ in range(len(text))]
for result in self.instance._infer_code(
text,
stream,
self.instance.device,
use_decoder,
params_infer_code,
):
wavs = self._decode_to_wavs(result, length, use_decoder)
yield wavs
代码运行到上面这段推理代码时会报错:
RuntimeError: expected scalar type ComplexDouble but found ComplexHalf
File "/Users/panhong/miniconda3/envs/ChatTTS_colab/lib/python3.11/site-packages/vocos/spectral_ops.py", line 46, in forward
return torch.istft(spec, self.n_fft, self.hop_length, self.win_length, self.window, center=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: expected scalar type ComplexDouble but found ComplexHalf
阅读 README.md 和 dependencies.md
检索 issue 和 discussion
检查 Forge 版本
Forge Commit 或者 Tag
1
Python 版本
3.11
PyTorch 版本
2.3.1
操作系统信息
macOS
BUG 描述
调用 tts_to_audio/tts_stream 接口时,代码在 vocos/spectral_ops.py 的 torch.istft 调用处报错:RuntimeError: expected scalar type ComplexDouble but found ComplexHalf
BUG 端点
tts_to_audio/tts_stream
复现参数
{ "text": "这是一个很有意思的事情", "speaker_wav": "3d40607bf216486fb508758fa67f1a83", "language": "zh" }
期望结果
返回音频
实际结果
报错
错误信息