PaddlePaddle / PaddleSpeech

Easy-to-use Speech Toolkit including Self-Supervised Learning model, SOTA/Streaming ASR with punctuation, Streaming TTS with text frontend, Speaker Verification System, End-to-End Speech Translation and Keyword Spotting. Won NAACL2022 Best Demo Award.
https://paddlespeech.readthedocs.io
Apache License 2.0
11.2k stars 1.86k forks source link

使用自己的声音做语音合成本地部署失败 #2988

Open ANyyJS opened 1 year ago

ANyyJS commented 1 year ago

参考 https://zhuanlan.zhihu.com/p/587765776 的代码,只修改了文件路径,运行时报错。Python 和 Paddle 都已安装好。报错如下:

Traceback (most recent call last):
  File "E:\Record\anntest\test_1.py", line 26, in <module>
    frontend = get_frontend(
  File "D:\Program Files\Python39\lib\site-packages\paddlespeech\t2s\exps\syn_utils.py", line 272, in get_frontend
    frontend = MixFrontend(
  File "D:\Program Files\Python39\lib\site-packages\paddlespeech\t2s\frontend\mix_frontend.py", line 29, in __init__
    self.zh_frontend = Frontend(
  File "D:\Program Files\Python39\lib\site-packages\paddlespeech\t2s\frontend\zh_frontend.py", line 147, in __init__
    with open(phone_vocab_path, 'rt') as f:
OSError: [Errno 22] Invalid argument: 'E:\\Record\x07nntest\\demo\\phone_id_map.txt'

文件夹路径是E:\Record\anntest,模型在E:\Record\anntest\demo,代码如下: `from pathlib import Path import soundfile as sf import os from paddlespeech.t2s.exps.syn_utils import get_am_output from paddlespeech.t2s.exps.syn_utils import get_frontend from paddlespeech.t2s.exps.syn_utils import get_predictor from paddlespeech.t2s.exps.syn_utils import get_voc_output

# When running in another environment, update the two model directories below.
# Windows paths must be raw strings: in an ordinary string literal "\a" is the
# BEL escape, so "E:\Record\anntest" silently becomes "E:\\Record\x07nntest"
# and every file lookup under it fails with "OSError: [Errno 22]".
am_inference_dir = r"E:\Record\anntest\demo"
# pwgan_aishell3 is used as the example vocoder here.
voc_inference_dir = r"E:\Record\anntest\pwgan_aishell3_static_1.1.0"

# Directory for the generated audio; change it to wherever you want the output.
wav_output_dir = r"E:\Record\anntest\output"

# Device selection [gpu / cpu]; this example runs on the CPU.
device = "cpu"

# Texts to synthesize and the file name associated with each of them.
text_dict = {
    "1": "今天天气真不错,欢迎和我一起玩。",
    "2": "我认为跑步给我的身体带来了健康。",
}

# frontend: built for lang="mix" from the phone id map stored next to the model.
phone_id_map = os.path.join(am_inference_dir, "phone_id_map.txt")
frontend = get_frontend(lang="mix", phones_dict=phone_id_map, tones_dict=None)

# am_predictor: loads the fastspeech2_mix model files from am_inference_dir.
am_name = "fastspeech2_mix"
am_predictor = get_predictor(
    model_dir=am_inference_dir,
    model_file=am_name + ".pdmodel",
    params_file=am_name + ".pdiparams",
    device=device,
)

# voc_predictor: pwgan_aishell3 is the example here; change the model name
# when using a different vocoder.
voc_name = "pwgan_aishell3"
voc_predictor = get_predictor(
    model_dir=voc_inference_dir,
    model_file=voc_name + ".pdmodel",
    params_file=voc_name + ".pdiparams",
    device=device,
)

# Make sure the output directory exists before anything is written to it.
output_dir = Path(wav_output_dir)
output_dir.mkdir(parents=True, exist_ok=True)

sentences = list(text_dict.items())

merge_sentences = True
fs = 24000
for utt_id, sentence in sentences:
    am_output_data = get_am_output(
        input=sentence,
        am_predictor=am_predictor,
        am="fastspeech2_mix",
        frontend=frontend,
        lang="mix",
        merge_sentences=merge_sentences,
        speaker_dict=phone_id_map,
        spk_id=0,
    )
    wav = get_voc_output(voc_predictor=voc_predictor, input=am_output_data)

    # Save the result as <utt_id>.wav inside output_dir.
    sf.write(output_dir / (utt_id + ".wav"), wav, samplerate=fs)

`

参考用了另一种代码,报了另一种错。 代码: `from pathlib import Path

import soundfile as sf

import os

from paddlespeech.t2s.exps.syn_utils import get_am_output

from paddlespeech.t2s.exps.syn_utils import get_frontend

from paddlespeech.t2s.exps.syn_utils import get_predictor

from paddlespeech.t2s.exps.syn_utils import get_voc_output

def get_text_dict(name: str, txtname: str, encoding: str = "utf-8-sig") -> dict[str, str]:
    """Read a text file with one sentence per line into an id -> sentence dict.

    Keys are ``name`` followed by a running index starting at 0
    (``name0``, ``name1``, ...). The number of sentences read is printed.

    Args:
        name: Prefix used for every generated key.
        txtname: Path of the text file to read.
        encoding: Encoding used to decode the file. Defaults to ``utf-8-sig``
            (UTF-8 with or without a BOM). The previously hard-coded
            ISO-8859-1 never fails to decode, so Chinese text was silently
            turned into mojibake instead of raising an error.

    Returns:
        Dict mapping each generated key to one stripped, non-empty line.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    text_dict = {}
    # The context manager closes the file even if decoding fails midway.
    with open(txtname, "r", encoding=encoding) as ff:
        for line in ff:
            sentence = line.strip()
            # Blank lines (including the one left by a trailing newline) would
            # otherwise become empty sentences in the result.
            if not sentence:
                continue
            text_dict[f"{name}{len(text_dict)}"] = sentence
    print(f"{name}text:{len(text_dict)}")
    return text_dict

def the_main(text_dict):
    """Synthesize each entry of ``text_dict`` and write it to ``<key>.wav``.

    Reads the module-level settings ``am_inference_dir``, ``voc_inference_dir``,
    ``wav_output_dir`` and ``device``; they must be defined before the call.

    Args:
        text_dict: Mapping of utterance id to the sentence to synthesize. The
            id is used as the output file name (with ``.wav`` appended).
    """
    phone_id_map = os.path.join(am_inference_dir, "phone_id_map.txt")

    # frontend
    frontend = get_frontend(lang="mix", phones_dict=phone_id_map, tones_dict=None)

    # am_predictor
    am_name = "fastspeech2_mix"
    am_predictor = get_predictor(
        model_dir=am_inference_dir,
        model_file=am_name + ".pdmodel",
        params_file=am_name + ".pdiparams",
        device=device,
    )

    # voc_predictor: pwgan_aishell3 is the example here; change the model
    # name when using a different vocoder.
    voc_name = "pwgan_aishell3"
    voc_predictor = get_predictor(
        model_dir=voc_inference_dir,
        model_file=voc_name + ".pdmodel",
        params_file=voc_name + ".pdiparams",
        device=device,
    )

    # Create the output directory (and any missing parents) up front.
    output_dir = Path(wav_output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    fs = 24000
    merge_sentences = True

    for utt_id, sentence in text_dict.items():
        am_output_data = get_am_output(
            input=sentence,
            am_predictor=am_predictor,
            am="fastspeech2_mix",
            frontend=frontend,
            lang="mix",
            merge_sentences=merge_sentences,
            speaker_dict=phone_id_map,
            spk_id=0,
        )
        wav = get_voc_output(voc_predictor=voc_predictor, input=am_output_data)

        # Save the file.
        sf.write(output_dir / (utt_id + ".wav"), wav, samplerate=fs)

# Script entry point. The guard must compare the dunder name ``__name__`` with
# "__main__"; the bare ``name == 'main'`` form (underscores lost to markdown
# rendering) raises NameError as soon as the file is run.
if __name__ == '__main__':
    # Model directory.
    am_inference_dir = "demo"

    # Vocoder directory; pwgan_aishell3 is used as the example here.
    voc_inference_dir = "pwgan_aishell3_static_1.1.0"

    # Directory for the generated audio; change it to wherever you want the output.
    wav_output_dir = "output"

    # Device selection [gpu / cpu]; this example runs on the CPU.
    device = "cpu"

    # Name of the text document whose lines should be synthesized.
    txt_name = "文档.txt"

    the_main(get_text_dict(name=am_inference_dir, txtname=txt_name))

报错:

Traceback (most recent call last):
  File "E:\Record\anntest\test_2.py", line 155, in <module>
    the_main(get_text_dict(name=am_inference_dir,txtname=txt_name))
  File "E:\Record\anntest\test_2.py", line 103, in the_main
    am_output_data = get_am_output(
  File "D:\Program Files\Python39\lib\site-packages\paddlespeech\t2s\exps\syn_utils.py", line 503, in get_am_output
    frontend_dict = run_frontend(
  File "D:\Program Files\Python39\lib\site-packages\paddlespeech\t2s\exps\syn_utils.py", line 310, in run_frontend
    input_ids = frontend.get_input_ids(
  File "D:\Program Files\Python39\lib\site-packages\paddlespeech\t2s\frontend\mix_frontend.py", line 123, in get_input_ids
    input_ids = self.zh_frontend.get_input_ids(
  File "D:\Program Files\Python39\lib\site-packages\paddlespeech\t2s\frontend\zh_frontend.py", line 540, in get_input_ids
    phonemes = self.get_phonemes(
  File "D:\Program Files\Python39\lib\site-packages\paddlespeech\t2s\frontend\zh_frontend.py", line 455, in get_phonemes
    phonemes = self._g2p(
  File "D:\Program Files\Python39\lib\site-packages\paddlespeech\t2s\frontend\zh_frontend.py", line 261, in _g2p
    sub_finals = self.tone_modifier.modified_tone(word, pos,
  File "D:\Program Files\Python39\lib\site-packages\paddlespeech\t2s\frontend\tone_sandhi.py", line 353, in modified_tone
    finals = self._three_sandhi(word, finals)
  File "D:\Program Files\Python39\lib\site-packages\paddlespeech\t2s\frontend\tone_sandhi.py", line 173, in _three_sandhi
    finals[0] = finals[0][:-1] + "2"
IndexError: list index out of range

iftaken commented 1 year ago

错误看起来是语音合成前端引起的,需要你检查一下PaddleSpeech的版本以及输入的文本,看看输入是否符合要求

ANyyJS commented 1 year ago

谢谢回答。版本是paddlespeech1.3.0 paddlespeech-feat 0.1.0。输入的文本没有问题。

stale[bot] commented 1 year ago

This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.