Open wjddd opened 2 months ago
详见 th2onnx
您好，我参考 th2onnx 生成了 ONNX 模型并尝试进行推理，但合成的音频没有声音。以下是我的推理代码：
from th_models import get_models, TorchSimBert
# End-to-end ONNX inference: embedding -> acoustic encoder -> acoustic
# decoder -> vocoder, then write the resulting waveform to disk.
content = "测试"

# --- Stage 1: style embedding from the exported embedding model ------------
# The feed is a previously saved input batch; only its "input_ids" entry is
# passed to the ONNX session (converted to a NumPy array first).
embedding_batch = torch.load("/home/embedding_inputs.pt", map_location='cpu')
embedding_sess = get_sess("/home/onnx/embedding.onnx", device='cpu', cpu_threads=8)
embedding_feed = {"input_ids": embedding_batch["input_ids"].cpu().numpy()}
style_embedding = embedding_sess.run(
    input_feed=embedding_feed,
    output_names=["style_embedding"],
)

# --- Stage 2: acoustic-model encoder ----------------------------------------
encoder_sess = get_sess("/home/onnx/am_encoder.onnx", device='cpu', cpu_threads=8)

# The content embedding is still produced by the PyTorch style encoder loaded
# from the checkpoint, not by an ONNX session.
style_encoder, generator, tokenizer, token2id, speaker2id = get_models(
    "/home/ckpt/g_00020000"
)
style_encoder = style_encoder.cpu().eval()
start = time.time()
content_embedding = get_style_embedding(content, tokenizer, style_encoder)
# Prepend a leading axis of size 1 before feeding the encoder.
content_embedding = content_embedding[np.newaxis, :]

# NOTE(review): "inputs_ling" is the literal pair [1, len(content)], while
# token2id is loaded above but never used in this script. Confirm that this is
# the linguistic input am_encoder.onnx expects (it may need token ids instead);
# a wrong input here is a plausible cause of silent audio.
encoder_feed = {
    "inputs_ling": np.array([[1, len(content)]]),
    "inputs_speaker": np.array([0]),
    # sess.run returns a list of outputs; take the first (only) one.
    "inputs_style_embedding": style_embedding[0],
    "inputs_content_embedding": content_embedding,
}
encoder_op = encoder_sess.run(
    input_feed=encoder_feed,
    output_names=["x", "d_outs"],
)

# --- Stage 3: acoustic-model decoder ----------------------------------------
decoder_sess = get_sess("/home/onnx/am_decoder.onnx", device='cpu', cpu_threads=8)
encoded_x, encoded_d_outs = encoder_op[0], encoder_op[1]
decoder_feed = {"x": encoded_x, "d_outs": encoded_d_outs}
decoder_op = decoder_sess.run(
    input_feed=decoder_feed,
    output_names=["logmel"],
)

# --- Stage 4: vocoder --------------------------------------------------------
vocoder_sess = get_sess("/home/onnx/voc.onnx", device='cpu', cpu_threads=8)
vocoder_feed = {"logmel": decoder_op[0]}
vocoder_op = vocoder_sess.run(input_feed=vocoder_feed, output_names=["wav"])

# Collapse the vocoder output to 1-D and write it out with 32000 as the
# sample-rate argument.
# NOTE(review): verify 32000 matches the rate the vocoder was trained at.
wav = vocoder_op[0].flatten()
sf.write("/home/test.wav", wav, 32000)
我不太确定是哪一部分导致合成的音频出现问题，您方便帮忙看看吗？
详见 th2onnx