Open Turkrosoft opened 10 months ago
In fact, I was able to do this in a Discord bot for Piper using praat-parselmouth. Here's a code snippet:
def change_pitch_with_praat(audio_file, factor):
    """Return a resynthesized copy of *audio_file* with its pitch scaled.

    The file is loaded as a Praat sound, turned into a Manipulation object,
    and every frequency of its pitch tier over the whole duration of the
    sound is multiplied by *factor*. The modified tier is put back into the
    Manipulation object and the overlap-add resynthesis is returned.

    Args:
        audio_file: Value handed to ``parselmouth.Sound`` (the caller in
            this snippet passes a WAV file name).
        factor: Multiplier applied to the pitch frequencies.

    Returns:
        The result of Praat's "Get resynthesis (overlap-add)" command.
    """
    sound = parselmouth.Sound(audio_file)

    # Arguments after the command name follow Praat's "To Manipulation"
    # form: time step (s), minimum pitch (Hz), maximum pitch (Hz).
    manip = call(sound, "To Manipulation", 0.01, 75, 600)

    tier = call(manip, "Extract pitch tier")
    # Scale the whole tier, from the start to the end of the sound.
    start, end = sound.xmin, sound.xmax
    call(tier, "Multiply frequencies", start, end, factor)

    # Praat expects both objects selected to swap the tier back in.
    call([tier, manip], "Replace pitch tier")

    resynthesized = call(manip, "Get resynthesis (overlap-add)")
    return resynthesized
async def piper(
    interaction: discord.Interaction,
    voice: str,
    speaker: Optional[int] = 0,
    rate: Optional[float] = 1.00,
    pitch: Optional[float] = 1.00,
    text: Optional[str] = "This is a test."
):
    """Synthesize *text* with a Piper voice and post the audio to the channel.

    Acknowledges the interaction with an ephemeral message, awaits
    ``piper_thread`` to produce a WAV file, optionally rescales the pitch
    with ``change_pitch_with_praat``, and sends the file to the channel the
    interaction came from.

    Args:
        interaction: The Discord interaction that triggered the request.
        voice: Voice name forwarded to ``piper_thread``.
        speaker: Speaker index forwarded to ``piper_thread``.
        rate: Speech rate forwarded to ``piper_thread``.
        pitch: Pitch multiplier; ``1.0`` (or ``None``) leaves the audio
            untouched.
        text: Text to synthesize.

    Returns:
        None. Results are delivered as Discord messages; an expired
        interaction is only reported on stdout.
    """
    await interaction.response.send_message(
        f'Text-To-Speech request sent by {interaction.user.mention}.\nStarting...',
        ephemeral=True,
    )

    # Hand over the live interaction object, not the discord.Interaction
    # class itself, and await the coroutine directly instead of wrapping it
    # in a task that is awaited on the very next line.
    wav_filename = await piper_thread(interaction, voice, speaker, rate, text)

    # piper_thread signals an unknown voice with -1.
    if wav_filename == -1:
        await interaction.channel.send(
            f"This voice doesn't exists! Please use /list to get a list. The voice was: {voice}."
        )
        return

    if pitch is not None and pitch != 1.00:
        print(pitch)
        # NOTE(review): the Praat processing and the save below run
        # synchronously inside the coroutine; consider an executor if long
        # clips stall the bot.
        audio = change_pitch_with_praat(wav_filename, pitch)
        # Strips the last four characters; assumes the name ends in ".wav".
        wav_filename = f"{wav_filename[:-4]}_changed.wav"
        audio.save(wav_filename, "WAV")

    # Measure the age of the interaction *after* all processing so the
    # expiry check reflects the real elapsed time. 900 s = 15 minutes,
    # presumably Discord's interaction lifetime; confirm.
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    elapsed = (now_utc - interaction.created_at).total_seconds()
    if elapsed >= 900:
        print("Error! The interaction has expired and a response can't be sent.")
        return

    # Avoid echoing very long inputs back into the channel message.
    shown_text = "Large text." if len(text) > 1500 else text
    await interaction.channel.send(
        f"{interaction.user.mention}, this is your audio generated with {voice}!\nText: {shown_text}",
        file=discord.File(wav_filename),
    )
Is it possible to preserve the formants when adjusting pitch? Many TTS engines support that. There could also be two separate modes, or a check box, letting the user adjust the pitch with or without affecting the formants.