使用 GPT-4 修改的文件:https://github.com/lenML/ChatTTS-Forge/blob/main/modules/api/impl/openai_api.py
完整的代码:
from fastapi import HTTPException, Body
from fastapi.responses import StreamingResponse
import io
from numpy import clip
import soundfile as sf
from pydantic import BaseModel, Field
from fastapi.responses import FileResponse
from modules.synthesize_audio import synthesize_audio
from modules.normalization import text_normalize
from modules import generate_audio as generate
from typing import Literal
import pyrubberband as pyrb
from modules.api import utils as api_utils
from modules.api.Api import APIManager
import numpy as np
class AudioSpeechRequest(BaseModel):
    # Request model with the fields of a speech-synthesis request.
    # Documented with comments rather than a class docstring so that the
    # schema pydantic generates for this model is left untouched.

    # Text to synthesize (required; the only field without a default).
    input: str
    model: str = "chattts-4w"
    voice: str = "female2"
    # Output format; validation rejects anything other than these two values.
    response_format: Literal["mp3", "wav"] = "mp3"
    # Validated to lie within [0.1, 10].
    speed: float = Field(1, ge=0.1, le=10, description="Speed of the audio")
    style: str = ""
    # Validated to lie within [1, 20].
    batch_size: int = Field(1, ge=1, le=20, description="Batch size")
    # Validated to lie within [10, 1024].
    spliter_threshold: float = Field(
        100, ge=10, le=1024, description="Threshold for sentence spliter"
    )
    # Default value.
    seed: int = 42
    # Validated to lie within [0.0, 1.0].
    temperature: float = Field(
        0.3, ge=0.0, le=1.0, description="Temperature for audio generation"
    )
使用GPT4修改的文件 https://github.com/lenML/ChatTTS-Forge/blob/main/modules/api/impl/openai_api.py 完整的代码: from fastapi import HTTPException, Body from fastapi.responses import StreamingResponse import io from numpy import clip import soundfile as sf from pydantic import BaseModel, Field from fastapi.responses import FileResponse from modules.synthesize_audio import synthesize_audio from modules.normalization import text_normalize from modules import generate_audio as generate from typing import Literal import pyrubberband as pyrb from modules.api import utils as api_utils from modules.api.Api import APIManager import numpy as np
class AudioSpeechRequest(BaseModel):
    # Request model with the fields of a speech-synthesis request.
    # Documented with comments rather than a class docstring so that the
    # schema pydantic generates for this model is left untouched.

    # Text to synthesize (required; the only field without a default).
    input: str
    model: str = "chattts-4w"
    voice: str = "female2"
    # Output format; validation rejects anything other than these two values.
    response_format: Literal["mp3", "wav"] = "mp3"
    # Validated to lie within [0.1, 10].
    speed: float = Field(1, ge=0.1, le=10, description="Speed of the audio")
    style: str = ""
    # Validated to lie within [1, 20].
    batch_size: int = Field(1, ge=1, le=20, description="Batch size")
    # Validated to lie within [10, 1024].
    spliter_threshold: float = Field(
        100, ge=10, le=1024, description="Threshold for sentence spliter"
    )
    # Default value.
    seed: int = 42
    # Validated to lie within [0.0, 1.0].
    temperature: float = Field(
        0.3, ge=0.0, le=1.0, description="Temperature for audio generation"
    )
async def openai_speech_api( request: AudioSpeechRequest = Body( ..., description="JSON body with model, input text, and voice" ) ): try: model = request.model input_text = request.input voice = request.voice style = request.style response_format = request.response_format batch_size = request.batch_size spliter_threshold = request.spliter_threshold speed = request.speed speed = clip(speed, 0.1, 10) temperature = request.temperature
def setup(api_manager: APIManager): api_manager.post( "/v1/audio/speech", response_class=FileResponse, description=""" openai api document: https://platform.openai.com/docs/guides/text-to-speech
以下属性为本系统自定义属性,不在openai文档中: