Open bannert1337 opened 4 weeks ago
I'm going to follow these steps to help you solve the GitHub issue:
1. Update the `transcribe` function in backend/apps/audio/main.py to accept the STT model name.
2. Update the `STTEngine` setting in src/lib/components/chat/Settings/Audio.svelte to allow specifying the STT model name.
3. Pass the STT model name to the `transcribe` API endpoint.

Here are the changes we decided to make. I'm done making edits and now I'm just validating the changes using a linter to catch any mistakes like syntax errors or undefined variables:
backend/apps/audio/main.py
Update the `transcribe` function to accept the STT model name as a parameter.
---
+++
@@ -1,6 +1,7 @@
@app.post("/transcriptions")
def transcribe(
file: UploadFile = File(...),
+ model: str = Form(...),
user=Depends(get_current_user),
):
log.info(f"file.content_type: {file.content_type}")
@@ -20,7 +21,7 @@
f.close()
whisper_kwargs = {
- "model_size_or_path": WHISPER_MODEL,
+ "model_size_or_path": model,
"device": whisper_device_type,
"compute_type": "int8",
"download_root": WHISPER_MODEL_DIR,
src/lib/components/chat/Settings/Audio.svelte
Add a new variable to store the STT model name and update the "Speech-to-Text Engine" setting to allow specifying the model name.
---
+++
@@ -16,8 +16,9 @@
let OpenAIKey = '';
let OpenAISpeaker = '';
- let STTEngines = ['', 'openai'];
+ let STTEngines = ['', 'openai', 'whisper-openai'];
let STTEngine = '';
+ let STTModel = '';
let conversationMode = false;
let speechAutoSend = false;
src/lib/components/chat/Settings/Audio.svelte
Rewritten instructions to resolve the error. Update the original_code and new_code blocks as required, ensuring that the
---
+++
@@ -1,4 +1,4 @@
- <div class=" py-0.5 flex w-full justify-between">
+ <div class=" py-0.5 flex w/full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
<div class="flex items-center relative">
<select
@@ -20,6 +20,21 @@
>
<option value="">{$i18n.t('Default (Web API)')}</option>
<option value="whisper-local">{$i18n.t('Whisper (Local)')}</option>
+ <option value="whisper-openai">{$i18n.t('Whisper (OpenAI)')}</option>
</select>
</div>
- </div>
+ </div>
+
+ {#if STTEngine === 'whisper-openai'}
+ <div class=" py-0.5 flex w-full justify-between">
+ <div class=" self-center text-xs font-medium">{$i18n.t('STT Model Name')}</div>
+ <div class="flex items-center relative">
+ <input
+ class="dark:bg-gray-900 w-full rounded px-2 p-1 text-xs bg-transparent outline-none"
+ bind:value={STTModel}
+ placeholder="Enter STT model name"
+ required
+ />
+ </div>
+ </div>
+ {/if}
src/lib/components/chat/Settings/Audio.svelte
Update the `saveSettings` function call to include the STT model name setting.
---
+++
@@ -7,6 +7,7 @@
saveSettings({
audio: {
STTEngine: STTEngine !== '' ? STTEngine : undefined,
+ STTModel: STTModel !== '' ? STTModel : undefined,
TTSEngine: TTSEngine !== '' ? TTSEngine : undefined,
speaker:
(TTSEngine === 'openai' ? OpenAISpeaker : speaker) !== ''
src/lib/apis/openai/index.ts
Update the `transcribe` function to accept the STT model name and pass it to the backend API.
---
+++
@@ -1,3 +1,38 @@
+export const transcribe = async (
+ token: string = '',
+ file: File,
+ model: string = 'whisper-1'
+) => {
+ let error = null;
+
+ const formData = new FormData();
+ formData.append('file', file);
+ formData.append('model', model);
+
+ const res = await fetch(`${OPENAI_API_BASE_URL}/transcriptions`, {
+ method: 'POST',
+ headers: {
+ Authorization: `Bearer ${token}`
+ },
+ body: formData
+ })
+ .then(async (res) => {
+ if (!res.ok) throw await res.json();
+ return res.json();
+ })
+ .catch((err) => {
+ console.log(err);
+ error = err;
+ return null;
+ });
+
+ if (error) {
+ throw error;
+ }
+
+ return res;
+};
+
export const generateSearchQuery = async (
token: string = '',
model: string,
Your changes have been successfully made to the branch `sweep/stt_model_name_is_not_used_when_calling`. I have validated these changes using a syntax checker and a linter.
[!TIP] To recreate the pull request, edit the issue title or description.
This is an automated message generated by Sweep AI.
The default value for the model is `whisper-1`. I changed it to `distil-large-v3`. The API call fails, and the log shows that model `whisper-1` was requested even though I defined another value.