CheshireCC / faster-whisper-GUI

faster_whisper GUI with PySide6
GNU Affero General Public License v3.0
1.69k stars 104 forks source link

分享自用参数(对于没有杂音和音乐的网课类英语音频近乎完美) #247

Open czyrichard opened 2 weeks ago

czyrichard commented 2 weeks ago

1)使用的模型:faster-whisper-large-v3-turbo-ct2
2)建议每次针对音频的内容对hotwords进行更改,对提升转写质量有奇效。

{ "theme": "light", "demucs": { "overlap": 0.1, "segment": 10.0, "tracks": 1 }, "model_param": { "localModel": true, "onlineModel": false, "modelName": 0, "use_v3_model": true, "device": 1, "deviceIndex": "0", "preciese": 4, "thread_num": "4", "num_worker": "1", "local_files_only": true }, "vad_param": { "use_VAD": false, "threshold": 0.4, "minSpeechDuration": "250", "minSilenceDuration": "100", "maxSpeechDuration": "30", "windowSize": 2, "speechPad": "2000" }, "setting": { "saveConfig": true, "autoLoadModel": false, "language": 1, "autoGoToOutputPage": 2, "autoClearTempFiles": false }, "Transcription_param": { "aggregate_contents": false, "language": 1, "task": false, "beam_size": "10", "best_of": "5", "patience": "2", "length_penalty": "3", "temperature": "0.0,0.2,0.4,0.6,0.8,1.0", "compression_ratio_threshold": "5", "log_prob_threshold": "-1.0", "no_speech_threshold": "0.2", "condition_on_previous_text": true, "initial_prompt": "Hello.", "prefix": "", "suppress_blank": true, "suppress_tokens": "-1", "without_timestamps": false, "max_initial_timestamp": "9999999.0", "word_timestamps": false, "prepend_punctuations": "\"'“¿([{-", "append_punctuations": "\"'.。,,!!??::”)]}、", "repetition_penalty": "1", "no_repeat_ngram_size": "0", "prompt_reset_on_temperature": "0.5", "chunk_length": "30", "clip_mode": 0, "max_new_tokens": "448", "clip_timestamps": "", "hallucination_silence_threshold": "0", "hotwords": "This is a web seminar about the placement of American English accent.", "language_detection_threshold": "", "language_detection_segments": "1" }, "output_whisperX": { "tabMovable": true, "tabScrollable": false, "tabShadowEnabled": false, "tabMaxWidth": 500, "closeDisplayMode": 0, "whisperXMinSpeaker": 0, "whisperXMaxSpeaker": 0, "outputFormat": 5, "outputEncoding": 1 } }