SYSTRAN / faster-whisper

Faster Whisper transcription with CTranslate2

How to implement microphone transcription using Faster Whisper? #700

Open 20246688 opened 7 months ago

jordimas commented 7 months ago

There is an experimental implementation here: https://github.com/Softcatala/whisper-ctranslate2/blob/main/src/whisper_ctranslate2/live.py
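
For context, here is a minimal sketch of the same idea (record a short block from the microphone, transcribe it, repeat). It is not the linked implementation, just an illustration of the approach, and it assumes the third-party sounddevice package and a generic "base" model:

import sounddevice as sd
from faster_whisper import WhisperModel

SAMPLE_RATE = 16000      # Whisper models expect 16 kHz mono audio
BLOCK_SECONDS = 5        # length of each recorded block (arbitrary choice)

model = WhisperModel("base", device="cpu", compute_type="int8")

while True:
    # Record BLOCK_SECONDS of mono float32 audio from the default microphone.
    audio = sd.rec(int(BLOCK_SECONDS * SAMPLE_RATE),
                   samplerate=SAMPLE_RATE, channels=1, dtype="float32")
    sd.wait()

    # model.transcribe accepts a 1-D float32 NumPy array sampled at 16 kHz.
    segments, _ = model.transcribe(audio.flatten(), language="en")
    for segment in segments:
        print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))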

ramzeez88 commented 3 months ago

Here, you can try this:

import os
import wave

import pyaudio
from faster_whisper import WhisperModel

# Work around "duplicate OpenMP runtime" errors on some setups.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

model_size = "distil-large-v3"
model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")

p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

print("* recording")

while True:
    # Record RECORD_SECONDS of audio in CHUNK-sized blocks.
    frames = []
    for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)

    # Dump the captured audio to a WAV file.
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()

    # Transcribe the file and print the segments.
    segments, info = model.transcribe(WAVE_OUTPUT_FILENAME, beam_size=5,
                                      language="en",
                                      condition_on_previous_text=False)

    for segment in segments:
        print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))