SYSTRAN / faster-whisper

Faster Whisper transcription with CTranslate2
MIT License
11.85k stars 989 forks source link

When it runs in a thread, it can run normally. But when the thread exits, it cannot exit normally, causing an exception. #480

Open HerbertLi2020 opened 1 year ago

HerbertLi2020 commented 1 year ago

When it runs in a thread, it can run normally. But when the thread exits, it cannot exit normally, causing an exception.

guillaumekln commented 1 year ago

Please post a small code snippet to reproduce the issue.

Also what is your OS?

HerbertLi2020 commented 1 year ago

Hello, Thank you very much for receiving your reply. My OS is Windows11. Here is my all code. Click [Start], runing for about 1 minute, then click[Quit], and the problem will reappear. Notes: you can use the software [Voice Meeter] [https://vb-audio.com/] to get the voice.


import audioop
import io, time,  wave
from queue import Queue

import numpy as np
import os, sys, threading
from datetime import datetime, timedelta

from PyQt5 import QtWidgets, QtGui, QtCore
from PyQt5.QtWidgets import QWidget, QMessageBox, QApplication
from PyQt5.QtCore import Qt, QThread, pyqtSignal
from AI_Translate_UI import Ui_trans
import pyaudio, torch
from faster_whisper import WhisperModel

pa = pyaudio.PyAudio()
for i in range(pa.get_device_count()):
    device_info = pa.get_device_info_by_index(i)
    if device_info['maxInputChannels'] > 0 and device_info['hostApi'] == 0 and 'Microsoft' not in device_info['name']:
        print(device_info['index'],device_info['name'])

max_energy = 5000
sample_rate = 16000
chunk_size = 1024
max_int16 = 2 ** 15
data_queue = Queue()
my_title = "TEST"
stop_flag = False
model_dir = ".\model_faster_whisper\whisper-base-en"

if torch.has_cuda: gpu_ok = True; device = "cuda"
else: gpu_ok = False; device = "cpu"

class recording_thread(QThread):
    def __init__(self, main_windows, stream:pyaudio.Stream):
        super(recording_thread, self).__init__()
        self.main_win = main_windows
        self.stream = stream
    def run(self):
        global max_energy, stop_flag, data_queue
        while not stop_flag:
            data = self.stream.read(chunk_size)  
            energy = audioop.rms(data, pa.get_sample_size(pyaudio.paInt16))
            if energy > max_energy:
                max_energy = energy
            data_queue.put(data)
            time.sleep(0.05)
        while not data_queue.empty(): data = data_queue.get()
        print('Exit Thread(recording_thread)...')

class main_windows(QWidget):
    def __init__(self):
        super(main_windows, self).__init__()

        self.createLayout()
        palette1 = QtGui.QPalette()
        palette1.setColor(palette1.Background, QtGui.QColor(200, 200, 200))
        self.setPalette(palette1)

        self.setWindowTitle(my_title)
        self.show()

    def startrun(self):
        global stop_flag
        stop_flag = False
        self.stream = pa.open(
                            format=pyaudio.paInt16,
                            channels=1,
                            rate=sample_rate,
                            input=True,
                            frames_per_buffer=chunk_size,
                            input_device_index=2)

        self.record_thread = recording_thread(self, self.stream)
        self.record_thread.start()

        t = threading.Thread(target=self.TransateThread)
        t.start()

    def TransateThread(self):
        global stop_flag, data_queue
        next_transcribe_time = None
        transcribe_rate_seconds = 0.5
        transcribe_rate = timedelta(seconds=transcribe_rate_seconds)
        max_record_time = 5
        silence_time = 0.5
        last_sample = bytes()
        samples_with_silence = 0
        silence_energy = 500
        task = 'transcribe'
        text = ''

        faster_model = WhisperModel(model_dir, device=device, compute_type="int8",
                                         num_workers=1, local_files_only=True)

        while not stop_flag:
            if not data_queue.empty():
                now = datetime.utcnow()
                if not next_transcribe_time:  # Set next_transcribe_time for the first time.
                    next_transcribe_time = now + transcribe_rate
                if now > next_transcribe_time:
                    next_transcribe_time = now + transcribe_rate

                    phrase_complete = False
                    while not data_queue.empty():
                        data = data_queue.get()
                        energy = audioop.rms(data, pa.get_sample_size(pyaudio.paInt16))
                        if energy < silence_energy:
                            samples_with_silence += 1
                        else:
                            samples_with_silence = 0

                        if samples_with_silence > sample_rate / chunk_size * silence_time:
                            phrase_complete = True
                            last_sample = bytes()
                        last_sample += data

                    # Write out raw frames as a wave file.
                    wav_file = io.BytesIO()
                    wav_writer: wave.Wave_write = wave.open(wav_file, "wb")
                    wav_writer.setframerate(sample_rate)
                    wav_writer.setsampwidth(pa.get_sample_size(pyaudio.paInt16))
                    wav_writer.setnchannels(1)
                    wav_writer.writeframes(last_sample)
                    wav_writer.close()
                    # Read the audio data, now with wave headers.
                    wav_file.seek(0)
                    wav_reader: wave.Wave_read = wave.open(wav_file)
                    samples = wav_reader.getnframes()
                    audio = wav_reader.readframes(samples)
                    wav_reader.close()

                    audio_as_np_int16 = np.frombuffer(audio, dtype=np.int16)
                    audio_as_np_float32 = audio_as_np_int16.astype(np.float32)
                    audio_normalised = audio_as_np_float32 / max_int16

                    if not phrase_complete:
                        segments, info = faster_model.transcribe(audio_normalised, language='en',
                                                                           task=task,
                                                                           condition_on_previous_text=True,
                                                                           without_timestamps=True)
                        for segment in segments:
                            text = segment.text
                        print(text)

                        audio_length_in_seconds = samples / float(sample_rate)
                        if (audio_length_in_seconds > max_record_time):
                            last_sample = bytes()
                        self.up_show.setText(text)

            time.sleep(0.1)

    def stoprun(self):
        global stop_flag
        r_button = QMessageBox.question(self, my_title,"\n\nAre you Sure?   \n\n", QMessageBox.Yes | QMessageBox.No)
        if r_button == QMessageBox.Yes: stop_flag = True

    def quitWin(self):
        global stop_flag
        r_button = QMessageBox.question(self, my_title, "\n\nAre you Sure?\n\n", QMessageBox.Yes | QMessageBox.No)
        if r_button == QMessageBox.Yes:
            stop_flag = True
            time.sleep(3)
            sys.exit()

    def createLayout(self):
        self.resize(630, 136)
        self.startButton = QtWidgets.QPushButton('Start', self)
        self.startButton.setGeometry(QtCore.QRect(80, 90, 71, 23))
        # self.stopButton = QtWidgets.QPushButton('Stop', self)
        # self.stopButton.setGeometry(QtCore.QRect(180, 90, 71, 23))
        self.quitButton = QtWidgets.QPushButton('Quit', self)
        self.quitButton.setGeometry(QtCore.QRect(480, 90, 71, 23))
        self.up_show = QtWidgets.QLabel('Show the text...', self)
        self.up_show.setGeometry(QtCore.QRect(10, 30, 601, 31))
        self.startButton.clicked.connect(self.startrun)
        # self.stopButton.clicked.connect(self.stoprun)
        self.quitButton.clicked.connect(self.quitWin)

if __name__ == '__main__':
    QApplication.setAttribute(Qt.AA_EnableHighDpiScaling)
    app = QtWidgets.QApplication(sys.argv)
    main_windows = main_windows()
    sys.exit(app.exec_())
HerbertLi2020 commented 1 year ago

My Python is 3.8, the dependency library list is:

Package Version


aiohttp 3.8.5 aiosignal 1.3.1 argostranslate 1.8.1 async-timeout 4.0.3 attrs 23.1.0 av 10.0.0 certifi 2023.7.22 charset-normalizer 3.2.0 click 8.1.7 colorama 0.4.6 coloredlogs 15.0.1 ctranslate2 3.17.0 edge-tts 6.1.8 faster-whisper 0.9.0 ffmpeg-python 0.2.0 filelock 3.12.4 flatbuffers 23.5.26 frozenlist 1.4.0 fsspec 2023.9.1 future 0.18.3 huggingface-hub 0.16.4 humanfriendly 10.0 idna 3.4 importlib-metadata 6.8.0 install 1.3.5 Jinja2 3.1.2 joblib 1.3.2 llvmlite 0.40.1 MarkupSafe 2.1.3 more-itertools 10.1.0 mpmath 1.3.0 multidict 6.0.4 networkx 3.1 numba 0.57.1 numpy 1.24.4 onnxruntime 1.15.1 openai-whisper 20230918 opencv-python 4.8.0.76 packaging 23.1 Pillow 10.0.1 pip 23.2.1 protobuf 4.24.3 PyAudio 0.2.13 pydub 0.25.1 PyQt5 5.15.9 PyQt5-Qt5 5.15.2 PyQt5-sip 12.12.2 pyreadline3 3.4.1 pywin32 306 PyYAML 6.0.1 regex 2023.8.8 requests 2.31.0 sacremoses 0.0.53 safetensors 0.3.3 sentencepiece 0.1.99 setuptools 49.2.1 six 1.16.0 stanza 1.1.1 sympy 1.12 tiktoken 0.3.3 tokenizers 0.14.0 torch 2.0.1+cu118 torchaudio 2.0.2+cu118 torchvision 0.15.2+cu118 tqdm 4.66.1 transformers 4.33.2 typing_extensions 4.8.0 urllib3 2.0.4 WMI 1.5.1 yarl 1.9.2 zhconv 1.4.3 zipp 3.16.2