amsehili / auditok

An audio/acoustic activity detection and audio segmentation tool
MIT License
745 stars 96 forks source link

Real-Time Silence detection from bytes #44

Closed DoodleBears closed 6 months ago

DoodleBears commented 1 year ago

Following up on #23, I am trying to split the speaker's audio using a pyaudio stream:

The callback part (how can I take in_data and split it the same way the microphone input is read and split in #23?)

def callback(self, in_data, frame_count, time_info, status):
    """Buffer the incoming chunk and try to cut one audio event out of it.

    Each chunk is appended to ``self.frames``; the whole buffer is then
    joined and handed to ``split``. If ``split`` yields at least one region,
    the first one is saved to a timestamped file and the buffer is emptied.
    Any further regions are left unconsumed.

    Args:
        in_data: Raw bytes delivered for this callback invocation.
        frame_count: Not used by this callback.
        time_info: Not used by this callback.
        status: Not used by this callback.

    Returns:
        The tuple ``(in_data, pyaudio.paContinue)``.
    """
    self.frames.append(in_data)
    buffered = b''.join(self.frames)
    print(buffered)

    reader = AudioReader(
        input=buffered,
        sr=self.__SAMPLE_RATE,
        sw=self.__SAMPLE_WIDTH,
        ch=self.__CHANNEL,
    )
    # No energy threshold (eth) is passed, so split uses its own default.
    detected = split(
        input=reader,
        max_silence=self.__MAX_SILENCE,
        max_dur=self.__MAX_DURATION,
        min_dur=self.__MIN_DURATION,
    )

    # Only the first detected region is handled per callback invocation.
    first_region = next(iter(detected), None)
    if first_region is not None:
        print(f"{constants.CONSOLE_COLOR_RED}split{constants.CONSOLE_COLOR_WHITE}")
        path = f'{constants.TEMP_SPEAKER_OUTPUT_AUDIO_DIR}/{str(time.time()) + constants.TEMP_SPEAKER_OUTPUT_AUDIO_FORMAT}'
        first_region.save(path)
        # Start buffering from scratch once an event has been written out.
        self.frames = []

    return (in_data, pyaudio.paContinue)

The pyaudio part

# Number of frames delivered to the callback per invocation.
# pyaudio.get_sample_size(pyaudio.paInt16) is the width of ONE sample in
# bytes (2), not a buffer length; passing it as frames_per_buffer makes the
# callback fire every 2 frames. Use an explicit chunk size instead.
chunk_frames = 1024

with p.open(format=pyaudio.paInt16,
        channels=default_speakers["maxInputChannels"],
        rate=int(default_speakers["defaultSampleRate"]),
        frames_per_buffer=chunk_frames,
        input=True,
        input_device_index=default_speakers["index"],
        stream_callback=self.callback
) as stream:
    # Open a PA stream via the context manager.
    # After leaving the context, everything will be correctly closed
    # (Stream, PyAudio manager).

    # All audio handling happens in self.callback; this loop only keeps the
    # stream open until the listening flag is cleared, polling once a second.
    while self.ai_listen_handler.is_listening_speaker:
        time.sleep(1)

Use auditok.split to split microphone input in real-time

for region in auditok.split(
    input=None,
    sr=self.__SAMPLE_RATE,
    sw=self.__SAMPLE_WIDTH,
    ch=self.__CHANNEL,
    eth=self.__ENERGY_THRESHOLD,
    max_silence=self.__MAX_SILENCE,
    max_dur=self.__MAX_DURATION,
    min_dur=self.__MIN_DURATION
    ):
    if not self.ai_listen_handler.is_listening_mic:
        return

    path = f'{constants.TEMP_MIC_INPUT_AUDIO_DIR}/{str(time.time()) + constants.TEMP_MIC_INPUT_AUDIO_FORMAT}'
    region.save(path)
DoodleBears commented 1 year ago

I just found a way to deal with it, but... it feels weird

# Frames fetched by each blocking read. Also used as the stream's buffer size:
# pyaudio.get_sample_size(pyaudio.paInt16) is the sample width in bytes (2),
# not a frame count, so it is not a meaningful frames_per_buffer value.
chunk_frames = 1024

with p.open(format=pyaudio.paInt16,
        channels=default_speakers["maxInputChannels"],
        rate=int(default_speakers["defaultSampleRate"]),
        frames_per_buffer=chunk_frames,
        input=True,
        input_device_index=default_speakers["index"],
) as stream:
    # Open a PA stream via the context manager.
    # After leaving the context, everything will be correctly closed
    # (Stream, PyAudio manager).

    threshold_sec = 1.2   # trailing silence (seconds) that ends an utterance
    max_buffer_sec = 100  # drop the raw buffer after this long without a detection

    # Durations are tracked as a number of reads of `chunk_frames` frames.
    # NOTE(review): this assumes self.__SAMPLE_RATE matches the rate the
    # stream was opened with -- confirm.
    chunks_per_sec = self.__SAMPLE_RATE / chunk_frames
    flush_after_chunks = threshold_sec * chunks_per_sec
    max_buffer_chunks = max_buffer_sec * chunks_per_sec

    pending_region = None  # regions detected so far for the current utterance
    buffered = b''         # raw audio not yet attributed to a region
    buffered_chunks = 0    # number of reads currently held in `buffered`
    silence = 0            # reads since the last detected region

    while stream.is_active():
        # Free memory when nothing has been detected for a long time. The
        # buffer has its own counter: testing `silence` here (as before) wiped
        # the buffer on EVERY iteration once the limit was passed, so split()
        # only ever saw a single chunk and detection could stall for good.
        if buffered_chunks > max_buffer_chunks:
            buffered = b''
            buffered_chunks = 0

        buffered += stream.read(chunk_frames)
        buffered_chunks += 1
        silence += 1  # assume silence until split() reports a region below

        for region in auditok.split(
            input=buffered,
            sr=self.__SAMPLE_RATE,
            sw=self.__SAMPLE_WIDTH,
            ch=self.__CHANNEL,
            max_silence=self.__MAX_SILENCE,
            max_dur=self.__MAX_DURATION,
            min_dur=self.__MIN_DURATION
            ):
            # A detected region resets the silence timer and the raw buffer.
            # Rebinding `buffered` does not affect the split() call in flight.
            silence = 0
            buffered = b''
            buffered_chunks = 0
            if pending_region is None:
                pending_region = region
            else:
                pending_region += region

        # Enough silence after some detected audio: write the utterance out.
        if pending_region is not None and silence > flush_after_chunks:
            # `path` was never defined in the original snippet; build a
            # timestamped name so successive utterances do not overwrite each
            # other. NOTE(review): assumes the speaker-output constants are the
            # intended destination -- confirm.
            path = f'{constants.TEMP_SPEAKER_OUTPUT_AUDIO_DIR}/{str(time.time()) + constants.TEMP_SPEAKER_OUTPUT_AUDIO_FORMAT}'
            pending_region.save(path)
            pending_region = None

# Kept from the original. NOTE(review): the context manager above is described
# as already closing the stream -- confirm this extra call is still wanted.
stream.close()