TeamPyOgg / PyOgg

Simple OGG Vorbis, Opus and FLAC bindings for Python
The Unlicense
63 stars 27 forks source link

Memory leaks in PyOgg? #92

Closed RonaldAJ closed 2 years ago

RonaldAJ commented 2 years ago

Yesterday I crashed my Ubuntu20 laptop trying to write 12 hours of wav data to an ogg-file. The behavior before the machine gave up is an indication that poor memory management is the probable cause of this.

Basic recipe: retrieve 5 min of wav-data from harddisk and append it to an ogg-file. Repeat until 12 hours are processed into a single ogg-file.

I think there are more problems with the script having to do with 16 bits versus 32 bits. That is a problem I was looking into, but after this crash I've decided to give up on pyogg.

But calling the code below causes the problem:

import soundfile as sf

from pprint import pprint
import numpy as np
import time
import datetime
import re
import os
from copy import deepcopy

import glob
import pyogg

# Create a OpusBufferedEncoder
# Parameters:
samples_per_second = 48000
channels = 1
framesize = 20  # milliseconds
desired_frame_duration = framesize/1000
desired_frame_size = int(desired_frame_duration * samples_per_second)

opus_buffered_encoder = pyogg.OpusBufferedEncoder()
opus_buffered_encoder.set_application("audio")
opus_buffered_encoder.set_sampling_frequency(samples_per_second)
opus_buffered_encoder.set_channels(channels)
opus_buffered_encoder.set_frame_size(framesize) # milliseconds

# day part strings

start_time_am = '00-00-00'
start_time_pm = '12-00-00'

swap_day_part = {start_time_am:start_time_pm, start_time_pm:start_time_am}

def create_new_ogg_opus_writer(out_dir, meter_id, endtime, start_time_string):

    outfile_name = meter_id + '-' + str(endtime.year) + '-' + str(endtime.month) + '-' + str(endtime.day) + '-' + start_time_string + '.ogg'
    outfile_path = os.path.join(out_dir, outfile_name)

    ogg_opus_writer = pyogg.OggOpusWriter(outfile_path,
                                          opus_buffered_encoder)

    return ogg_opus_writer

def create_half_day_ogg_files(wav_info_dict, FILE_OUTPUT=True, worklimit = None, amplification = None):
    previous_meter_id = None

    root = wav_info_dict['root']
    out_root = wav_info_dict['out_root']

    file_list = sorted(glob.glob(root), key = lambda filename: os.path.basename(filename))

    print(root, file_list[:3])

    for fileno, filepath in enumerate(file_list[:worklimit]):
        # print(fileno, filepath)

        if FILE_OUTPUT:
            samps, samplerate = sf.read(filepath, dtype='int32',)
        else:
            samps, samplerate = np.random.binomial(10,0.5,1000).astype(np.int32)-5, 48000

        #print(type(samps), samps.dtype, samps.shape)
        filename = os.path.basename(filepath)

        if amplification is not None:
            samps *= amplification

        parts = re.split("[.+_-]+", filename)

        year, month, day, hour, minute, second, frac = [int(_) for _ in parts[1:-1]]

        starttime = datetime.datetime(year, month, day, hour, minute, second, frac*100)
        endtime = starttime + datetime.timedelta(seconds = int(samps.shape[0]/samplerate))
        meter_id = parts[0]

        if endtime.strftime("%p") == 'AM':
            start_time_string = start_time_am
        elif endtime.strftime("%p") == 'PM':
            start_time_string = start_time_pm

        if meter_id != previous_meter_id or previous_start_time_string != start_time_string:
            print('\n', meter_id, year, month, day, hour, minute, second, frac, end='')
            ogg_opus_writer = create_new_ogg_opus_writer(out_root, meter_id, endtime, start_time_string)
            if FILE_OUTPUT:
                ogg_opus_writer.write(samps)
        else:
            #print('-'+starttime.strftime("%p")+endtime.strftime("%p"), end='')
            print('.', end='')
            if starttime.strftime("%p") != endtime.strftime("%p"): # If start and end are in different parts of the day start a new file
                print(starttime.strftime("%p"), endtime.strftime("%p"))
                split_sec = 3600 - (starttime.minute*60+starttime.second)
                samps_last = samps[:split_sec*samplerate]
                samps_first = samps[split_sec*samplerate:]

                ## Here the end of the file with part of this file
                print('split_sec'+ str(split_sec)+'\n' )
                if FILE_OUTPUT:
                    ogg_opus_writer.write(samps)
                    ogg_opus_writer.close()

                ## Here the start of the new file.
                start_time_string = swap_day_part[start_time_string]
                ogg_opus_writer = create_new_ogg_opus_writer(out_root, meter_id, endtime, start_time_string)

                # print('\n', meter_id, year, month, day, hour, minute, second, frac, end='')

                if FILE_OUTPUT:
                    ogg_opus_writer.write(samps)

            else:    # We get here if neither the node and the start_time_string (AM or PM) has changed
                ## Here the concatenation of the full wav files
                if FILE_OUTPUT:
                    ogg_opus_writer.write(samps)

        previous_meter_id = meter_id
        previous_start_time_string = start_time_string
        time.sleep(0.1)

    ogg_opus_writer.close()

    return

where the code was called referencing a folder containing a bunch of wav files of the format <meter_id>-<date>-<time>+<10-4secs>.wav

import os

from create_half_day_ogg_files import create_new_ogg_opus_writer, create_half_day_ogg_files

wav_info_dict={}
wav_info_dict['root'] = f'./wavfiles/*.wav'
wav_info_dict['out_root'] = f'./oggfiles/'

try: 
    os.mkdir(wav_info_dict['out_root'])
except: 
    print('out_root exists')

print(wav_info_dict)

create_half_day_ogg_files(wav_info_dict, FILE_OUTPUT=True, worklimit = None)