Yesterday I crashed my Ubuntu 20 laptop while trying to write 12 hours of wav data to a single ogg file. The behavior of the machine just before it gave up suggests that poor memory management is the probable cause.
Basic recipe: retrieve 5 min of wav-data from harddisk and append it to an ogg-file. Repeat until 12 hours are processed into a single ogg-file.
I think there are more problems with the script having to do with 16 bits versus 32 bits. That is a problem I was looking into, but after this crash I've decided to give up on pyogg.
But calling the code below causes the problem:
import soundfile as sf
from pprint import pprint
import numpy as np
import time
import datetime
import re
import os
from copy import deepcopy
import glob
import pyogg
# Audio parameters used to configure the OpusBufferedEncoder.
channels = 1
samples_per_second = 48000
framesize = 20  # length of one encoder frame, in milliseconds
# The same frame expressed in seconds and as a number of samples.
desired_frame_duration = framesize / 1000
desired_frame_size = int(samples_per_second * desired_frame_duration)
# Build the buffered Opus encoder from the parameters defined above.
# This single module-level instance is the encoder handed to every
# OggOpusWriter created by create_new_ogg_opus_writer().
opus_buffered_encoder = pyogg.OpusBufferedEncoder()
opus_buffered_encoder.set_application("audio")
opus_buffered_encoder.set_sampling_frequency(samples_per_second)
opus_buffered_encoder.set_channels(channels)
opus_buffered_encoder.set_frame_size(framesize) # milliseconds
# Labels of the two half-day parts; they end up in the output file names.
start_time_am, start_time_pm = '00-00-00', '12-00-00'
# Lookup table that maps each half-day label to the other one.
swap_day_part = {
    start_time_am: start_time_pm,
    start_time_pm: start_time_am,
}
def create_new_ogg_opus_writer(out_dir, meter_id, endtime, start_time_string):
    """Open an Ogg Opus writer for one output file.

    The file is placed in ``out_dir`` and named
    ``<meter_id>-<year>-<month>-<day>-<start_time_string>.ogg``, where the
    date fields are taken from ``endtime`` (not zero-padded).  The writer
    uses the module-level ``opus_buffered_encoder``.

    Args:
        out_dir: Directory in which the output file is created.
        meter_id: Identifier string that starts the file name.
        endtime: Object with ``year``, ``month`` and ``day`` attributes.
        start_time_string: Half-day label that ends the file name.

    Returns:
        The newly created ``pyogg.OggOpusWriter``.
    """
    date_fields = [str(endtime.year), str(endtime.month), str(endtime.day)]
    outfile_name = '-'.join([meter_id, *date_fields, start_time_string]) + '.ogg'
    return pyogg.OggOpusWriter(
        os.path.join(out_dir, outfile_name),
        opus_buffered_encoder,
    )
def _half_day_start(moment):
    """Return the half-day label (start_time_am or start_time_pm) for ``moment``."""
    # Compare the hour directly: the text produced by strftime("%p") depends
    # on the active locale and may be neither 'AM' nor 'PM'.
    return start_time_am if moment.hour < 12 else start_time_pm


def create_half_day_ogg_files(wav_info_dict, FILE_OUTPUT=True, worklimit=None, amplification=None):
    """Concatenate consecutive wav files into one Ogg Opus file per meter and half day.

    The wav files matched by the glob pattern are processed in order of
    their base name.  Each name is split on ``.``, ``+``, ``_`` and ``-``;
    the first field is the meter id and the next seven fields are year,
    month, day, hour, minute, second and a fraction of a second in units
    of 1e-4 s.  A new output file is started whenever the meter id or the
    half day (AM/PM) in which the wav file ends changes; the previous
    output file is closed at that point.

    Args:
        wav_info_dict: Mapping with the keys ``'root'`` (glob pattern of the
            input wav files) and ``'out_root'`` (output directory).
        FILE_OUTPUT: When true, read the wav files and write their samples.
            When false, random samples stand in for the file contents and
            nothing is written to the writers (they are still created).
        worklimit: Maximum number of input files to process; ``None``
            processes all of them.
        amplification: Optional factor applied in place to the samples.

    Returns:
        None.  Nothing is written when no input file matches.
    """
    root = wav_info_dict['root']
    out_root = wav_info_dict['out_root']
    file_list = sorted(glob.glob(root), key=os.path.basename)
    print(root, file_list[:3])

    previous_meter_id = None
    previous_start_time_string = None
    ogg_opus_writer = None  # writer of the half-day file currently being filled
    try:
        for filepath in file_list[:worklimit]:
            if FILE_OUTPUT:
                # NOTE(review): samples are read as 32-bit integers; confirm
                # the sample width the Opus writer expects.
                samps, samplerate = sf.read(filepath, dtype='int32')
            else:
                # Dry run: small random signal instead of the file contents.
                samps, samplerate = np.random.binomial(10, 0.5, 1000).astype(np.int32) - 5, 48000
            if amplification is not None:
                # In-place scaling keeps the integer sample dtype.
                # NOTE(review): a non-integer factor may be rejected by
                # NumPy's casting rules -- confirm the intended inputs.
                samps *= amplification

            filename = os.path.basename(filepath)
            parts = re.split("[.+_-]+", filename)
            year, month, day, hour, minute, second, frac = [int(_) for _ in parts[1:-1]]
            # frac counts 1e-4 s, so frac*100 is the microsecond field.
            starttime = datetime.datetime(year, month, day, hour, minute, second, frac*100)
            endtime = starttime + datetime.timedelta(seconds=int(samps.shape[0]/samplerate))
            meter_id = parts[0]
            start_time_string = _half_day_start(endtime)

            if meter_id != previous_meter_id or previous_start_time_string != start_time_string:
                print('\n', meter_id, year, month, day, hour, minute, second, frac, end='')
                # Finish the previous output file before starting the next
                # one, so that at most one writer is open at any time.
                if ogg_opus_writer is not None:
                    ogg_opus_writer.close()
                    ogg_opus_writer = None
                ogg_opus_writer = create_new_ogg_opus_writer(out_root, meter_id, endtime, start_time_string)
                if FILE_OUTPUT:
                    ogg_opus_writer.write(samps)
            else:
                print('.', end='')
                if _half_day_start(starttime) != _half_day_start(endtime):
                    # Same meter and same label as the previous file, but this
                    # file starts and ends in different half days: split it.
                    # NOTE(review): this needs the previous file to end in
                    # the same half day as this one ends -- confirm that this
                    # case and the label swap below are intended.
                    print(starttime.strftime("%p"), endtime.strftime("%p"))
                    # Seconds from the start of the file to the next full
                    # hour; presumably the half-day boundary for files
                    # shorter than one hour -- verify for longer files.
                    split_sec = 3600 - (starttime.minute*60 + starttime.second)
                    split_index = split_sec * samplerate
                    print('split_sec'+ str(split_sec)+'\n' )
                    before_split = samps[:split_index]
                    after_split = samps[split_index:]
                    # Each sample goes to exactly one of the two files.
                    if FILE_OUTPUT and len(before_split):
                        ogg_opus_writer.write(before_split)
                    ogg_opus_writer.close()
                    ogg_opus_writer = None
                    start_time_string = swap_day_part[start_time_string]
                    ogg_opus_writer = create_new_ogg_opus_writer(out_root, meter_id, endtime, start_time_string)
                    if FILE_OUTPUT and len(after_split):
                        ogg_opus_writer.write(after_split)
                elif FILE_OUTPUT:
                    # Neither the meter nor the half day changed: append.
                    ogg_opus_writer.write(samps)

            previous_meter_id = meter_id
            previous_start_time_string = start_time_string
            time.sleep(0.1)  # short pause between files
    finally:
        # Close the last output file, also when an error interrupts the run
        # or when no input file was processed at all.
        if ogg_opus_writer is not None:
            ogg_opus_writer.close()
The code was called with a path pattern referencing a folder that contains a bunch of wav files named in the format <meter_id>-<date>-<time>+<10^-4 secs>.wav
Yesterday I crashed my Ubuntu 20 laptop while trying to write 12 hours of wav data to a single ogg file. The behavior of the machine just before it gave up suggests that poor memory management is the probable cause.
Basic recipe: retrieve 5 min of wav-data from harddisk and append it to an ogg-file. Repeat until 12 hours are processed into a single ogg-file.
I think there are more problems with the script having to do with 16 bits versus 32 bits. That is a problem I was looking into, but after this crash I've decided to give up on pyogg.
But calling the code below causes the problem:
The code was called with a path pattern referencing a folder that contains a bunch of wav files named in the format
<meter_id>-<date>-<time>+<10^-4 secs>.wav