Closed thedtvn closed 9 months ago
To mix audio together, just sum two audio buffers with the +
operator, then divide by 2 to avoid clipping in the output:
# Mix two audio files that already share the same sample rate and length,
# writing the average of the two buffers to '3.ogg'.
# NOTE(review): AudioFile is assumed to be imported already (the API looks
# like pedalboard.io.AudioFile -- confirm).
with AudioFile('3.ogg', "w", samplerate=48000, num_channels=2) as fs:
    with AudioFile('1.ogg') as f:
        # `frames` is the file's length in frames, so request exactly that
        # many; multiplying by the sample rate over-asks by that factor.
        data1 = f.read(f.frames)
    with AudioFile('2.ogg') as f2:
        data2 = f2.read(f2.frames)
    fs.write((data1 + data2) / 2)  # Mixing happens here
Note that the two audio buffers need to have the same sample rate and the same length for this to work.
A more general solution that allows for different sample rates and lengths:
import numpy as np

# Mix two audio files of possibly different sample rates and lengths:
# both inputs are resampled to the output rate, the shorter buffer is padded
# with trailing zeros (silence), and the average is written to '3.ogg'.
# NOTE(review): AudioFile is assumed to be imported already (the API looks
# like pedalboard.io.AudioFile -- confirm).
with AudioFile('3.ogg', "w", samplerate=48000, num_channels=2) as fs:
    with AudioFile('1.ogg').resampled_to(fs.samplerate) as f:
        # `frames` is the length in frames, so request exactly that many
        # rather than samplerate * frames.
        data1 = f.read(f.frames)
    with AudioFile('2.ogg').resampled_to(fs.samplerate) as f2:
        data2 = f2.read(f2.frames)
    # Buffers are indexed as (channel, frame); axis 1 is the length in frames.
    total_length = max(data1.shape[1], data2.shape[1])
    # Extend the shorter buffer with silence if necessary (at most one of the
    # two can be shorter than the maximum, hence if/elif):
    if data1.shape[1] < total_length:
        data1 = np.pad(data1, [(0, 0), (0, total_length - data1.shape[1])])
    elif data2.shape[1] < total_length:
        data2 = np.pad(data2, [(0, 0), (0, total_length - data2.shape[1])])
    fs.write((data1 + data2) / 2)
Or, even better: do the above in chunks to avoid needing to read the entire file at once, saving a lot of memory:
import numpy as np

# Mix two audio files chunk by chunk so that neither file has to be held in
# memory in full. Both inputs are resampled to `output_sr`; once one input is
# exhausted, its chunks are padded with silence until the other one ends.
# NOTE(review): AudioFile is assumed to be imported already (the API looks
# like pedalboard.io.AudioFile -- confirm).
output_sr = 48000
chunk_size = 48000  # increase this for faster speed, decrease this for lower memory usage
with AudioFile('3.ogg', "w", samplerate=output_sr, num_channels=2) as fs, \
        AudioFile("1.ogg").resampled_to(output_sr) as f1, \
        AudioFile("2.ogg").resampled_to(output_sr) as f2:
    while f1.tell() < f1.frames or f2.tell() < f2.frames:
        chunk1 = f1.read(chunk_size)
        chunk2 = f2.read(chunk_size)
        # Guard against an endless loop: if the reported `frames` is larger
        # than what can actually be read, tell() never reaches it and both
        # reads keep returning empty chunks.
        if chunk1.shape[1] == 0 and chunk2.shape[1] == 0:
            break
        # Extend the shorter chunk with silence if necessary:
        if chunk1.shape[1] < chunk2.shape[1]:
            chunk1 = np.pad(chunk1, [(0, 0), (0, chunk2.shape[1] - chunk1.shape[1])])
        elif chunk2.shape[1] < chunk1.shape[1]:
            chunk2 = np.pad(chunk2, [(0, 0), (0, chunk1.shape[1] - chunk2.shape[1])])
        fs.write((chunk1 + chunk2) / 2)
ex