ProletRevDicta / Prolet

Historical documents (in Chinese) about the GPCR (Thanks Comrade RC MR HR etc.)
791 stars 248 forks source link

GNU/Linux SOX tempo Resampler (without PITCH/OTO) #302

Open ghost opened 2 years ago

ghost commented 2 years ago

Newest version:

import wave,sox
# Module-level pysox Combiner; the script's final step calls cbn.build(...)
# with 'concatenate' to join the per-syllable clips into result.wav.
cbn=sox.Combiner()
def tempo(fname,time,sn):
    """Time-stretch the WAV file ``fname`` to last ``time`` seconds.

    The source duration is taken from the WAV header (frames / frame rate).
    The ratio source-duration / target-duration is handed to the sox
    ``tempo`` effect through pysox, and the result is written to ``sn``.

    Args:
        fname: Path of the input WAV file.
        time: Target duration in seconds; must be non-zero.
        sn: Path of the output file.

    Raises:
        ZeroDivisionError: If ``time`` is 0.
    """
    # Only the header is needed to work out the duration, so the sample data
    # is not read, and the handle is closed before sox opens the same file.
    with wave.open(fname,'rb') as infile:
        length=infile.getnframes()/infile.getframerate()
    print(length,'--->',time)
    factor=length/time
    print(factor)
    tfm=sox.Transformer()
    tfm.tempo(factor)
    tfm.build_file(fname,sn)
# Score: '|'-separated entries, each "<syllable> <number> <duration>".
# Field 0 names the recording under Chinese/, field 2 is the target length
# passed to tempo(); field 1 is not used by this version of the script.
lyrics = [entry.split(' ') for entry in 'wu2 5 1.25|chan3 5 0.25|jie1 8 1|ji2 5 1|wen2 6 0.625|hua4 6 0.125|da4 6 0.5|ge2 6 0.5|ming4 5 1|hai1 1 1'.split('|')]
sb = []
for n, a in enumerate(lyrics):
    # Zero-padded six-digit clip name: 000000.wav, 000001.wav, ...
    sn = '%06d.wav' % n
    ss = 'symbols/%s' % sn
    tempo('Chinese/%s.wav' % a[0], float(a[2]), ss)
    sb.append(ss)
# Join the stretched clips, in score order, into a single file.
cbn.build(sb, 'result.wav', 'concatenate')

Oldest version:

import wave,librosa,struct,sox,os,math
from scipy.io import wavfile
import soundfile as sf
# Module-level pysox Combiner; the script's final step calls cbn.build(...)
# with 'concatenate' to join the processed clips into result.wav.
cbn=sox.Combiner()
def stretch(fname,time,sn):
    """Time-stretch ``fname`` to ``time`` seconds using the sox command line.

    The source duration is taken from the WAV header (frames / frame rate).
    The ratio source-duration / target-duration is passed to sox's ``tempo``
    effect, and the output is written to ``symbols/<sn>``.

    Args:
        fname: Path of the input WAV file.
        time: Target duration in seconds; must be non-zero.
        sn: File name (not a path) of the clip to create under ``symbols/``.

    Returns:
        The tempo factor that was passed to sox.

    Raises:
        ZeroDivisionError: If ``time`` is 0.
    """
    import shlex  # local import: not among the file-level imports

    # Only the header is needed to work out the duration, so the sample data
    # is not read, and the handle is closed before sox opens the same file.
    with wave.open(fname,'rb') as infile:
        length=infile.getnframes()/infile.getframerate()
    print(length,'--->',time)
    factor=length/time
    print(factor)
    ss='symbols/%s'%sn
    # Quote every argument so paths with spaces or shell metacharacters reach
    # sox as single arguments; plain names are passed through unchanged.
    os.system('sox %s'%' '.join(shlex.quote(arg) for arg in (fname,ss,'tempo',str(factor))))
    return factor
# Score: '|'-separated entries, each "<syllable> <table index> <duration>".
# Field 0 names the recording under Chinese/, field 1 is a 1-based index into
# `table`, field 2 is the target length passed to stretch().
lyrics = [entry.split(' ') for entry in 'wu2 5 1.25|chan3 5 0.25|jie1 8 1|ji2 5 1|wen2 6 0.625|hua4 6 0.125|da4 6 0.5|ge2 6 0.5|ming4 5 1|hai1 1 1'.split('|')]
# Alternative score kept for reference (first-tone recordings, no final entry):
#lyrics=[a.split(' ')for a in'wu1 5 1.25|chan1 5 0.25|jie1 8 1|ji1 5 1|wen1 6 0.625|hua1 6 0.125|da1 6 0.5|ge1 6 0.5|ming1 5 1'.split('|')]
# Target values looked up by field 1; each is divided by 220.000 below, so the
# first entry corresponds to no pitch shift.
table = [220.000, 246.942, 261.626, 293.665, 329.628, 349.228, 391.995, 440.000]
# Alternative table from the earlier librosa pitch_shift attempt (n_steps values):
#table=[1,3,4,6,8,9,11,13]
sb = []
for n, a in enumerate(lyrics):
    # Zero-padded six-digit clip name: 000000.wav, 000001.wav, ...
    sn = '%06d.wav' % n
    stretch('Chinese/%s.wav' % a[0], float(a[2]), sn)
    # 1200 * log2(value / 220): the interval from 220 expressed in cents,
    # which is the argument given to the sox pitch effect.
    ratio = table[int(a[1]) - 1] / 220.000
    shift = math.log(ratio) / math.log(2) * 12 * 100
    pitch_args = ["symbols/%s" % sn, "symbols2/%s" % sn, 'pitch', str(shift)]
    os.system('sox %s' % ' '.join(pitch_args))
    sb.append('symbols2/%s' % sn)
# Join the pitch-shifted clips, in score order, into a single file.
cbn.build(sb, 'result.wav', 'concatenate')
ghost commented 2 years ago

References: https://pysox.readthedocs.io/en/latest/example.html , https://www.hupeng.me/articles/92.html , and https://pysox.readthedocs.io/en/latest/api.html#sox.transform.Transformer.tempo