Hello everyone,
I'm new to this tool. I would like to know the steps to calculate the similarity between two audio files. I started writing some code here, but it still has a lot of errors. Can someone help me? Thanks...
`
import os
import random
import time
import torch
import librosa
import numpy as np
from torch.utils.data import DataLoader
from hparam import hparam as hp
from speech_embedder_net import SpeechEmbedder, GE2ELoss, get_centroids, get_cossim
from scipy.io.wavfile import read
def get_utterance(utter_path):
    """Extract fixed-length log-mel spectrogram segments from one audio file.

    Loads the utterance at *utter_path*, runs voice-activity detection, and
    for every voiced interval that is long enough, keeps the first and last
    ``hp.data.tisv_frame`` frames of its log-mel spectrogram.

    Parameters
    ----------
    utter_path : str
        Path to the utterance audio file.

    Returns
    -------
    numpy.ndarray
        Array of shape (num_segments, hp.data.nmels, hp.data.tisv_frame)
        holding the log-mel spectrogram segments; empty if no interval
        was long enough.
    """
    # Minimum usable interval length, in samples: tisv_frame hops plus one
    # window, all expressed in seconds (hp.data.hop / hp.data.window) and
    # converted via the sample rate.
    utter_min_len = (hp.data.tisv_frame * hp.data.hop + hp.data.window) * hp.data.sr

    # NOTE: pass sr as a keyword — it is keyword-only in librosa >= 0.10.
    utter, sr = librosa.core.load(utter_path, sr=hp.data.sr)

    # Voice activity detection. This works fine for TIMIT; if you get an
    # array of shape 0 for other audio, tune top_db (e.g. top_db=100 for VCTK).
    intervals = librosa.effects.split(utter, top_db=30)

    # The mel filterbank depends only on hyperparameters, so build it once
    # instead of once per interval.
    mel_basis = librosa.filters.mel(sr=hp.data.sr, n_fft=hp.data.nfft,
                                    n_mels=hp.data.nmels)

    utterances_spec = []
    for interval in intervals:
        # Skip intervals too short to yield tisv_frame full frames.
        if (interval[1] - interval[0]) > utter_min_len:
            utter_part = utter[interval[0]:interval[1]]
            S = librosa.core.stft(y=utter_part, n_fft=hp.data.nfft,
                                  win_length=int(hp.data.window * sr),
                                  hop_length=int(hp.data.hop * sr))
            S = np.abs(S) ** 2  # power spectrogram
            # Log-mel spectrogram; 1e-6 avoids log10(0).
            S = np.log10(np.dot(mel_basis, S) + 1e-6)
            # Keep the first and last tisv_frame frames of this interval.
            utterances_spec.append(S[:, :hp.data.tisv_frame])
            utterances_spec.append(S[:, -hp.data.tisv_frame:])

    return np.array(utterances_spec)
Hello everyone, I'm new to this tool. I would like to know the steps to calculate the similarity between two audio files. I started writing some code here, but it still has a lot of errors. Can someone help me? Thanks... ` import os import random import time import torch import librosa import numpy as np from torch.utils.data import DataLoader
from hparam import hparam as hp from speech_embedder_net import SpeechEmbedder, GE2ELoss, get_centroids, get_cossim from scipy.io.wavfile import read
def get_utterance(utter_path):
def get_data(sample_path):
def get_similarity(model_path, sample_a, sample_b):
if name=="main":
`