Open Pipe-Runner opened 6 years ago
This error comes from the Bregman library, which does not work with the latest NumPy. For Python 3, try installing it from https://github.com/pkmital/BregmanToolkit and then change this line in features_base.py: num_frames = 1000 #len(self.x)
try this code first:
# Smoke test for the Bregman toolkit: extract features from one audio file,
# display them, resynthesise audio from them, then run two bundled tutorials.
from bregman.suite import *
import os

p = default_feature_params()

# All file names below are resolved relative to the dataset directory.
path = r'./audio_dataset/'
os.chdir(path)

audio_file = "amen.wav"
print(audio_file)

# First pass: features computed with the toolkit's default parameters.
F = Features(audio_file, p)
imagesc(F.X, dbscale=True)

F.inverse(F.X, pvoc=True)  # invert features to audio
play(balance_signal(F.x_hat), F.sample_rate)

# Second pass: same file, but with STFT features and explicit FFT/window/hop
# sizes.
p['feature'] = 'stft'
p['nfft'] = 1024
p['wfft'] = 512
p['nhop'] = 256
F = Features(audio_file, p)
imagesc(F.X, dbscale=True)
title('Wide-band spectrogram')

F.inverse(F.X)  # invert features to audio
play(balance_signal(F.x_hat), F.sample_rate)

# NOTE(review): execfile is a Python 2 builtin; this part of the snippet will
# raise NameError under Python 3.
tuts = get_tutorials()
execfile(tuts[0])

execfile(tuts[1])
To fix the issue under Python 2, see this commit: https://github.com/BinRoot/BregmanToolkit/pull/1/commits/f7b924b8a4e67ae5ceac3d1285acf35f4395112d
import tensorflow as tf
import numpy as np
from bregman.suite import *
import os

# The '*.wav' pattern below is relative, so switch to the dataset directory
# before any graph op that refers to it is evaluated.
path = r'./audio_dataset/'
os.chdir(path)
print(os.getcwd())

# k-means settings: number of clusters and the cap on update iterations.
k = 2
max_iterations = 100

# Tensor holding the matched file names, plus a scalar with how many there are.
filenames = tf.train.match_filenames_once('*.wav')
count_num_files = tf.size(filenames)

# Built after match_filenames_once on purpose: the initializer ops only cover
# variables that already exist when they are created (presumably the file list
# is stored in a local variable -- confirm against the TF 1.x docs).
init = (tf.global_variables_initializer(), tf.local_variables_initializer())

# Queue that feeds file names to a whole-file reader; only the `filename`
# output is consumed by the functions in this script.
filename_queue = tf.train.string_input_producer(filenames)
reader = tf.WholeFileReader()
filename, file_contents = reader.read(filename_queue)

# Placeholder for one chromagram and, for each column, the index of the row
# holding the largest value (argmax along axis 0).
chromo = tf.placeholder(tf.float32)
max_freqs = tf.argmax(chromo, 0)
def get_next_chromogram(sess):
    """Fetch the next file name from the input queue and build its chromagram.

    Args:
        sess: Session used to evaluate the module-level ``filename`` tensor.

    Returns:
        A ``(features, audio_file)`` pair: the ``X`` attribute of the
        ``Chromagram`` built for the file, and the value ``sess.run``
        returned for ``filename``.
    """
    audio_file = sess.run(filename)
    chromagram = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
    return chromagram.X, audio_file
def extract_feature_vector(sess, chromo_data):
    """Reduce a chromagram to a normalised histogram of its strongest rows.

    For every column of ``chromo_data`` the index of the largest entry is
    computed (via the module-level ``max_freqs`` op); those indices are then
    counted per row index and divided by the number of columns.

    Args:
        sess: Session used to evaluate ``max_freqs``.
        chromo_data: 2-D array; its shape is unpacked as
            ``(num_features, num_samples)``.

    Returns:
        A float array of length ``num_features`` with the per-row share of
        columns in which that row was the maximum.
    """
    num_features, num_samples = np.shape(chromo_data)
    strongest_rows = sess.run(max_freqs, feed_dict={chromo: chromo_data})
    # One unit-wide bin per row index: edges 0, 1, ..., num_features.
    bin_edges = range(num_features + 1)
    counts, _ = np.histogram(strongest_rows, bins=bin_edges)
    return counts.astype(float) / num_samples
def get_dataset(sess):
    """Build the feature matrix for every audio file matched by the pipeline.

    For each file the chromagram is computed, a two-panel figure (chromagram
    image plus histogram of the per-column argmax) is saved as
    ``'<filename>.png'``, and the file's normalised histogram feature vector
    is collected.

    Args:
        sess: Session in which the input pipeline and ``max_freqs`` are run.

    Returns:
        A ``(xs, names)`` pair: ``xs`` is a ``numpy`` matrix with one feature
        vector per row, ``names`` the list of file names in the same order.
    """
    num_files = sess.run(count_num_files)

    # The file-name queue is only filled once its queue runners are started.
    # NOTE(review): the runner threads are never stopped or joined here.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    xs = list()
    names = list()
    plt.figure()
    for _ in range(num_files):
        chromo_data, filename = get_next_chromogram(sess)

        # Left panel: the raw chromagram.
        plt.subplot(1, 2, 1)
        plt.imshow(chromo_data, cmap='Greys', interpolation='nearest')
        plt.title('Visualization of Sound Spectrum')

        # Right panel: histogram of the strongest row index per column.
        plt.subplot(1, 2, 2)
        freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})
        plt.hist(freq_vals)
        plt.title('Histogram of Notes')
        plt.xlabel('Musical Note')
        plt.ylabel('Count')
        plt.savefig('{}.png'.format(filename))
        # Reuse the same figure for the next file.
        plt.clf()

        names.append(filename)
        x = extract_feature_vector(sess, chromo_data)
        xs.append(x)
    xs = np.asmatrix(xs)
    return xs, names
def initial_cluster_centroids(X, k):
    """Choose the starting centroids: the first ``k`` rows of ``X``.

    Args:
        X: 2-D array-like indexed as ``X[rows, columns]``.
        k: Number of leading rows to take.

    Returns:
        The slice of ``X`` made of rows ``0`` to ``k - 1`` and all columns.
    """
    leading_rows = slice(0, k)
    return X[leading_rows, :]
def assign_cluster(X, centroids):
    """Build the op that assigns every row of ``X`` to its nearest centroid.

    A leading axis is added to ``X`` and a middle axis to ``centroids`` so
    that their difference broadcasts to one entry per (centroid, point)
    pair; squared differences are summed over the last axis and the argmin
    is taken over the centroid axis.

    Args:
        X: Data points, one per row.
        centroids: Current centroids, one per row.

    Returns:
        A tensor with, for each row of ``X``, the index of the centroid with
        the smallest squared Euclidean distance.
    """
    points = tf.expand_dims(X, 0)
    centers = tf.expand_dims(centroids, 1)
    squared_diff = tf.square(tf.subtract(points, centers))
    distances = tf.reduce_sum(squared_diff, 2)
    return tf.argmin(distances, 0)
def recompute_centroids(X, Y):
    """Build the op that averages the rows of ``X`` within each cluster.

    Uses the module-level ``k`` as the number of clusters.

    Args:
        X: Data points, one per row.
        Y: Cluster index for each row of ``X``.

    Returns:
        A tensor of per-cluster sums divided element-wise by per-cluster
        member counts.
    """
    # NOTE(review): a cluster with no members divides 0 by 0 here --
    # presumably yielding NaN for that centroid; confirm.
    totals = tf.unsorted_segment_sum(X, Y, k)
    members = tf.unsorted_segment_sum(tf.ones_like(X), Y, k)
    return totals / members
# k-means driver: load the dataset, then alternate assignment and centroid
# updates until the centroids stop changing or max_iterations is reached.
with tf.Session() as sess:
    sess.run(init)
    X, names = get_dataset(sess)
    centroids = initial_cluster_centroids(X, k)
    i, converged = 0, False
    while not converged and i < max_iterations:
        i += 1
        Y = assign_cluster(X, centroids)
        updated = sess.run(recompute_centroids(X, Y))
        # Identical centroids would reproduce the same assignment and the
        # same update on every further pass, so stopping here cannot change
        # the result. Centroids containing NaN never compare equal, in which
        # case the loop simply runs to max_iterations.
        converged = np.array_equal(updated, centroids)
        centroids = updated
    # list() so the pairs themselves are printed; on Python 3 a bare zip()
    # would print only the iterator object.
    print(list(zip(sess.run(Y), names)))
The Chromagram function used in chapter 5 for K-Means classification raises an error when given an audio file: TypeError: 'float' object cannot be interpreted as an index