BinRoot / TensorFlow-Book

Accompanying source code for Machine Learning with TensorFlow. Refer to the book for step-by-step explanations.
http://www.tensorflowbook.com
MIT License
4.45k stars 1.21k forks source link

Bregman lib error #42

Open Pipe-Runner opened 6 years ago

Pipe-Runner commented 6 years ago

The Chromagram function used in chapter 5 for K-Means classification returns an error when fed an audio file. TypeError: 'float' object cannot be interpreted as an index

zoldaten commented 3 years ago

This error comes from the Bregman lib, which doesn't work with the latest numpy. Try installing the Python 3 fork from here: https://github.com/pkmital/BregmanToolkit — then, in features_base.py, change: num_frames = 1000 #len(self.x)

try this code first:

# Bregman feature-extraction smoke test (from the BregmanToolkit tutorials).
# NOTE(review): requires the Python 3 fork of BregmanToolkit recommended in
# this thread, plus an ./audio_dataset/amen.wav file.
from bregman.suite import *
import os

p = default_feature_params()
path = r'./audio_dataset/'
os.chdir(path)

audio_file = "amen.wav"
print(audio_file)

# Default constant-Q spectrogram.
F = Features(audio_file, p)
imagesc(F.X, dbscale=True)
# Fix: the original wrote `title = title(...)`, which rebinds (shadows)
# the `title` plotting function after the first call.
title('Default constant-Q spectrogram')

F.inverse(F.X, pvoc=True)  # invert features to audio
play(balance_signal(F.x_hat), F.sample_rate)

# Wide-band STFT spectrogram.
p['feature'] = 'stft'
p['nfft'] = 1024
p['wfft'] = 512
p['nhop'] = 256
F = Features(audio_file, p)
imagesc(F.X, dbscale=True)
title('Wide-band spectrogram')

F.inverse(F.X)  # invert features to audio
play(balance_signal(F.x_hat), F.sample_rate)

# Run the first two bundled tutorials. Fix: execfile() was removed in
# Python 3; exec(open(...).read()) is the portable equivalent.
tuts = get_tutorials()
for tut in tuts[:2]:
    with open(tut) as fh:
        exec(fh.read())

zoldaten commented 3 years ago

To fix the issue with python2 - https://github.com/BinRoot/BregmanToolkit/pull/1/commits/f7b924b8a4e67ae5ceac3d1285acf35f4395112d

zoldaten commented 3 years ago

import tensorflow as tf
import numpy as np
from bregman.suite import *
import os

# Work from the directory holding the .wav files.
path = r'./audio_dataset/'
os.chdir(path)
print(os.getcwd())

# K-means configuration: cluster count and iteration cap.
k = 2
max_iterations = 100

# Queue every .wav file in the current directory.
filenames = tf.train.match_filenames_once('*.wav')
count_num_files = tf.size(filenames)
print(count_num_files)

init = (tf.global_variables_initializer(), tf.local_variables_initializer())

filename_queue = tf.train.string_input_producer(filenames)
reader = tf.WholeFileReader()
filename, file_contents = reader.read(filename_queue)

# Placeholder for a chromagram; max_freqs takes, for each column (frame),
# the row index with the largest value.
chromo = tf.placeholder(tf.float32)
max_freqs = tf.argmax(chromo, 0)

def get_next_chromogram(sess): audio_file = sess.run(filename) F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205) return F.X, audio_file

def extract_feature_vector(sess, chromo_data): num_features, num_samples = np.shape(chromo_data) freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data}) hist, bins = np.histogram(freq_vals, bins=range(num_features + 1)) normalized_hist = hist.astype(float) / num_samples return normalized_hist

def get_dataset(sess):
    """Build the feature matrix for every queued .wav file.

    Returns (xs, names): xs is a matrix with one histogram feature vector
    per file, names lists the file names in the same order. Also saves a
    spectrum/histogram plot per file as a side effect.
    """
    num_files = sess.run(count_num_files)
    coord = tf.train.Coordinator()
    # Fix: the original called tf.train.start_queuerunners (missing
    # underscore), which raises AttributeError in TF 1.x.
    threads = tf.train.start_queue_runners(coord=coord)
    xs = []
    names = []
    plt.figure()
    # Fix: the original read "for in range(num_files)" — a syntax error
    # with the loop variable missing.
    for _ in range(num_files):
        chromo_data, wav_name = get_next_chromogram(sess)

        plt.subplot(1, 2, 1)
        plt.imshow(chromo_data, cmap='Greys', interpolation='nearest')
        plt.title('Visualization of Sound Spectrum')

        plt.subplot(1, 2, 2)
        freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})
        plt.hist(freq_vals)
        plt.title('Histogram of Notes')
        plt.xlabel('Musical Note')
        plt.ylabel('Count')
        # NOTE(review): wav_name comes back from sess.run as bytes on
        # Python 3, so the saved name renders as "b'...'.png" — decode it
        # first if that matters. Also assumes matplotlib.pyplot was
        # imported as plt elsewhere — confirm at file top.
        plt.savefig('{}.png'.format(wav_name))
        plt.clf()  # original called clf() twice; once is enough

        names.append(wav_name)
        x = extract_feature_vector(sess, chromo_data)
        xs.append(x)
    xs = np.asmatrix(xs)
    return xs, names

def initial_cluster_centroids(X, k): return X[0:k, :]

def assign_cluster(X, centroids): expanded_vectors = tf.expand_dims(X, 0) expanded_centroids = tf.expand_dims(centroids, 1) distances = tf.reduce_sum(tf.square(tf.subtract(expanded_vectors, expanded_centroids)), 2) mins = tf.argmin(distances, 0) return mins

def recompute_centroids(X, Y): sums = tf.unsorted_segment_sum(X, Y, k) counts = tf.unsorted_segment_sum(tf.ones_like(X), Y, k) return sums / counts

with tf.Session() as sess: sess.run(init) X, names = get_dataset(sess) centroids = initial_cluster_centroids(X, k) i, converged = 0, False while not converged and i < max_iterations: i += 1 Y = assign_cluster(X, centroids) centroids = sess.run(recompute_centroids(X, Y)) print(zip(sess.run(Y), names))