MTG / essentia

C++ library for audio and music analysis, description and synthesis, including Python bindings
http://essentia.upf.edu
GNU Affero General Public License v3.0
2.85k stars 533 forks source link

I get different spectral centroid values. Don't know which is correct. #907

Open ruski15 opened 5 years ago

ruski15 commented 5 years ago

Hi,

When I run my files through musicextractor.py and example_getspectralcentroid.py, I get different spectral centroid values, and I don't know why they differ or which one is correct.

This is one of the audios: 0_dn_cnt.wav.zip

Results: Archive.zip

And the code for reference below.

Thanks a lot!

This is musicextractor.py

# Copyright (C) 2006-2016  Music Technology Group - Universitat Pompeu Fabra
#
# This file is part of Essentia
#
# Essentia is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation (FSF), either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

from essentia.standard import MusicExtractor, YamlOutput
from essentia import EssentiaError
from argparse import ArgumentParser
import os
import sys

def music_extractor(audio_file, sig_file, profile=None, store_frames=False, format='yaml'):
    """Run MusicExtractor on an audio file and write the results to disk.

    Args:
        audio_file: path of the audio file passed to the extractor.
        sig_file: output path prefix; '.sig' is appended for the first pool
            returned by the extractor and '.frames.sig' for the second one.
        profile: optional value forwarded as MusicExtractor's ``profile``
            parameter; when falsy the extractor is built without arguments.
        store_frames: when true, the second (frames) pool is written too.
        format: value forwarded as YamlOutput's ``format`` parameter.

    Raises:
        EssentiaError: if the directory part of ``sig_file`` exists but is a
            regular file, so the output directory cannot be created.
    """
    if profile:
        extractor = MusicExtractor(profile=profile)
    else:
        extractor = MusicExtractor()

    poolStats, poolFrames = extractor(audio_file)

    folder = os.path.dirname(sig_file)

    # dirname() returns '' when sig_file has no directory component (output
    # goes to the current directory). os.makedirs('') raises, so the
    # directory handling is only done when there is a directory to handle.
    if folder:
        if os.path.isfile(folder):
            raise EssentiaError('Cannot create directory {} .There exist a file with the same name. Aborting analysis.'.format(folder))
        if not os.path.exists(folder):
            os.makedirs(folder)

    output = YamlOutput(filename=sig_file+'.sig', format=format)
    output(poolStats)
    if store_frames:
        YamlOutput(filename=sig_file + '.frames.sig', format=format)(poolFrames)

if __name__ == '__main__':
    # Command-line entry point: parse the arguments and forward them
    # unchanged to music_extractor().
    cli = ArgumentParser(
        description='\nAnalyzes an audio file using MusicExtractor.\n')

    # Positional arguments: input audio and output file prefix.
    cli.add_argument('audio_file', help='audio file name')
    cli.add_argument('sig_file', help='sig file name')

    # Optional switches.
    cli.add_argument('--profile', required=False,
                     help='MusicExtractor profile')
    cli.add_argument('--store_frames', action='store_true', required=False,
                     help='store frames data')
    cli.add_argument('--format', choices=['yaml', 'json'], default='yaml',
                     help='yaml or json')

    options = cli.parse_args()

    music_extractor(
        options.audio_file,
        options.sig_file,
        profile=options.profile,
        store_frames=options.store_frames,
        format=options.format,
    )

This is example_getspectralcentroid.py

#!/usr/bin/env python

# Copyright (C) 2006-2016  Music Technology Group - Universitat Pompeu Fabra
#
# This file is part of Essentia
#
# Essentia is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation (FSF), either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

from essentia import *
from essentia.standard import *

def extractor(filename):
    """Compute per-frame centroid values of a file's spectrum and save stats.

    The audio is loaded with MonoLoader at sampleRate=44100, cut into frames
    of 1024 samples with a hop of 512, and each frame goes through
    Windowing -> Spectrum -> Centroid. The per-frame values are collected
    under 'lowlevel.centroid', aggregated with the 'mean' and 'stdev'
    statistics, and written to ``filename + '.features.yaml'``.

    No other processing is applied to the loaded signal before framing.
    """
    samples = MonoLoader(filename=filename, sampleRate=44100)()

    # Algorithms are instantiated once and reused for every frame.
    window = Windowing()
    spectrum = Spectrum()
    # range is 22050, i.e. half of the 44100 used when loading the audio.
    spectral_centroid = Centroid(range=22050)
    frame_values = Pool()

    frames = FrameGenerator(samples, frameSize=1024, hopSize=512)
    for frame in frames:
        windowed = window(frame)
        magnitudes = spectrum(windowed)
        frame_values.add('lowlevel.centroid', spectral_centroid(magnitudes))

    # Reduce the per-frame values to the requested statistics.
    aggregator = PoolAggregator(defaultStats=['mean', 'stdev'])
    summary = aggregator(frame_values)

    # The output file sits next to the input, with a suffix appended.
    writer = YamlOutput(filename=filename + '.features.yaml')
    writer(summary)

# some python magic so that this file works as a script as well as a module
# if you do not understand what that means, you don't need to care
if __name__ == '__main__':
    import sys

    # The only expected failure is a missing command-line argument, so only
    # IndexError is caught; a bare except would also hide KeyboardInterrupt
    # and genuine bugs.
    try:
        input_file = sys.argv[1]
    except IndexError:
        print("usage: %s <input_file>" % sys.argv[0])
        # Exit with a non-zero status so callers can detect the usage error.
        sys.exit(1)

    extractor(input_file)
    # Parenthesised so the full output name is formatted explicitly instead
    # of relying on '%' binding tighter than '+'.
    print('Results written to %s' % (input_file + '.features.yaml'))
dbogdanov commented 5 years ago

When using MusicExtractor, some descriptors are computed after equal-loudness filtering. This includes spectral_centroid, whereas example_getspectralcentroid.py applies no such filter to the audio; hence the difference.