MTG / essentia

C++ library for audio and music analysis, description and synthesis, including Python bindings
http://essentia.upf.edu
GNU Affero General Public License v3.0
2.81k stars 525 forks source link

Creating an example for EffnetDiscogs #1283

Closed simdax closed 9 months ago

simdax commented 1 year ago

Hello all

I have to admit I'm not very familiar with AI, so I'm struggling to create a simple C++ example that works like the other TensorFlow examples (musicnn or vggish).

In python I have a result with this code:


# Load the input file through MonoLoader, requesting a 16000 sample rate.
loader = MonoLoader(filename="../data/raw/blues/blues.00000.wav", sampleRate=16000)
audio = loader()

# Build the EffnetDiscogs predictor from the graph file and run it on the audio.
model = TensorflowPredictEffnetDiscogs(graphFilename="../models/discogs-effnet-bs64-1.pb")
activations = model(audio)

# Console output reported for this run:
#    [   INFO   ] TensorflowPredict: Successfully loaded graph file: `../models/discogs-effnet-bs64-1.pb`

# Average the activations along axis 0, then pick the index of the largest
# mean value (the first entry of the descending argsort).
activations_mean = np.mean(activations, axis=0)
descending_order = np.argsort(activations_mean)[::-1]
top_n_idx = descending_order[0]

I just copied those example files, which are all very similar, but sadly the result does not seem to work for me.

Could anyone help? :) Thank you


#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
#include <essentia/algorithmfactory.h>
#include <essentia/streaming/algorithms/poolstorage.h>
#include <essentia/scheduler/network.h>
#include "credit_libav.h"

using namespace std;
using namespace essentia;
using namespace essentia::streaming;
using namespace essentia::scheduler;

// Reports whether `option` occurs anywhere in the argument range [begin, end).
// Each C string in the range is compared against `option` by value.
bool hasFlag(char** begin, char** end, const std::string& option) {
  for (char** arg = begin; arg != end; ++arg) {
    if (*arg == option) return true;
  }
  return false;
}

// Returns the token that immediately follows the first occurrence of `option`
// in the argument range [begin, end). Yields an empty string when `option` is
// not present or when it is the last token of the range.
std::string getArgument(char** begin, char** end, const std::string& option) {
  char** position = std::find(begin, end, option);
  if (position == end) {
    return std::string();
  }

  char** value = position + 1;
  if (value == end) {
    return std::string();
  }

  return std::string(*value);
}

// Prints the command-line usage summary to stdout, followed by the LibAV
// credit emitted by creditLibAV().
//
// fileName: the program name shown at the start of the usage line
//           (callers in this file pass argv[0]).
//
// Fixes relative to the previous text: the "predictions" typo, the long form
// of the output-node option (it is spelled "--output-node" in the list of
// accepted flags), and the documented default node, which is the
// "PartitionedCall" value main() starts from.
void printHelp(const std::string& fileName) {
    cout << "Usage: " << fileName << " pb_graph audio_input output_json [--help|-h] [--list-nodes|-l] [--patchwise|-p] [[--output-node|-o] node_name]" << endl;
    cout << "  -h, --help: print this help" << endl;
    cout << "  -l, --list-nodes: list the nodes in the input graph (model)" << endl;
    cout << "  -p, --patchwise: write out patch-wise predictions (one per patch) instead of averaging them" << endl;
    cout << "  -o, --output-node: node (layer) name to retrieve from the graph (default: PartitionedCall)" << endl;
    creditLibAV();
}

// Every command-line switch the program accepts, in short and long spelling.
// main() rejects any argument that starts with '-' and is not listed here.
std::vector<std::string> flags{
    "-h", "--help",
    "-l", "--list-nodes",
    "-p", "--patchwise",
    "-o", "--output-node",
};

int main(int argc, char* argv[]) {
  // Sanity check for the command line options.
  for (char** iter = argv; iter < argv + argc; ++iter) {
    if (**iter == '-') {
      string flag(*iter);
      if (find(flags.begin(), flags.end(), flag) == flags.end()){
        cout << argv[0] << ": invalid option '" << flag << "'" << endl;
        printHelp(argv[0]);
        exit(1);
      }
    }
  }

  string outputLayer = "PartitionedCall";

  string graphName = argv[1];
  string audioFilename = argv[2];
  string outputFilename = argv[3];

  // rather to output the patch-wise predictions or to average them.
  const bool average = (hasFlag(argv, argv + argc, "--patchwise") ||
                        hasFlag(argv, argv + argc, "-p")) ? false : true;

  // register the algorithms in the factory(ies)
  essentia::init();

  Pool pool;
  Pool aggrPool;  // a pool for the the aggregated predictions
  Pool* poolPtr = &pool;

  /////// PARAMS //////////////
  Real sampleRate = 16000.0;

  AlgorithmFactory& factory = streaming::AlgorithmFactory::instance();

  Algorithm* audio = factory.create("MonoLoader",
                                    "filename", audioFilename,
                                    "sampleRate", sampleRate);

  Algorithm* tfp   = factory.create("TensorflowPredictEffnetDiscogs",
                                    "graphFilename", graphName,
                                    "output", outputLayer);
  // If the output layer is empty, we have already printed the list of nodes.
  // Exit now.
  if (outputLayer.empty()){
    essentia::shutdown();

    return 0;
  }

  /////////// CONNECTING THE ALGORITHMS ////////////////
  cout << "-------- connecting algos --------" << endl;

  audio->output("audio")     >>  tfp->input("signal");
  tfp->output("predictions") >>  PC(pool, "predictions");

  /////////// STARTING THE ALGORITHMS //////////////////
  cout << "-------- start processing " << audioFilename << " --------" << endl;

  // create a network with our algorithms...
  Network n(audio);
  // ...and run it, easy as that!
  n.run();

  if (average) {
    // aggregate the results
    cout << "-------- averaging the predictions --------" << endl;

    const char* stats[] = {"mean"};

    standard::Algorithm* aggr = standard::AlgorithmFactory::create("PoolAggregator",
                                                                  "defaultStats", arrayToVector<string>(stats));

    aggr->input("input").set(pool);
    aggr->output("output").set(aggrPool);
    aggr->compute();

    poolPtr = &aggrPool;

    delete aggr;
  }

  // write results to file
  cout << "-------- writing results to json file " << outputFilename << " --------" << endl;

  standard::Algorithm* output = standard::AlgorithmFactory::create("YamlOutput",
                                                                   "format", "json",
                                                                   "filename", outputFilename);
  output->input("pool").set(*poolPtr);
  output->compute();
  n.clear();

  delete output;
  essentia::shutdown();

  return 0;
}

I compile it and run it like this: ./build/src/examples/essentia_streaming_discogs test/models/effnetdiscogs/effnetdiscogs-bs64-1.pb test/audio/recorded/mozart_c_major_30sec.wav outpout.json

the result is empty :(

{
"metadata": {
    "version": {
        "essentia": "2.1-beta6-dev"
    }
}
}

Thank you very much :)

palonso commented 1 year ago

Hi @simdax, the Effnet model you are using requires a minimum audio length of ~60 seconds. In standard mode we simply pad shorter audios, but the feature is not implemented in streaming mode.

Could you try with a longer audio to verify your implementation?

dbogdanov commented 9 months ago

Reopen if this is still an issue.