shogun-toolbox / shogun

Shōgun
http://shogun-toolbox.org
BSD 3-Clause "New" or "Revised" License
3.03k stars 1.04k forks source link

Failed xvalidation with CRandomFourierDotFeatures #4169

Open naoa opened 6 years ago

naoa commented 6 years ago

Issue

Cross-validation with CRandomFourierDotFeatures fails with an error because the number of vectors does not match the number of labels.

Error message

[ERROR] In file /usr/local/src/shogun/src/shogun/classifier/svm/LibLinear.cpp line 113: number of vectors 11 does not match number of training labels 9
terminate called after throwing an instance of 'shogun::ShogunException'

Which version

Master version d1763b83fbc029bae1487201ef56977efdfe43a0

Reproducible code

The following code is taken from https://github.com/shogun-toolbox/shogun/blob/develop/examples/undocumented/libshogun/modelselection_grid_search_linear.cpp and changed only to use CRandomFourierDotFeatures.

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/features/RandomFourierDotFeatures.h>

using namespace shogun;

/*
 * Message sink passed to init_shogun(): writes the given text verbatim to
 * the given stream.
 *
 * target: stream to write to
 * str:    NUL-terminated message; emitted as-is (any '%' characters are
 *         not interpreted) and no newline is appended
 */
void print_message(FILE* target, const char* str)
{
    fputs(str, target);
}

/*
 * Builds the parameter tree handed to the grid search: an unnamed root node
 * with two children named "C1" and "C2", each of which gets its values from
 * build_values(-2.0, 2.0, R_EXP).
 *
 * Returns the newly allocated root node.
 */
CModelSelectionParameters* create_param_tree()
{
    CModelSelectionParameters* tree=new CModelSelectionParameters();

    /* both parameters are set up identically, so handle them in one loop;
     * "C1" is created and attached before "C2" */
    const char* const param_names[]={"C1", "C2"};
    for (int32_t idx=0; idx<2; ++idx)
    {
        CModelSelectionParameters* node=
                new CModelSelectionParameters(param_names[idx]);
        tree->append_child(node);
        node->build_values(-2.0, 2.0, R_EXP);
    }

    return tree;
}

/*
 * Reproduction program for the reported cross-validation failure.
 *
 * Runs grid-search model selection over the parameters "C1" and "C2" of a
 * CLibLinear classifier, using 5-subset stratified cross-validation on 11 toy
 * vectors that are wrapped in CRandomFourierDotFeatures. Afterwards the best
 * parameter combination is applied to the classifier, cross-validation is
 * run once more and the result is printed.
 *
 * Everything between init_shogun() and exit_shogun() is compiled only when
 * HAVE_LAPACK is defined. Returns 0 on normal completion; argc and argv are
 * not used.
 */
int main(int argc, char **argv)
{
    /* print_message is passed for all three init_shogun() arguments */
    init_shogun(&print_message, &print_message, &print_message);

#ifdef HAVE_LAPACK
    int32_t num_subsets=5;
    int32_t num_vectors=11;

    /* create some data: the 2 x num_vectors matrix is filled with
     * 0, 1, 2, ... in the order of its underlying array */
    SGMatrix<float64_t> matrix(2, num_vectors);
    for (int32_t i=0; i<num_vectors*2; i++)
        matrix.matrix[i]=i;

    /* create num_vectors 2-dimensional vectors */
    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);

    /* create one binary label per vector: +1 for even indices, -1 for odd */
    CBinaryLabels* labels=new CBinaryLabels(num_vectors);
    for (index_t i=0; i<num_vectors; ++i)
        labels->set_label(i, i%2==0 ? 1 : -1);

    /* wrap the dense features in random Fourier features (D=300, Gaussian
     * kernel, single kernel parameter width=8); these wrapped features, not
     * the dense ones, are what cross-validation is run on below */
  int32_t D = 300;
    SGVector<float64_t> params(1);
  float64_t width = 8;
    params[0] = width;
    CRandomFourierDotFeatures* r_features = new CRandomFourierDotFeatures(
            features, D, KernelName::GAUSSIAN, params);

    /* create linear classifier (use -s 2 option to avoid warnings) */
    CLibLinear* classifier=new CLibLinear(L2R_L2LOSS_SVC);

    /* splitting strategy: stratified split of the labels into num_subsets
     * subsets */
    CStratifiedCrossValidationSplitting* splitting_strategy=
            new CStratifiedCrossValidationSplitting(labels, num_subsets);

    /* accuracy evaluation */
    CContingencyTableEvaluation* evaluation_criterium=
            new CContingencyTableEvaluation(ACCURACY);

    /* cross validation class for evaluation in model selection; it is given
     * r_features (the random Fourier wrapper) together with the full labels */
    CCrossValidation* cross=new CCrossValidation(classifier, r_features, labels,
            splitting_strategy, evaluation_criterium);

    /* print all parameter available for modelselection
     * Dont worry if yours is not included, simply write to the mailing list */
    classifier->print_modsel_params();

    /* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
    CModelSelectionParameters* param_tree=create_param_tree();
    param_tree->print_tree();

    /* handles all of the above structures in memory */
    CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
            cross, param_tree);

    /* set autolocking to false to get rid of warnings */
    cross->set_autolock(false);

    /* run the grid search and print the winning parameter combination.
     * NOTE(review): the reported error "number of vectors 11 does not match
     * number of training labels 9" is presumably raised from inside this
     * call, during the first cross-validation run - confirm */
    CParameterCombination* best_combination=grid_search->select_model();
    SG_SPRINT("best parameter(s):\n");
    best_combination->print_tree();

    /* apply the best parameters to the classifier and cross-validate again;
     * the evaluate() result is cast first and its type is checked afterwards */
    best_combination->apply_to_machine(classifier);
    CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();

    if (result->get_result_type() != CROSSVALIDATION_RESULT)
        SG_SERROR("Evaluation result is not of type CCrossValidationResult!");

    result->print_result();

    /* clean up: only these three objects are released explicitly.
     * NOTE(review): the remaining objects are presumably released through
     * grid_search (see "handles all of the above structures" above) - confirm */
    SG_UNREF(result);
    SG_UNREF(best_combination);
    SG_UNREF(grid_search);
#endif // HAVE_LAPACK
    exit_shogun();

    return 0;
}
vigsterkr commented 6 years ago

@naoa thnx for the report and the example code, we'll look into it!

naoa commented 6 years ago

I have not yet fully understood the class composition, but the reason seems to be that CDenseFeatures supports CSubsetStack, whereas CRandomKitchenSinksDotFeatures does not.

https://github.com/shogun-toolbox/shogun/blob/develop/src/shogun/features/DenseFeatures.cpp#L437

https://github.com/shogun-toolbox/shogun/blob/develop/src/shogun/features/RandomKitchenSinksDotFeatures.cpp#L176

Maybe CRandomKitchenSinksDotFeatures::get_num_vectors() should return m_subset_stack->get_size().