Shark-ML / Shark

The Shark Machine Learning Library. See more:
http://shark-ml.github.io/Shark/
GNU Lesser General Public License v3.0

The test result of EpsilonSvmTrainer is the same value. #271

Open cydcydcyd opened 5 years ago

cydcydcyd commented 5 years ago

#define NOMINMAX
//===========================================================================

#include <shark/LinAlg/Base.h>
#include <shark/Core/Random.h>
#include <shark/Models/Kernels/GaussianRbfKernel.h>
#include <shark/Algorithms/Trainers/EpsilonSvmTrainer.h>
#include <shark/Algorithms/Trainers/RegularizationNetworkTrainer.h>
#include <shark/ObjectiveFunctions/Loss/SquaredLoss.h>
#include <shark/Data/Dataset.h>
#include <shark/Data/DataDistribution.h>
#include <shark/Data/Csv.h>
#include <iostream>

using namespace shark;

int main()
{
    // experiment settings
    unsigned int ell = 200;
    unsigned int tests = 10000;
    double C = 10.0;
    double gamma = 1.0 / C;
    double epsilon = 0.03;

    GaussianRbfKernel<> kernel(0.1);
    SquaredLoss<> loss;

    // load the dataset
    Data<RealVector> inputs;
    Data<RealVector> labels;
    importCSV(inputs, "regression_input.csv", ',');
    importCSV(labels, "regression_label.csv", ',');

    RegressionDataset data(inputs, labels);
    data.shuffle();
    RegressionDataset training = splitAtElement(data, static_cast<std::size_t>(0.9*data.numberOfElements()));
    RegressionDataset test = splitAtElement(data, static_cast<std::size_t>(0.5*data.numberOfElements()));

    // define the machines
    KernelExpansion<RealVector> svm[2] = {
        KernelExpansion<RealVector>(),
        KernelExpansion<RealVector>()
    };

    // define the corresponding trainers
    AbstractTrainer<KernelExpansion<RealVector> >* trainer[2];
    trainer[0] = new EpsilonSvmTrainer<RealVector>(&kernel, C, epsilon);
    trainer[1] = new RegularizationNetworkTrainer<RealVector>(&kernel, gamma);

    for (unsigned int i = 0; i < 2; i++)
    {
        std::cout << "METHOD" << (i + 1) << " " << trainer[i]->name().c_str() << std::endl;
        std::cout << "training ..." << std::flush;
        trainer[i]->train(svm[i], training);
        std::cout << "done" << std::endl;

        Data<RealVector> output = svm[i](training.inputs());
        double train_error = loss.eval(training.labels(), output);
        std::cout << "training error: " << train_error << std::endl;
        output = svm[i](test.inputs());
        double test_error = loss.eval(test.labels(), output);
        std::cout << "    test error: " << test_error << "\n\n";
    }

    delete trainer[0];
    delete trainer[1];
    return 0;
}

(screenshot of the program output attached) The train error is small, but the test error is very large, and the prediction for every sample is the same value, as shown in the picture. The trainer seems to have no effect on the test data.
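
One way to make the symptom concrete (a hypothetical diagnostic added for illustration, not part of the original program) is to print the range of the SVM's predictions on the test set; if the range collapses to a single value, the model has degenerated to a constant function:

#include <shark/Models/Kernels/GaussianRbfKernel.h>
#include <shark/Algorithms/Trainers/EpsilonSvmTrainer.h>
#include <shark/Data/Dataset.h>
#include <shark/Data/Csv.h>
#include <algorithm>
#include <iostream>
#include <limits>

using namespace shark;

int main()
{
    Data<RealVector> inputs;
    Data<RealVector> labels;
    importCSV(inputs, "regression_input.csv", ',');
    importCSV(labels, "regression_label.csv", ',');
    RegressionDataset data(inputs, labels);

    // same split as in the program above, reproduced for illustration
    data.shuffle(); // as in the original post; branch 4.x uses data = shuffle(data)
    RegressionDataset training = splitAtElement(data, static_cast<std::size_t>(0.9*data.numberOfElements()));
    RegressionDataset test = splitAtElement(data, static_cast<std::size_t>(0.5*data.numberOfElements()));

    GaussianRbfKernel<> kernel(0.1);
    KernelExpansion<RealVector> svm;
    EpsilonSvmTrainer<RealVector> trainer(&kernel, 10.0, 0.03);
    trainer.train(svm, training);

    // print the spread of the predictions on the test set:
    // if min == max, the model outputs a constant
    Data<RealVector> output = svm(test.inputs());
    double lo = std::numeric_limits<double>::max();
    double hi = std::numeric_limits<double>::lowest();
    for(auto const& p: output.elements()){
        lo = std::min(lo, p(0));
        hi = std::max(hi, p(0));
    }
    std::cout << "prediction range: [" << lo << ", " << hi << "]" << std::endl;
}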

Ulfgard commented 5 years ago

Hi,

I am trying this on branch 4.1 with the regression data in examples/Supervised/data:

#include <shark/Models/Kernels/GaussianRbfKernel.h>
#include <shark/Algorithms/Trainers/EpsilonSvmTrainer.h>
#include <shark/ObjectiveFunctions/Loss/SquaredLoss.h>
#include <shark/ObjectiveFunctions/Loss/EpsilonHingeLoss.h>
#include <shark/Data/Dataset.h>
#include <shark/Data/Csv.h>

using namespace shark;

int main()
{
    // experiment settings
    double gamma = 0.1;
    double epsilon = 0.03;

    GaussianRbfKernel<> kernel(0.1);
    SquaredLoss<> loss;
    EpsilonHingeLoss epsLoss(epsilon);

    // generate dataset

    Data<RealVector> inputs;
    Data<RealVector> labels;
    importCSV(inputs, "regression_input.csv", ',');
    importCSV(labels, "regression_label.csv", ',');

    RegressionDataset data(inputs, labels);
    data = shuffle(data);
    RegressionDataset test = splitAtElement(data, static_cast<std::size_t>(0.9*data.numberOfElements()));

    // define the machines
    KernelExpansion<RealVector> svm;
    for(double C=0.1; C <=10000.0; C*=10){

        // define the corresponding trainers
        EpsilonSvmTrainer<RealVector> trainer(&kernel, C, epsilon);
        trainer.train(svm, data);
        std::cout<<"C= "<<C<<std::endl;
        std::cout<<" weights min/max:"<< max(svm.alpha())<<" "<<min(svm.alpha())<<std::endl;
        Data<RealVector> output = svm(data.inputs());
        double train_error = loss.eval(data.labels(), output);
        double train_eps_error = epsLoss.eval(data.labels(), output);
        std::cout << "training error: " << train_error <<" "<<train_eps_error<< std::endl;
        output = svm(test.inputs());
        double test_error = loss.eval(test.labels(), output);
        double test_eps_error = epsLoss.eval(test.labels(), output);
        std::cout << "    test error: " << test_error <<" "<<test_eps_error<< "\n\n";
    }
}

It seems to be working for me. There is an issue in your code: you are doing the training/test split wrong, I think; at the very least there is unused data left over (you first split off 10% as training, and from the remaining 90% you take half as test, leaving 45% untouched). The output looks quite correct to me.
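
For reference, here is a minimal sketch of the clean split described above (my reconstruction, following the same shuffle/splitAtElement usage as the snippet in this comment): splitAtElement truncates the dataset to the first part and returns the removed tail, so a single call yields a 90/10 train/test split with nothing left over.

#include <shark/Data/Dataset.h>
#include <shark/Data/Csv.h>
#include <iostream>

using namespace shark;

int main()
{
    Data<RealVector> inputs;
    Data<RealVector> labels;
    importCSV(inputs, "regression_input.csv", ',');
    importCSV(labels, "regression_label.csv", ',');

    RegressionDataset data(inputs, labels);
    data = shuffle(data); // branch 4.x API; older releases use data.shuffle()

    // keep the first 90% in `data` as the training set; splitAtElement()
    // removes the last 10% and returns it as the test set
    std::size_t cut = static_cast<std::size_t>(0.9 * data.numberOfElements());
    RegressionDataset test = splitAtElement(data, cut);

    std::cout << "training elements: " << data.numberOfElements()
              << ", test elements: " << test.numberOfElements() << std::endl;
}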

You might also have a problem with the epsilon for your data. Maybe it is too large? Epsilon gives the maximum amount of error that you are willing to tolerate as noise.
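
One way to test that hypothesis (a hypothetical sweep, mirroring the C loop above) is to hold C fixed and scan epsilon; once epsilon exceeds the spread of the labels, every point fits inside the epsilon-tube and a constant function already achieves zero epsilon-insensitive loss:

#include <shark/Models/Kernels/GaussianRbfKernel.h>
#include <shark/Algorithms/Trainers/EpsilonSvmTrainer.h>
#include <shark/ObjectiveFunctions/Loss/SquaredLoss.h>
#include <shark/Data/Dataset.h>
#include <shark/Data/Csv.h>
#include <iostream>

using namespace shark;

int main()
{
    Data<RealVector> inputs;
    Data<RealVector> labels;
    importCSV(inputs, "regression_input.csv", ',');
    importCSV(labels, "regression_label.csv", ',');

    RegressionDataset data(inputs, labels);
    data = shuffle(data);
    RegressionDataset test = splitAtElement(data, static_cast<std::size_t>(0.9*data.numberOfElements()));

    GaussianRbfKernel<> kernel(0.1);
    SquaredLoss<> loss;
    KernelExpansion<RealVector> svm;

    // hold C fixed and scan epsilon; too large an epsilon lets the SVM
    // get away with a (near-)constant output
    for(double epsilon = 0.001; epsilon < 2.0; epsilon *= 10){
        EpsilonSvmTrainer<RealVector> trainer(&kernel, 10.0, epsilon);
        trainer.train(svm, data);
        std::cout << "epsilon= " << epsilon
                  << " train: " << loss.eval(data.labels(), svm(data.inputs()))
                  << " test: " << loss.eval(test.labels(), svm(test.inputs())) << std::endl;
    }
}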