Open cydcydcyd opened 5 years ago
Hi,
I am trying this on branch 4.1 with the regression data in examples/Supervised/data:
#include <shark/Models/Kernels/GaussianRbfKernel.h>
#include <shark/Algorithms/Trainers/EpsilonSvmTrainer.h>
#include <shark/ObjectiveFunctions/Loss/SquaredLoss.h>
#include <shark/ObjectiveFunctions/Loss/EpsilonHingeLoss.h>
#include <shark/Data/Dataset.h>
#include <shark/Data/Csv.h>
using namespace shark;
/// Trains an epsilon-SVM regressor with a Gaussian RBF kernel on the CSV
/// regression data for C = 0.1, 1, ..., 10000 and, for each C, prints the
/// range of the learned coefficients together with the squared loss and the
/// epsilon-insensitive loss on the training part and on the held-out part.
int main()
{
    // experiment settings
    const double gamma = 0.1;     // parameter handed to the Gaussian RBF kernel
    const double epsilon = 0.03;  // shared by the trainer and the epsilon-insensitive loss
    GaussianRbfKernel<> kernel(gamma);  // was a duplicated literal 0.1; `gamma` was unused
    SquaredLoss<> loss;
    EpsilonHingeLoss epsLoss(epsilon);

    // load the dataset
    Data<RealVector> inputs;
    Data<RealVector> labels;
    importCSV(inputs, "regression_input.csv", ',');
    importCSV(labels, "regression_label.csv", ',');
    RegressionDataset data(inputs, labels);
    data = shuffle(data);

    // Split at the 90% mark: per the Shark documentation splitAtElement keeps
    // the leading elements in `data` and returns the remainder (test set).
    const std::size_t splitIndex = static_cast<std::size_t>(0.9 * data.numberOfElements());
    RegressionDataset test = splitAtElement(data, splitIndex);

    // the model is reused across iterations; every train() call refits it
    KernelExpansion<RealVector> svm;
    for (double C = 0.1; C <= 10000.0; C *= 10) {
        // a fresh trainer for the current regularization constant
        EpsilonSvmTrainer<RealVector> trainer(&kernel, C, epsilon);
        trainer.train(svm, data);

        std::cout << "C= " << C << '\n';
        // Print in the order announced by the label (min first, then max);
        // the original printed max first under a "min/max" label.
        std::cout << " weights min/max:" << min(svm.alpha()) << " " << max(svm.alpha()) << '\n';

        Data<RealVector> output = svm(data.inputs());
        const double train_error = loss.eval(data.labels(), output);
        const double train_eps_error = epsLoss.eval(data.labels(), output);
        std::cout << "training error: " << train_error << " " << train_eps_error << '\n';

        output = svm(test.inputs());
        const double test_error = loss.eval(test.labels(), output);
        const double test_eps_error = epsLoss.eval(test.labels(), output);
        // flush once per iteration so progress is visible while later (slower) runs train
        std::cout << " test error: " << test_error << " " << test_eps_error << "\n" << std::endl;
    }
}
It seems to be working for me. I think there is an issue in your code: you are doing the training/test split wrong, or at least there is going to be unused data left over (you first split off 10% as training and then, from the remaining 90%, half as test, leaving 45% untouched). The output looks quite correct to me.
You might have a problem with the epsilon for your data. Maybe it is too large? Epsilon gives the maximum amount of error that you are willing to tolerate as noise.
`#define NOMINMAX //=========================================================================== /! *
*/ //===========================================================================
include <shark/LinAlg/Base.h>
include <shark/Core/Random.h>
include <shark/Models/Kernels/GaussianRbfKernel.h>
include <shark/Algorithms/Trainers/EpsilonSvmTrainer.h>
include <shark/Algorithms/Trainers/RegularizationNetworkTrainer.h>
include <shark/ObjectiveFunctions/Loss/SquaredLoss.h>
include <shark/Data/Dataset.h>
include <shark/Data/DataDistribution.h>
include <shark/Data/Csv.h>
using namespace shark;
int main() {
    // experiment settings
    // NOTE(review): the pasted snippet appears truncated after these settings;
    // the declarations were previously swallowed by the line comment because
    // the whole body had been collapsed onto a single line.
    unsigned int ell = 200;
    unsigned int tests = 10000;
    double C = 10.0;
    double gamma = 1.0 / C;
    double epsilon = 0.03;
} `