Fix error in this code - Githubissues

void GenerativeGramConsistencyTest(ofstream &_tex,unsigned int &n) { //Notes from On the Nystrom Method for Approximating a Gram Matrix for Improved Kernel-Based Learning by //Petros Drineas DRINEP@CS.RPI.EDU Department of Computer Science Rensselaer Polytechnic Institute

//Given a collection X of data points, which are often but not necessarily elements of R^m, techniques
//such as PCA, SVM, GP and SVD can be used to identify structure from X by computing linear functions.
//PCA calculates the subspace spanned by the first k eigenvectors and is used to give a k dimensional
//model of the data with minimal residual.  This type of  spectral analysis has a long theoretical history and forms the basis 
//of many modern machine learning techniques. In real world applications however there is typically nonlinear structure in the data.
//Some domains sucha as Natural Language processing (NLP) data may not be amenable to the definition of meaningful linear operations.
//It is for such situations that kernel learning methods were developed.  Kernel methods map data in to high 
//dimensional space and use inner products designed to capture positional information.  Kernel methods use 
//the inner product distances for classification and regression.Kernel methods exploit the information encoded in the inner product
//between all pairs of data items and are successful because there is often an efficient method to
//compute inner products between very complex infinite dimensional vectors. Kernel
//algorithms provide a way to deal with nonlinear structure by reducing nonlinear algorithms
//to algorithms that are linear in some feature space F.  F is nonlinearly related to the original input
//space.  
//Let x_i \ in \dblr^m X = [  ...x_i ... ] rowsiwe. Here we test the linear structure of a genreative data set against the
//Gream kernel. In kernel-based methods, a set of features is chosen that define a space F , where it is hoped relevant structure will be 
//revealed, the data X are then mapped to the feature space F using a mapping F : X  -> F , and then classification, regression, or
//clustering is performed in F using traditional methods such as linear SVMs, GPs, or PCA. If F
//is chosen to be a dot product space and if one defines the kernel matrix, also known as the Gram
//matrix, G \in  \dlbr^{n×n} as G_ij = k(x_i, x_j) = (F(xi),F(xj)), then any algorithm whose operations can be
//expressed in the input space in terms of dot products can be generalized to an algorithm which
//operates in the feature space by substituting a kernel function for the inner product. In practice, this
//means presenting the Gram matrix G in place of the input covariance matrix X^\dag X. Relatedly, using
//the kernel k instead of a dot product in the input space corresponds to mapping the data set into a
//(usually) high-dimensional dot product space F by a (usually nonlinear) mapping \Phi : \dblr^m -> F , and
//taking dot products there, i.e., k(x_i, x_j) = (F(x_i),F(x_j)). Note that for the commonly-used Mercer
//kernels, G is a symmetric positive semidefinite (SPSD) matrix.
//The generality of this framework should be emphasized. For example, there has been much
//work recently on dimensionality reduction for nonlinear manifolds in high-dimensional spaces. 
//Ie Isomap, LLE, and graph Laplacian eigenmap, and Hessian eigenmaps.  These methods first induce a local neighborhood 
//structure on the data and then use this local structure to find a global embedding of the manifold in a lower dimensional space.

unsigned int numFeatures=3;
unsigned int sampleSize=4096;
//Is this PSD? If not then we will not get a good Cholesky factorization? 
//One way to find out is to find eigs, and see is \sigma(W) \in [0,\inflty)
klMatrix<double> SigmaW=SampleWishart(numFeatures);

cout<<"Sample Size = "<<sampleSize<<endl;

cout<<"Feature dim = "<<numFeatures<<endl<<endl;

cout<<"Generative W = "<<endl<<SigmaW<<endl;

klPrincipalComponents<double> pcaWishart=SigmaW;
klMatrix<double> V=pcaWishart(2);
klVector<double> wishartEigs=pcaWishart.eigenvalues();
cout<<"W eigs = "<<endl<<wishartEigs<<endl;

unsigned int i;
unsigned int j;
klVector<double> meanVector(numFeatures);
meanVector=1;

klNormalMultiVariate<double> T(meanVector,SigmaW );
klSamplePopulation<double> X(sampleSize,numFeatures);
for(j=0;j<sampleSize;j++)
{
    klVector<double> tv=T();
    X.setRow(j,tv);
}

klMatrix<double> SampleCovariance = X.covarianceMatrix();

cout<<"Sample Covariance = "<<endl<<SampleCovariance<<endl;
cout<<"Sample mean = " <<endl<<X.sampleMean()<<endl;

//This should be small
cout<<"SampleCovariance-SigmaW = " <<endl<<SampleCovariance-SigmaW<<endl;

klPrincipalComponents<double> pcaCovariance=SampleCovariance;
klMatrix<double> VC =pcaCovariance(2);
klVector<double> covarianceEigs=pcaCovariance.eigenvalues();
cout<<"Sample Covariance eigs ="<<endl<< covarianceEigs<<endl;

//How close is the sample covariance to a PSD matrix?
//First we'll need a measure of matrix closeness and a way to find the 
//nearest PSD matrix in that measure. 

//The Gram matrix G of a set of vectors x_1, ...,x_i,...x_n in an inner product space is the Hermitian matrix of inner products, whose entries are given by G_{ij} = <x_i,x_j>
//An important application is to compute linear independence: a set of vectors is linearly independent if and only if the determinant of the Gram matrix is non-zero.

klMatrix<double> G(sampleSize,sampleSize);
G.makeNanFriendly();
for(i=0;i<sampleSize;i++)
{
    klVector<double> x_i= X[i];
    for(j=0;j<sampleSize;j++)
    {
        klVector<double> x_j= X[j];
        G[i][j]=(x_i).dotBLAS(x_j);
    }
}

klMatrix<double> Gf(numFeatures,numFeatures);

klMatrix<double> centered = X.centeredData();
klSamplePopulation<double> normalizedSample(centered);
klVector<double> normedMean = normalizedSample.sampleMean();

cout<<"Centered Mean = "<<endl<<normalizedSample.sampleMean()<<endl;
cout<<"Centered Covariance = "<<endl<<normalizedSample.covarianceMatrix() <<endl;

for(i=0;i<numFeatures;i++)
{
    klVector<double> x_i= centered.getColumn(i);
    for(j=0;j<numFeatures;j++)
    {
        klVector<double> x_j= centered.getColumn(j);
        Gf[i][j]=(x_i).dotBLAS(x_j);
    }
}
cout<<"Gram Matrix Gf Not scaled by sample size = "<<endl<<  Gf  <<endl;

cout<<"Gram Matrix Gf  scaled by sample size = "<<endl<<  Gf / (double) sampleSize <<endl;

klMatrix<double> diff = SampleCovariance / (double) sampleSize;

cout<<"SampleCovariance - Scaled Gf="<<endl<<diff<<endl;

cout<<"EigenDecomp of SampleCovariance = "<<endl<<VC<<endl;

klPrincipalComponents<double> pcaGram=Gf;
klMatrix<double> VCGram =pcaGram(2);
cout<<"EigenDecomp of Gram Matrix = "<<endl<<VCGram<<endl;

}

//////////////////OUTPUT Sample Size = 4096 Feature dim = 3

Generative W = 1.13984, 1.53508, 0.581436 1.53508, 9.98839, 1.60541 0.581436, 1.60541, 0.427752

W eigs = 16.5614, -4.8367e-026, -4.8367e-026

Sample Covariance = 1.16691, 1.63713, 0.60455 1.63713, 10.1498, 1.66286 0.60455, 1.66286, 0.444172

Sample mean = 0.995103, 1.01065, 1.00277

SampleCovariance-SigmaW = 0.0270718, 0.102049, 0.0231138 0.102049, 0.161428, 0.0574498 0.0231138, 0.0574498, 0.0164199

Sample Covariance eigs = 16.9632, -4.8367e-026, -4.8367e-026

Centered Mean = -6.55834e-016, -2.3782e-016, -3.33154e-015

Centered Covariance = 1.16691, 1.63713, 0.60455 1.63713, 10.1498, 1.66286 0.60455, 1.66286, 0.444172

Gram Matrix Gf Not scaled by sample size = 4779.68, 6704.03, 2475.63 6704.03, 41573.7, 6809.41 2475.63, 6809.41, 1819.33

Gram Matrix Gf scaled by sample size = 1.16691, 1.63673, 0.604403 1.63673, 10.1498, 1.66245 0.604403, 1.66245, 0.444172

SampleCovariance - Scaled Gf= 0.000284891, 0.000399689, 0.000147595 0.000399689, 0.00247798, 0.000405972 0.000147595, 0.000405972, 0.00010844

EigenDecomp of SampleCovariance = -0.12675, -0.973444, -0.190633 -0.348345, 0.223624, -0.910301 0.928757, -0.0489751, -0.367439

EigenDecomp of Gram Matrix = -0.126694, -0.973463, -0.190575 -0.348225, 0.223543, -0.910367 0.92881, -0.0489749, -0.367306

wavescholar / klMatrixCore

Fix error in this code #7