HenrikSchumacher / DPR1Eigensystem

A Mathematica implementation of an eigensolver for symmetric, positive-definite rank-one modifications of diagonal matrices.
MIT License
0 stars 0 forks source link

evaluation of DPR1 accuracy in an application #1

Open yaroslavvb opened 1 year ago

yaroslavvb commented 1 year ago

I did evaluation of DPR1 decomposition in another task, and the effect on result is within limits of numerical noise Screen Shot 2023-02-28 at 5 30 21 PM

Notebook

Get["https://raw.githubusercontent.com/HenrikSchumacher/\
DPR1Eigensystem/main/DPR1Eigensystem.m"];
Needs["DPR1Eigensystem`"];

stepL1[h_, b_] := 
  With[{r = Total[h]/Max[h]}, 
   If[b == \[Infinity], 2/Max[h], 2/Total[h] b/((b + 1)/r + 1)]];
outer2[h_] := {h}\[Transpose] . {h};

minibatchOperator[h_, \[Alpha]_?NumericQ, b_] := Module[{d, z},
   {d, z} = minibatchOperatorFactored[h, \[Alpha], b];
   DiagonalMatrix[d] + outer2[z]
   ];

(* Returns minibatch operator as {diag,rank1} *) 
minibatchOperatorFactored[h_, \[Alpha]_?NumericQ, b_] := 
  Module[{d = Length[h], H, Hm, ones, ii},
   ones = ConstantArray[1., d];
   If[b == \[Infinity],

    {ones - 2 \[Alpha] h + \[Alpha]^2 (h*h), 0*ones},
    {ones - 2 \[Alpha] h + (\[Alpha]^2 (b + 1))/b (h*h), 
     1/Sqrt[b] \[Alpha] h}
    ]
   ];

 pround[x_] := 
  First[Floor[x] + 
    RandomVariate[BernoulliDistribution[x - Floor@x], 1]];

generateWishartSpectrum[d_, R_] := Module[{b, X},
   Assert[R < d, "Wishart too large"];
   Assert[R > 1, "Wishart too small"];
   b = (-2 + R + d R)/Max[d - R, 1];
   X = RandomVariate[NormalDistribution[], {pround[b], d}]/b;
   SingularValueList[X]^2
   ];

generatePowerSpectrum[d_, R_] := Module[{h, decay},
   decay = 
    If[d > 10 && R < 2, 100., 
     dd /. FindRoot[
       HarmonicNumber[d, dd]^2/HarmonicNumber[d, 2 dd] == R, {dd, 2, 
        0, 100}]];
   h = Table[i^-decay, {i, 1, d}];
   h/Total[h]
   ];

stepComplexity[h_, lossFactor_, \[Alpha]_, b_] := 
  Module[{d, c0, TT, evals, evecs, partialProduct, fastMatrixPower, 
    objfunc, loss0, lossTarget, k},
   Assert[lossFactor > 2];

   d = Length[h];
   c0 = ConstantArray[1., {d}];

   If[b == \[Infinity],
    fastMatrixPower = 
     Compile[{{k, _Real}}, Total[(1. - \[Alpha] h)^(2 k)*h]],

    (* SGD case *)
    TT = minibatchOperator[h, \[Alpha], b];
    {evals, evecs} = Eigensystem[TT];
    partialProduct = Inverse[evecs\[Transpose]] . c0;
    (* 100x faster version of MatrixPower[mat,k,c0].h *)
    fastMatrixPower = 
     Compile[{{k, _Real}}, (evecs\[Transpose] . (evals^k*
           partialProduct)) . h]
    ];

   objfunc[k_?NumericQ] := fastMatrixPower[k];  (* 
   factor of 2 because C=T.C.T *);
   loss0 = Total[h];
   lossTarget = loss0/lossFactor;
   If[objfunc[1] < lossTarget, 1, (* 
    special case of converging in 1 step *)
    k /. 
     FindRoot[objfunc[k] == lossTarget, {k, 2}, AccuracyGoal -> 4]
    ]
   ];

stepComplexityDPR1[h_, lossFactor_, \[Alpha]_, b_] := 
  Module[{d, c0, TT, evals, evecs, partialProduct, fastMatrixPower, 
    objfunc, loss0, lossTarget, k},
   Assert[lossFactor > 2];

   d = Length[h];
   c0 = ConstantArray[1., {d}];

   If[b == \[Infinity],
    fastMatrixPower = 
     Compile[{{k, _Real}}, Total[(1. - \[Alpha] h)^(2 k)*h]],

    (* SGD case *)
    {dd, z} = minibatchOperatorFactored[h, \[Alpha], b];
    {evals, evecs} = DPR1Eigensystem[dd, z];
    partialProduct = evecs . c0;
    (* 100x faster version of MatrixPower[mat,k,c0].h *)
    fastMatrixPower = 
     Compile[{{k, _Real}}, (evecs\[Transpose] . (evals^k*
           partialProduct)) . h]
    ];

   objfunc[k_?NumericQ] := fastMatrixPower[k];  (* 
   factor of 2 because C=T.C.T *);
   loss0 = Total[h];
   lossTarget = loss0/lossFactor;
   If[objfunc[1] < lossTarget, 1, (* 
    special case of converging in 1 step *)
    k /. 
     FindRoot[objfunc[k] == lossTarget, {k, 2}, AccuracyGoal -> 4]
    ]
   ];

d = 400;

spectra = 
  Table[generatePowerSpectrum[d, R], {R, Ceiling[d*0.1], Floor[0.5*d],
     10}];
b = 1;
lossFactor = 100;

steps1 = 
  stepComplexity[#, lossFactor, 0.5*stepL1[#, b], b] & /@ spectra;
steps2 = 
  stepComplexityDPR1[#, lossFactor, 0.5*stepL1[#, b], b] & /@ 
   spectra;
ListPlot[{steps1, steps2}, PlotLegends -> {"original", "DPR1"}]
Print["Largest relative error: ", Max[(steps1 - steps2)/steps1]]