Open yaroslavvb opened 1 year ago
I did evaluation of DPR1 decomposition in another task, and the effect on result is within limits of numerical noise
Notebook
Get["https://raw.githubusercontent.com/HenrikSchumacher/\ DPR1Eigensystem/main/DPR1Eigensystem.m"]; Needs["DPR1Eigensystem`"]; stepL1[h_, b_] := With[{r = Total[h]/Max[h]}, If[b == \[Infinity], 2/Max[h], 2/Total[h] b/((b + 1)/r + 1)]]; outer2[h_] := {h}\[Transpose] . {h}; minibatchOperator[h_, \[Alpha]_?NumericQ, b_] := Module[{d, z}, {d, z} = minibatchOperatorFactored[h, \[Alpha], b]; DiagonalMatrix[d] + outer2[z] ]; (* Returns minibatch operator as {diag,rank1} *) minibatchOperatorFactored[h_, \[Alpha]_?NumericQ, b_] := Module[{d = Length[h], H, Hm, ones, ii}, ones = ConstantArray[1., d]; If[b == \[Infinity], {ones - 2 \[Alpha] h + \[Alpha]^2 (h*h), 0*ones}, {ones - 2 \[Alpha] h + (\[Alpha]^2 (b + 1))/b (h*h), 1/Sqrt[b] \[Alpha] h} ] ]; pround[x_] := First[Floor[x] + RandomVariate[BernoulliDistribution[x - Floor@x], 1]]; generateWishartSpectrum[d_, R_] := Module[{b, X}, Assert[R < d, "Wishart too large"]; Assert[R > 1, "Wishart too small"]; b = (-2 + R + d R)/Max[d - R, 1]; X = RandomVariate[NormalDistribution[], {pround[b], d}]/b; SingularValueList[X]^2 ]; generatePowerSpectrum[d_, R_] := Module[{h, decay}, decay = If[d > 10 && R < 2, 100., dd /. FindRoot[ HarmonicNumber[d, dd]^2/HarmonicNumber[d, 2 dd] == R, {dd, 2, 0, 100}]]; h = Table[i^-decay, {i, 1, d}]; h/Total[h] ]; stepComplexity[h_, lossFactor_, \[Alpha]_, b_] := Module[{d, c0, TT, evals, evecs, partialProduct, fastMatrixPower, objfunc, loss0, lossTarget, k}, Assert[lossFactor > 2]; d = Length[h]; c0 = ConstantArray[1., {d}]; If[b == \[Infinity], fastMatrixPower = Compile[{{k, _Real}}, Total[(1. - \[Alpha] h)^(2 k)*h]], (* SGD case *) TT = minibatchOperator[h, \[Alpha], b]; {evals, evecs} = Eigensystem[TT]; partialProduct = Inverse[evecs\[Transpose]] . c0; (* 100x faster version of MatrixPower[mat,k,c0].h *) fastMatrixPower = Compile[{{k, _Real}}, (evecs\[Transpose] . (evals^k* partialProduct)) . h] ]; objfunc[k_?NumericQ] := fastMatrixPower[k]; (* factor of 2 because C=T.C.T *); loss0 = Total[h]; lossTarget = loss0/lossFactor; If[objfunc[1] < lossTarget, 1, (* special case of converging in 1 step *) k /. FindRoot[objfunc[k] == lossTarget, {k, 2}, AccuracyGoal -> 4] ] ]; stepComplexityDPR1[h_, lossFactor_, \[Alpha]_, b_] := Module[{d, c0, TT, evals, evecs, partialProduct, fastMatrixPower, objfunc, loss0, lossTarget, k}, Assert[lossFactor > 2]; d = Length[h]; c0 = ConstantArray[1., {d}]; If[b == \[Infinity], fastMatrixPower = Compile[{{k, _Real}}, Total[(1. - \[Alpha] h)^(2 k)*h]], (* SGD case *) {dd, z} = minibatchOperatorFactored[h, \[Alpha], b]; {evals, evecs} = DPR1Eigensystem[dd, z]; partialProduct = evecs . c0; (* 100x faster version of MatrixPower[mat,k,c0].h *) fastMatrixPower = Compile[{{k, _Real}}, (evecs\[Transpose] . (evals^k* partialProduct)) . h] ]; objfunc[k_?NumericQ] := fastMatrixPower[k]; (* factor of 2 because C=T.C.T *); loss0 = Total[h]; lossTarget = loss0/lossFactor; If[objfunc[1] < lossTarget, 1, (* special case of converging in 1 step *) k /. FindRoot[objfunc[k] == lossTarget, {k, 2}, AccuracyGoal -> 4] ] ]; d = 400; spectra = Table[generatePowerSpectrum[d, R], {R, Ceiling[d*0.1], Floor[0.5*d], 10}]; b = 1; lossFactor = 100; steps1 = stepComplexity[#, lossFactor, 0.5*stepL1[#, b], b] & /@ spectra; steps2 = stepComplexityDPR1[#, lossFactor, 0.5*stepL1[#, b], b] & /@ spectra; ListPlot[{steps1, steps2}, PlotLegends -> {"original", "DPR1"}] Print["Largest relative error: ", Max[(steps1 - steps2)/steps1]]
I did evaluation of DPR1 decomposition in another task, and the effect on result is within limits of numerical noise
Notebook