Open ChocDino-Andy opened 11 months ago
https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-40-incremental-computation-gaussian
#ifdef USE_SCALAR_INSTRUCTIONS // suitable for scalar GPUs
float g0, g1, g2;
g0 = 1.0 / (sqrt(2.0 * PI) * sigma);
g1 = exp(-0.5 * delta * delta / (sigma * sigma));
g2 = g1 * g1;
for (i = 0; i < N; i++) {
  MultiplySomethingByTheGaussianCoefficient(g0);
  g0 *= g1;
  g1 *= g2;
}
#else // especially for vector architectures
float3 g;
g.x = 1.0 / (sqrt(2.0 * PI) * sigma);
g.y = exp(-0.5 * delta * delta / (sigma * sigma));
g.z = g.y * g.y;
for (i = 0; i < N; i++) {
  MultiplySomethingByTheGaussianCoefficient(g.x);
  g.xy *= g.yz;
}
#endif // USE_VECTOR_INSTRUCTIONS
https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-40-incremental-computation-gaussian
#ifdef USE_SCALAR_INSTRUCTIONS // suitable for scalar GPUs
float g0, g1, g2;
g0 = 1.0 / (sqrt(2.0 * PI) * sigma);
g1 = exp(-0.5 * delta * delta / (sigma * sigma));
g2 = g1 * g1;
for (i = 0; i < N; i++) {
  MultiplySomethingByTheGaussianCoefficient(g0);
  g0 *= g1;
  g1 *= g2;
}
#else // especially for vector architectures
float3 g;
g.x = 1.0 / (sqrt(2.0 * PI) * sigma);
g.y = exp(-0.5 * delta * delta / (sigma * sigma));
g.z = g.y * g.y;
for (i = 0; i < N; i++) {
  MultiplySomethingByTheGaussianCoefficient(g.x);
  g.xy *= g.yz;
}
#endif // USE_VECTOR_INSTRUCTIONS