Enhance Twiddle factors constant table.

shoichiro-yamada commented 11 years ago

Please Code abduct from fftw3.

clFFT-2.0/src/library/generator.stockham.cpp:
#define K2PI 6.2831853071795864769252867665590057683943388
#define by2pi(m, n) ((K2PI * (m)) / (n))
/*
 * Improve accuracy by reducing x to range [0..1/8]
 * before multiplication by 2 * PI.
 */
static void real_cexp(int m, int n, double * si, double * co)
{
     double theta, c, s, t;
     unsigned octant = 0;
     int quarter_n = n;
     n += n; n += n;
     m += m; m += m;
     if (m < 0) m += n;
     if (m > n - m) { m = n - m; octant |= 4; }
     if (m - quarter_n > 0) { m = m - quarter_n; octant |= 2; }
     if (m > quarter_n - m) { m = quarter_n - m; octant |= 1; }
     theta = by2pi(m, n);
     c = cos(theta); s = sin(theta);
     if (octant & 1) { t = c; c = s; s = t; }
     if (octant & 2) { t = c; c = -s; s = t; }
     if (octant & 4) { s = -s; }
     *co = c;
     *si = s;
}
....
// Twiddle factors
for(size_t k=0; k<(L/radix); k++)
{
        double theta = TWO_PI * ((double)k)/((double)L);
        for(size_t j=1; j<radix; j++)
        {
                //double c = cos(((double)j) * theta);
                //double s = sin(((double)j) * theta);

                double c,s;
                real_cexp(k*j,L,&s,&c);
                s = -s;
                wc[nt]   = c;
                ws[nt++] = s;
        }
}

kknox commented 11 years ago

Hi @shoichiro-yamada FFTW is open source software licensed under GPL, which is incompatible with the Apache license that clMath is released under. We can not accept/integrate code contributions from GPL software without compromising the Apache license that we chose for clMath.

Perhaps you can rephrase your request in more generic terms, such as the desire for more precision in the sin/cos twiddle tables in the library.

You have not explained the reason for your request. Why do you feel the accuracy is not good enough?

shoichiro-yamada commented 11 years ago

Hi @kknox

I know you are computer scientist,but you want.

On my mersenne prime lucas lamer test program.

orignal clfft-2.0

Iteration 10000 M( 38690000 )C, 0x7acca350b3654d2d, n = 2097152, CUDALucas v1.66 err = 0.2969 err2 = 0.006023 (2:59 real, 17.8701 ms/iter, ETA 191:57:15)

2^38690000-1 is limit.

clfft-2.0 with fftw3 Twiddle factors constant table

Iteration 10000 M( 38857000 )C, 0xce16ec0b9df7c9c3, n = 2097152, CUDALucas v1.66 err = 0.3438 err2 = 0.00602 (2:39 real, 15.9423 ms/iter, ETA 171:59:57)

2^38857000-1 is limit.

fftw3 constant table version can calculate large number with IBDWT.

I make test program. Compare sin() result on clFFT,fftw3,float128.

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <quadmath.h>

#define K2PI 6.2831853071795864769252867665590057683943388
#define by2pi(m, n) ((K2PI * (m)) / (n))
/*
 * Improve accuracy by reducing x to range [0..1/8]
 * before multiplication by 2 * PI.
 */
static void real_cexp(int m, int n, double * si, double * co)
{
     double theta, c, s, t;
     unsigned octant = 0;
     int quarter_n = n;
     n += n; n += n;
     m += m; m += m;
     if (m < 0) m += n;
     if (m > n - m) { m = n - m; octant |= 4; }
     if (m - quarter_n > 0) { m = m - quarter_n; octant |= 2; }
     if (m > quarter_n - m) { m = quarter_n - m; octant |= 1; }
     theta = by2pi(m, n);
     c = cos(theta); s = sin(theta);
     if (octant & 1) { t = c; c = s; s = t; }
     if (octant & 2) { t = c; c = -s; s = t; }
     if (octant & 4) { s = -s; }
     *co = c;
     *si = s;
}
int main()
{
        int L=1024,k,j;
        for(k=0;k<256;k++)
                for(j=1;j<4;j++)
                {
                        // orignal
                        const double TWO_PI = -6.283185307179586476925286766559;
                        double theta = TWO_PI * ((double)k)/((double)L);
                        double clffts = sin(((double)j) * theta);
                        // fftw
                        double fftws,fftwc;
                        real_cexp(k*j,L,&fftws,&fftwc);
                        fftws = - fftws;
                        // float128
                        __float128 theta128 = strtoflt128("-6.2831853071795864769252867665590057683943388",NULL) * ((__float128)k)/((__float128)L);
                        __float128 f128s = (sinq(((__float128)j) * theta128));

                        if(clffts != fftws)
                        {
                                printf("clFFT = %la\n",clffts);
                                printf("fftw3 = %la\n",fftws);
                                char c[128];
                                quadmath_snprintf(c,sizeof c,"%Qa",f128s);
                                printf("f128  = %s\n",c);
                        }
                }
}

$ cc test.c -lquadmath
$ ./a.out
...
clFFT = -0x1.7d7836cc33db2p-1
fftw3 = -0x1.7d7836cc33db3p-1
f128  = -0x1.7d7836cc33db222c4e2bde07f0a8p-1
clFFT = -0x1.7f8ece357177p-1
fftw3 = -0x1.7f8ece3571771p-1
f128  = -0x1.7f8ece357177098dc9cc5b0dba26p-1
clFFT = -0x1.21a799933eb5bp-1
fftw3 = -0x1.21a799933eb58p-1
f128  = -0x1.21a799933eb58b1613a20e5c136ep-1
clFFT = -0x1.81a1b33b57acbp-1
fftw3 = -0x1.81a1b33b57accp-1
f128  = -0x1.81a1b33b57acba8857b4a6512679p-1
clFFT = -0x1.19d5a09f2b9b9p-1
fftw3 = -0x1.19d5a09f2b9b8p-1
f128  = -0x1.19d5a09f2b9b7ecc9a93975e2b5fp-1
clFFT = -0x1.83b0e0bff976dp-1
fftw3 = -0x1.83b0e0bff976ep-1
f128  = -0x1.83b0e0bff976dd217be0e2b97164p-1
clFFT = -0x1.09e907417c5e2p-1
fftw3 = -0x1.09e907417c5e1p-1
f128  = -0x1.09e907417c5e0806a322f9590affp-1
...

On last half bit,Always fftw win.

k3ack3r commented 11 years ago

Here's more info on Lucas-Lehmer testing.

http://en.wikipedia.org/wiki/Lucas%E2%80%93Lehmer_primality_test

shoichiro-yamada commented 11 years ago

Update test program.

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <quadmath.h>

#define K2PI 6.2831853071795864769252867665590057683943388
#define by2pi(m, n) ((K2PI * (m)) / (n))
/*
 * Improve accuracy by reducing x to range [0..1/8]
 * before multiplication by 2 * PI.
 */
static void real_cexp(int m, int n, double * si, double * co)
{
     double theta, c, s, t;
     unsigned octant = 0;
     int quarter_n = n;
     n += n; n += n;
     m += m; m += m;
     if (m < 0) m += n;
     if (m > n - m) { m = n - m; octant |= 4; }
     if (m - quarter_n > 0) { m = m - quarter_n; octant |= 2; }
     if (m > quarter_n - m) { m = quarter_n - m; octant |= 1; }
     theta = by2pi(m, n);
     c = cos(theta); s = sin(theta);
     if (octant & 1) { t = c; c = s; s = t; }
     if (octant & 2) { t = c; c = -s; s = t; }
     if (octant & 4) { s = -s; }
     *co = c;
     *si = s;
}

int main()
{
    int L=1024,k,j;
        int even=0,fftwwin=0,clfftwin=0;
    for(k=0;k<256;k++)
        for(j=1;j<4;j++)
        {
            // orignal
                        const double TWO_PI = -6.283185307179586476925286766559;
                        double theta = TWO_PI * ((double)k)/((double)L);
                    double clffts = sin(((double)j) * theta);
            // fftw
            double fftws,fftwc;
                    real_cexp(k*j,L,&fftws,&fftwc);
            fftws = - fftws;
            // float128
            __float128 theta128 = strtoflt128("-6.2831853071795864769252867665590057683943388",NULL) * ((__float128)k)/((__float128)L);
                __float128 f128s = (sinq(((__float128)j) * theta128));

            if(clffts != fftws)
            {
                printf("clFFT = %la                %.20lf\n",clffts,clffts); 
                printf("fftw3 = %la                %.20lf\n",fftws,fftws); 
                char c[128];
                quadmath_snprintf(c,sizeof c,"%Qa",f128s);  
                printf("f128  = %s %.20lf\n",c,(double) f128s); 

                if(fabs((double) (((__float128) clffts)-f128s)) <
                    fabs((double) (((__float128) fftws)-f128s)))
                    clfftwin++;
                else
                    fftwwin++;
                printf("clfft %.20f \n",fabs((double) (((__float128) clffts)-f128s)));
                printf("fftw  %.20f \n",fabs((double) (((__float128) fftws)-f128s)));
            }
            else
                even++;
        }
        printf(" Even = %i clfft win = %i fftw win = %i\n",even,clfftwin,fftwwin);
}

Result
clFFT = 0x1.ffa72effef75cp-1                0.99932238458834943273
fftw3 = 0x1.ffa72effef75dp-1                0.99932238458834954375
f128  = 0x1.ffa72effef75c9d2cc8c9369aa84p-1 0.99932238458834954375
clfft 0.00000000000000006816 
fftw  0.00000000000000004286 
clFFT = -0x1.921d1fcdec7b3p-7                -0.01227153828572000692
fftw3 = -0x1.921d1fcdec784p-7                -0.01227153828571992539
f128  = -0x1.921d1fcdec784661e3afa0db67bfp-7 -0.01227153828571992539
clfft 0.00000000000000008084 
fftw  0.00000000000000000069 
Even = 501 clfft win = 26 fftw win = 241

Change parameter
        int L=144,k,j;
        int even=0,fftwwin=0,clfftwin=0;
        for(k=0;k<36;k++)
                for(j=1;j<4;j++)
Result
clfft 0.00000000000000007318 
fftw  0.00000000000000001767 
clFFT = -0x1.64fd6b8c28104p-4                -0.08715574274765819363
fftw3 = -0x1.64fd6b8c28102p-4                -0.08715574274765816587
f128  = -0x1.64fd6b8c281028dd28feb8cb49p-4 -0.08715574274765817975
clfft 0.00000000000000002007 
fftw  0.00000000000000000769 
 Even = 71 clfft win = 8 fftw win = 29

Other one
        int L=960,k,j;
        int even=0,fftwwin=0,clfftwin=0;
        for(k=0;k<480;k++)
                for(j=1;j<2;j++)
Result                
clFFT = -0x1.acedd6862d18fp-8                -0.00654493796735201774
fftw3 = -0x1.acedd6862d0d7p-8                -0.00654493796735185814
f128  = -0x1.acedd6862d0d7442a6bf8bae3517p-8 -0.00654493796735185814
clfft 0.00000000000000015936 
fftw  0.00000000000000000023 
 Even = 304 clfft win = 16 fftw win = 160

shoichiro-yamada commented 11 years ago

Perhaps you can rephrase your request in more generic terms, such as the desire for more precision in the sin/cos twiddle tables in the library.

Now, I understand fftw code. I can write Licence Free code. Thank you everything.

kknox commented 11 years ago

Closing this issue due to inactivity

clMathLibraries / clFFT

Enhance Twiddle factors constant table. #13