Closed shoichiro-yamada closed 11 years ago
Hi @shoichiro-yamada, FFTW is open source software licensed under the GPL, which is incompatible with the Apache license that clMath is released under. We cannot accept or integrate code contributions from GPL software without compromising the Apache license that we chose for clMath.
Perhaps you can rephrase your request in more generic terms, such as the desire for more precision in the sin/cos twiddle tables in the library.
You have not explained the reason for your request. Why do you feel the accuracy is not good enough?
Hi @kknox
I know you are a computer scientist, but you wanted an explanation.
These are the results from my Mersenne prime Lucas–Lehmer test program.
Original clFFT-2.0:
Iteration 10000 M( 38690000 )C, 0x7acca350b3654d2d, n = 2097152, CUDALucas v1.66 err = 0.2969 err2 = 0.006023 (2:59 real, 17.8701 ms/iter, ETA 191:57:15)
2^38690000-1 is the limit.
clFFT-2.0 with the FFTW3 twiddle-factor constant table:
Iteration 10000 M( 38857000 )C, 0xce16ec0b9df7c9c3, n = 2097152, CUDALucas v1.66 err = 0.3438 err2 = 0.00602 (2:39 real, 15.9423 ms/iter, ETA 171:59:57)
2^38857000-1 is the limit.
The FFTW3 constant-table version can handle larger numbers with IBDWT.
I made a test program. It compares the sin() results of clFFT, FFTW3, and float128.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <quadmath.h>
#define K2PI 6.2831853071795864769252867665590057683943388
#define by2pi(m, n) ((K2PI * (m)) / (n))
/*
* Improve accuracy by reducing x to range [0..1/8]
* before multiplication by 2 * PI.
*/
static void real_cexp(int m, int n, double * si, double * co)
{
double theta, c, s, t;
unsigned octant = 0;
int quarter_n = n;
n += n; n += n;
m += m; m += m;
if (m < 0) m += n;
if (m > n - m) { m = n - m; octant |= 4; }
if (m - quarter_n > 0) { m = m - quarter_n; octant |= 2; }
if (m > quarter_n - m) { m = quarter_n - m; octant |= 1; }
theta = by2pi(m, n);
c = cos(theta); s = sin(theta);
if (octant & 1) { t = c; c = s; s = t; }
if (octant & 2) { t = c; c = -s; s = t; }
if (octant & 4) { s = -s; }
*co = c;
*si = s;
}
/*
 * Compare three ways of computing the sine of -2*pi*j*k/L for L = 1024,
 * k in [0, 256) and j in [1, 4):
 *   - the "clFFT" way: multiply out the angle in double, then call sin();
 *   - the "fftw" way:  real_cexp() with the integer pair (k*j, L), negated;
 *   - a __float128 reference computed with sinq().
 * Whenever the two double results differ, all three values are printed
 * in hexadecimal floating-point notation.
 */
int main()
{
    const int L = 1024;
    const double TWO_PI = -6.283185307179586476925286766559;
    int k, j;

    for (k = 0; k < 256; k++) {
        for (j = 1; j < 4; j++) {
            /* clFFT-style: angle formed in double precision. */
            double theta = TWO_PI * ((double)k) / ((double)L);
            double sin_clfft = sin(((double)j) * theta);

            /* fftw-style: octant-reduced evaluation, sign flipped. */
            double sin_fftw, cos_fftw;
            real_cexp(k * j, L, &sin_fftw, &cos_fftw);
            sin_fftw = -sin_fftw;

            /* Quad-precision reference value. */
            __float128 theta_q =
                strtoflt128("-6.2831853071795864769252867665590057683943388", NULL)
                * ((__float128)k) / ((__float128)L);
            __float128 sin_ref = sinq(((__float128)j) * theta_q);

            if (sin_clfft == sin_fftw)
                continue;

            printf("clFFT = %la\n", sin_clfft);
            printf("fftw3 = %la\n", sin_fftw);

            char text[128];
            quadmath_snprintf(text, sizeof text, "%Qa", sin_ref);
            printf("f128 = %s\n", text);
        }
    }
}
$ cc test.c -lquadmath
$ ./a.out
...
clFFT = -0x1.7d7836cc33db2p-1
fftw3 = -0x1.7d7836cc33db3p-1
f128 = -0x1.7d7836cc33db222c4e2bde07f0a8p-1
clFFT = -0x1.7f8ece357177p-1
fftw3 = -0x1.7f8ece3571771p-1
f128 = -0x1.7f8ece357177098dc9cc5b0dba26p-1
clFFT = -0x1.21a799933eb5bp-1
fftw3 = -0x1.21a799933eb58p-1
f128 = -0x1.21a799933eb58b1613a20e5c136ep-1
clFFT = -0x1.81a1b33b57acbp-1
fftw3 = -0x1.81a1b33b57accp-1
f128 = -0x1.81a1b33b57acba8857b4a6512679p-1
clFFT = -0x1.19d5a09f2b9b9p-1
fftw3 = -0x1.19d5a09f2b9b8p-1
f128 = -0x1.19d5a09f2b9b7ecc9a93975e2b5fp-1
clFFT = -0x1.83b0e0bff976dp-1
fftw3 = -0x1.83b0e0bff976ep-1
f128 = -0x1.83b0e0bff976dd217be0e2b97164p-1
clFFT = -0x1.09e907417c5e2p-1
fftw3 = -0x1.09e907417c5e1p-1
f128 = -0x1.09e907417c5e0806a322f9590affp-1
...
In the last half bit, FFTW always wins.
Here's more info on Lucas-Lehmer testing.
http://en.wikipedia.org/wiki/Lucas%E2%80%93Lehmer_primality_test
Updated test program:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <quadmath.h>
#define K2PI 6.2831853071795864769252867665590057683943388
#define by2pi(m, n) ((K2PI * (m)) / (n))
/*
* Improve accuracy by reducing x to range [0..1/8]
* before multiplication by 2 * PI.
*/
static void real_cexp(int m, int n, double * si, double * co)
{
double theta, c, s, t;
unsigned octant = 0;
int quarter_n = n;
n += n; n += n;
m += m; m += m;
if (m < 0) m += n;
if (m > n - m) { m = n - m; octant |= 4; }
if (m - quarter_n > 0) { m = m - quarter_n; octant |= 2; }
if (m > quarter_n - m) { m = quarter_n - m; octant |= 1; }
theta = by2pi(m, n);
c = cos(theta); s = sin(theta);
if (octant & 1) { t = c; c = s; s = t; }
if (octant & 2) { t = c; c = -s; s = t; }
if (octant & 4) { s = -s; }
*co = c;
*si = s;
}
/*
 * Same comparison as the earlier test program (sine of -2*pi*j*k/L with
 * L = 1024, k in [0, 256), j in [1, 4)), extended with a scoreboard.
 *
 * For every (k, j) where the clFFT-style and fftw-style doubles differ,
 * the three values (clFFT, fftw, __float128 reference) are printed in
 * hex and decimal, followed by each double's absolute distance from the
 * reference.  The result strictly closer to the reference scores a win;
 * a tie is counted for fftw.  Pairs where both doubles agree are counted
 * as "even".  A summary line is printed at the end.
 */
int main()
{
    const int L = 1024;
    const double TWO_PI = -6.283185307179586476925286766559;
    int k, j;
    int even = 0, fftwwin = 0, clfftwin = 0;

    for (k = 0; k < 256; k++) {
        for (j = 1; j < 4; j++) {
            /* clFFT-style: angle formed in double precision. */
            double theta = TWO_PI * ((double)k) / ((double)L);
            double sin_clfft = sin(((double)j) * theta);

            /* fftw-style: octant-reduced evaluation, sign flipped. */
            double sin_fftw, cos_fftw;
            real_cexp(k * j, L, &sin_fftw, &cos_fftw);
            sin_fftw = -sin_fftw;

            /* Quad-precision reference value. */
            __float128 theta_q =
                strtoflt128("-6.2831853071795864769252867665590057683943388", NULL)
                * ((__float128)k) / ((__float128)L);
            __float128 sin_ref = sinq(((__float128)j) * theta_q);

            if (sin_clfft == sin_fftw) {
                even++;
                continue;
            }

            printf("clFFT = %la %.20lf\n", sin_clfft, sin_clfft);
            printf("fftw3 = %la %.20lf\n", sin_fftw, sin_fftw);

            char text[128];
            quadmath_snprintf(text, sizeof text, "%Qa", sin_ref);
            printf("f128 = %s %.20lf\n", text, (double) sin_ref);

            /* Distances are taken in quad precision, then narrowed. */
            double err_clfft = fabs((double) (((__float128) sin_clfft) - sin_ref));
            double err_fftw = fabs((double) (((__float128) sin_fftw) - sin_ref));

            if (err_clfft < err_fftw)
                clfftwin++;
            else
                fftwwin++;

            printf("clfft %.20f \n", err_clfft);
            printf("fftw %.20f \n", err_fftw);
        }
    }

    printf(" Even = %i clfft win = %i fftw win = %i\n", even, clfftwin, fftwwin);
}
Result
clFFT = 0x1.ffa72effef75cp-1 0.99932238458834943273
fftw3 = 0x1.ffa72effef75dp-1 0.99932238458834954375
f128 = 0x1.ffa72effef75c9d2cc8c9369aa84p-1 0.99932238458834954375
clfft 0.00000000000000006816
fftw 0.00000000000000004286
clFFT = -0x1.921d1fcdec7b3p-7 -0.01227153828572000692
fftw3 = -0x1.921d1fcdec784p-7 -0.01227153828571992539
f128 = -0x1.921d1fcdec784661e3afa0db67bfp-7 -0.01227153828571992539
clfft 0.00000000000000008084
fftw 0.00000000000000000069
Even = 501 clfft win = 26 fftw win = 241
Change parameter
int L=144,k,j;
int even=0,fftwwin=0,clfftwin=0;
for(k=0;k<36;k++)
for(j=1;j<4;j++)
Result
clfft 0.00000000000000007318
fftw 0.00000000000000001767
clFFT = -0x1.64fd6b8c28104p-4 -0.08715574274765819363
fftw3 = -0x1.64fd6b8c28102p-4 -0.08715574274765816587
f128 = -0x1.64fd6b8c281028dd28feb8cb49p-4 -0.08715574274765817975
clfft 0.00000000000000002007
fftw 0.00000000000000000769
Even = 71 clfft win = 8 fftw win = 29
Other one
int L=960,k,j;
int even=0,fftwwin=0,clfftwin=0;
for(k=0;k<480;k++)
for(j=1;j<2;j++)
Result
clFFT = -0x1.acedd6862d18fp-8 -0.00654493796735201774
fftw3 = -0x1.acedd6862d0d7p-8 -0.00654493796735185814
f128 = -0x1.acedd6862d0d7442a6bf8bae3517p-8 -0.00654493796735185814
clfft 0.00000000000000015936
fftw 0.00000000000000000023
Even = 304 clfft win = 16 fftw win = 160
Perhaps you can rephrase your request in more generic terms, such as the desire for more precision in the sin/cos twiddle tables in the library.
Now I understand the fftw code, so I can write license-free code. Thank you for everything.
Closing this issue due to inactivity
Please adopt the code from fftw3.