Open Karutsenko opened 2 years ago
I did some additional work and found one peculiarity: at the FFT size (n) 256 fftw works! For clarity, I printed the plan and recorded the result:
/*
 * Threaded single-precision FFTW smoke test (repro for the SIGSEGV at n = 512).
 *
 * Builds a forward complex 1-D DFT plan of size n, fills the input with the
 * sum of two complex exponentials, executes the plan and writes the magnitude
 * of every output bin to mgn.
 *
 * mgn: output array; indices 0 .. n-1 (n = 512) are written on success.
 *      It is left untouched if thread init, allocation or planning fails.
 *
 * Relies on REAL, IMAG and SQR being defined elsewhere in the file.
 */
void fft_f_test2(float* mgn)
{
    int n = 512;
    int ok = 0;
    fftwf_plan p = NULL;
    fftwf_complex *s = NULL, *d = NULL;
    int nbthreads = omp_get_max_threads();

    /* fftwf_init_threads() returns zero when thread support could not be set up. */
    if (!fftwf_init_threads()) {
        fprintf(stderr, "fft test failed: fftwf_init_threads() error\n");
        return;
    }
    fftwf_plan_with_nthreads(nbthreads);

    s = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * (size_t)n);
    d = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * (size_t)n);
    if (s == NULL || d == NULL) {
        fprintf(stderr, "fft test failed: fftwf_malloc() returned NULL\n");
        goto cleanup;
    }

    //fftwf_set_timelimit(FFTW_NO_TIMELIMIT);
    //fftwf_make_planner_thread_safe();
    p = fftwf_plan_dft_1d(n, s, d, FFTW_FORWARD, FFTW_ESTIMATE);
    if (p == NULL) {
        fprintf(stderr, "fft test failed: fftwf_plan_dft_1d() returned NULL\n");
        goto cleanup;
    }
    printf(" "); fftwf_print_plan(p); printf("\n");
    /* Plans printed for each size tried:
     * n=64 => this works
     * (dft-thr-ct-dit-x2/8
     *   (dftw-direct-8/12 "t3fv_8_neon")
     *   (dftw-direct-8/12 "t3fv_8_neon")
     *   (dft-thr-vrank>=1-x2/1
     *     (dft-direct-8-x4 "n2fv_8_neon")
     *     (dft-direct-8-x4 "n2fv_8_neon")))
     * n=128 => this works!
     * (dft-thr-ct-dit-x2/8
     *   (dftw-direct-8/12 "t3fv_8_neon")
     *   (dftw-direct-8/12 "t3fv_8_neon")
     *   (dft-thr-vrank>=1-x2/1
     *     (dft-direct-16-x4 "n2fv_16_neon")
     *     (dft-direct-16-x4 "n2fv_16_neon")))
     * n=256 => this works!
     * (dft-thr-ct-dit-x2/16
     *   (dftw-direct-16/16 "t3fv_16_neon")
     *   (dftw-direct-16/16 "t3fv_16_neon")
     *   (dft-thr-vrank>=1-x2/1
     *     (dft-direct-16-x8 "n2fv_16_neon")
     *     (dft-direct-16-x8 "n2fv_16_neon")))
     * n=512 => doesn't work!!!
     * (dft-thr-ct-dit-x2/16
     *   (dftw-direct-16/16 "t3fv_16_neon")
     *   (dftw-direct-16/16 "t3fv_16_neon")
     *   (dft-thr-vrank>=1-x2/1
     *     (dft-direct-32-x8 "n2fv_32_neon")
     *     (dft-direct-32-x8 "n2fv_32_neon")))
     * ===========================================
     * Child terminated with signal = 0xb (SIGSEGV)
     */

    /* Input signal: sum of two complex exponentials with amplitudes 1.0 and 0.5. */
    for (int i = 0; i < n; i++)
    {
        float theta = (float)i/(float)n * M_PI;
        s[i][REAL] = 1.0 * cos(10.0 * theta) + 0.5 * cos(25.0 * theta);
        s[i][IMAG] = 1.0 * sin(10.0 * theta) + 0.5 * sin(25.0 * theta);
    }

    //fftwf_execute(p);
    fftwf_execute_dft(p, s, d);

    /* Magnitude of every output bin. */
    for (int i = 0; i < n; i++) {
        mgn[i] = sqrt(SQR(d[i][REAL]) + SQR(d[i][IMAG]));
    }
    ok = 1;

cleanup:
    /*
     * Release the plan and the buffers BEFORE fftwf_cleanup_threads(): per
     * the FFTW manual, existing plans become undefined once the library has
     * been cleaned up and must not be destroyed afterwards.
     */
    if (p != NULL) {
        fftwf_destroy_plan(p);
    }
    if (s != NULL) {
        fftwf_free(s);
    }
    if (d != NULL) {
        fftwf_free(d);
    }
    fftwf_cleanup_threads();

    if (ok) {
        printf("fft test success!\n");
    }
}
I tried to execute the code on an stm32mp1 (dual-core, ARMv7) and got an exception: Child terminated with signal = 0xb (SIGSEGV)
//=======================================
#include
#include
#include
#define FFT_SIZE 1024
#define REAL 0
#define IMAG 1
#define I256 256
#define I128 128
#define F128 128.0f
/* File-scope (static) FFTW plan handle and complex source/destination objects.
 * NOTE(review): fft_f_test2 in this report holds its buffers through
 * fftwf_complex pointers; a `*` before src/dst may have been lost when the
 * listing was pasted - confirm against the real file. */
static fftwf_plan plan; static fftwf_complex src, dst;
/*
 * Placeholder test entry point. The body is empty in this listing, so mgn is
 * neither read nor written.
 */
void fft_f_test(float *mgn)
{
    (void)mgn; /* intentionally unused */
}
//=======================================================================================
Disassembly:
b6f32c74: ldr r2, [pc, #8] ; (0xb6f32c80 <fftwf_codelet_n2fv_16_neon+12>) b6f32c76: ldr r1, [pc, #12] ; (0xb6f32c84 <fftwf_codelet_n2fv_16_neon+16>) b6f32c78: add r2, pc b6f32c7a: add r1, pc b6f32c7c: b.w 0xb6e72ca4 fftwf_kdft_register@plt b6f32c80: ldmia r2, {r2, r7} b6f32c82: movs r0, r1 b6f32c84: ; instruction: 0xfb43ffff
b6f32c88: stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
b6f32c8c: mov r11, r2
b6f32c8e: vpush {d8-d15}
b6f32c92: ldr.w r2, [pc, #1600] ; 0xb6f332d4
b6f32c96: sub.w sp, sp, #524 ; 0x20c
b6f32c9a: ldr r1, [sp, #632] ; 0x278
b6f32c9c: add r2, pc
b6f32c9e: str r0, [sp, #0]
b6f32ca0: cmp r1, #0
b6f32ca2: ldr r4, [sp, #624] ; 0x270
b6f32ca4: ldr r0, [sp, #640] ; 0x280
b6f32ca6: ble.w 0xb6f33c0e
b6f32caa: ldr.w r3, [pc, #1580] ; 0xb6f332d8
b6f32cae: mov r10, r4
b6f32cb0: str r1, [sp, #20]
b6f32cb2: ldr r3, [r2, r3]
b6f32cb4: ldr r3, [r3, #0]
b6f32cb6: str r3, [sp, #516] ; 0x204
b6f32cb8: lsls r3, r0, #2
b6f32cba: str r3, [sp, #512] ; 0x200
b6f32cbc: lsls r3, r0, #3
b6f32cbe: str r3, [sp, #508] ; 0x1fc
b6f32cc0: ldr r3, [sp, #636] ; 0x27c
b6f32cc2: lsls r3, r3, #3
b6f32cc4: str r3, [sp, #504] ; 0x1f8
b6f32cc6: ldr r2, [sp, #0]
b6f32cc8: mov.w r9, #76 ; 0x4c
b6f32ccc: mov.w r0, r10, lsl #6
b6f32cd0: add.w r5, r11, #224 ; 0xe0
b6f32cd4: mvn.w r3, #31
b6f32cd8: mul.w r12, r3, r10
b6f32cdc: adds r3, r2, r0
b6f32cde: movs r2, #24
b6f32ce0: mvn.w r6, #79 ; 0x4f
b6f32ce4: mov r1, r3
b6f32ce6: add.w r8, r1, r12
b6f32cea: movs r3, #56 ; 0x38
b6f32cec: mul.w r6, r6, r10
b6f32cf0: vld1.32 {d14-d15}, [r8], r0
b6f32cf4: str r1, [sp, #200] ; 0xc8
b6f32cf6: mvn.w r1, #63 ; 0x3f
b6f32cfa: add.w r7, r8, r6
b6f32cfe: mul.w r1, r1, r10
b6f32d02: vld1.32 {d12-d13}, [r7], r0
b6f32d06: vld1.32 {d18-d19}, [r8]
b6f32d0a: vld1.32 {d16-d17}, [r7], r7
b6f32d0e: mvn.w lr, #119 ; 0x77
b6f32d12: vstr d14, [sp, #472] ; 0x1d8
b6f32d16: vstr d15, [sp, #480] ; 0x1e0
b6f32d1a: add.w r8, r7, r1
b6f32d1e: mla r9, r9, r10, r8
b6f32d22: mla lr, lr, r10, r9
b6f32d26: mla r3, r3, r10, lr
b6f32d2a: vld1.32 {d28-d29}, [r9] <- THIS PROBLEM, Break at address "0xb6f32d2a" with no debug information available, or outside of program code.
RETURN Child terminated with signal = 0xb (SIGSEGV)