Status: Closed (closed by p12tic 3 months ago)
Starting with GCC 11 and as of GCC 14.1, the following code:
```c
uint32x4_t test(float32x4_t a, float32x4_t b)
{
    return vcgtq_f32(a, b);
}
```
Generates the following assembly on armv7:
```asm
test(__simd128_float32_t, __simd128_float32_t):
        vmov.32     r3, d0[0]
        sub         sp, sp, #16
        vmov        s8, r3
        vmov.32     r3, d0[1]
        vmov        s10, r3
        vmov.32     r3, d1[0]
        vmov        s12, r3
        vmov.32     r3, d1[1]
        vmov        s14, r3
        vmov.32     r3, d2[0]
        vmov        s9, r3
        vmov.32     r3, d2[1]
        vcmpe.f32   s8, s9
        vmov        s11, r3
        vmov.32     r3, d3[0]
        vmrs        APSR_nzcv, FPSCR
        vcmpe.f32   s10, s11
        vmov        s13, r3
        vmov.32     r3, d3[1]
        vmov        s15, r3
        ite         gt
        movgt       r3, #-1
        movle       r3, #0
        vmrs        APSR_nzcv, FPSCR
        vcmpe.f32   s12, s13
        str         r3, [sp]
        ite         gt
        movgt       r3, #-1
        movle       r3, #0
        vmrs        APSR_nzcv, FPSCR
        vcmpe.f32   s14, s15
        str         r3, [sp, #4]
        ite         gt
        movgt       r3, #-1
        movle       r3, #0
        vmrs        APSR_nzcv, FPSCR
        str         r3, [sp, #8]
        ite         gt
        movgt       r3, #-1
        movle       r3, #0
        str         r3, [sp, #12]
        vld1.64     {d0-d1}, [sp:64]
        add         sp, sp, #16
        bx          lr
```
Starting with GCC 11 and as of GCC 14.1, the following code:
Generates the following assembly on armv7: