Open Quuxplusone opened 7 years ago
Bugzilla Link | PR31510 |
Status | NEW |
Importance | P normal |
Reported by | Davide Italiano (ditaliano@apple.com) |
Reported on | 2017-01-02 05:26:22 -0800 |
Last modified on | 2018-02-11 02:39:09 -0800 |
Version | trunk |
Hardware | PC All |
CC | drraph@gmail.com, filcab@gmail.com, llvm-bugs@lists.llvm.org, simon.f.whittaker@gmail.com, spatel+llvm@rotateright.com |
Fixed by commit(s) | |
Attachments | |
Blocks | |
Blocked by | |
See also | PR31866 |
Another example of suboptimal code generation for complex arithmetic:
#include <complex.h>
complex double f(complex double x) {
return x*x;
}
LLVM 3.9 output:
f: # @f
movapd xmm3, xmm1
movapd xmm2, xmm0
mulsd xmm0, xmm0
movapd xmm4, xmm3
mulsd xmm4, xmm4
movapd xmm1, xmm2
mulsd xmm1, xmm3
subsd xmm0, xmm4
addsd xmm1, xmm1
ucomisd xmm0, xmm0
jnp .LBB0_3
ucomisd xmm1, xmm1
jp .LBB0_2
.LBB0_3:
ret
.LBB0_2:
push rax
movapd xmm0, xmm2
movapd xmm1, xmm3
call __muldc3
add rsp, 8
ret
gcc output:
f:
movapd xmm3, xmm1
movapd xmm2, xmm0
jmp __muldc3
(The output is equally suboptimal if you pass -march=avx2, FWIW.)
(This is at -O3.)
The first example with -ffast-math shows a slightly more dramatic difference:
clang 3.9 -O3 -ffast-math -std=c++14
.LCPI0_0:
.quad 4607182418800017408 # double 1
foo(): # @foo()
mov eax, 2
movsd xmm0, qword ptr [rip + .LCPI0_0] # xmm0 = mem[0],zero
xorpd xmm1, xmm1
movapd xmm3, xmm0
xorpd xmm2, xmm2
jmp .LBB0_1
.LBB0_4: # in Loop: Header=BB0_1 Depth=1
movapd xmm4, xmm3
mulsd xmm4, xmm0
movapd xmm5, xmm2
mulsd xmm5, xmm1
addsd xmm5, xmm4
mulsd xmm0, xmm2
mulsd xmm1, xmm3
subsd xmm0, xmm1
movapd xmm1, xmm5
.LBB0_1: # =>This Loop Header: Depth=1
movapd xmm4, xmm3
.LBB0_2: # Parent Loop BB0_1 Depth=1
shr eax
je .LBB0_5
movapd xmm3, xmm2
addsd xmm3, xmm3
mulsd xmm3, xmm4
mulsd xmm2, xmm2
mulsd xmm4, xmm4
subsd xmm2, xmm4
test al, 1
movapd xmm4, xmm3
je .LBB0_2
jmp .LBB0_4
.LBB0_5:
ret
GCC:
foo():
pxor %xmm1, %xmm1
movsd .LC0(%rip), %xmm0
ret
.LC0:
.long 0
.long -1074790400