Closed rikkimax closed 1 month ago
The `i32` return type did not work when I tried it.
The `i32` return type did not work when I tried it.
It works fine for me:
// Binds llvm_lrint to the LDC intrinsic named "llvm.lrint.i64.f#".
// Accepts any floating-point T (enforced by the template constraint) and returns long.
// NOTE(review): the '#' is presumably expanded to the bit width of T — confirm against the LDC docs.
pragma(LDC_intrinsic, "llvm.lrint.i64.f#")
long llvm_lrint(T)(T val)
if (__traits(isFloating, T));
// Binds llvm_lrint32 to the LDC intrinsic named "llvm.lrint.i32.f#".
// Accepts any floating-point T (enforced by the template constraint) and returns int.
// NOTE(review): the '#' is presumably expanded to the bit width of T — confirm against the LDC docs.
pragma(LDC_intrinsic, "llvm.lrint.i32.f#")
int llvm_lrint32(T)(T val)
if (__traits(isFloating, T));
// C-linkage test function: forwards a float to llvm_lrint and returns its result.
// The return type is inferred via `auto` from the llvm_lrint call.
extern(C) auto foo (float f) {
return llvm_lrint(f);
}
// C-linkage test function: forwards a double to llvm_lrint32 and returns its result.
// The return type is inferred via `auto` from the llvm_lrint32 call.
extern(C) auto foo32 (double f) {
return llvm_lrint32(f);
}
$ ldc2 -c a.d && objdump a.o -D -j .text.foo
Disassembly of section .text.foo:
0000000000000000 <foo>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: f3 0f 11 45 fc movss %xmm0,-0x4(%rbp)
9: f3 0f 10 45 fc movss -0x4(%rbp),%xmm0
e: f3 48 0f 2d c0 cvtss2si %xmm0,%rax
13: 5d pop %rbp
14: c3 ret
$ ldc2 -c a.d && objdump a.o -D -j .text.foo32
Disassembly of section .text.foo32:
0000000000000000 <foo32>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: f2 0f 11 45 f8 movsd %xmm0,-0x8(%rbp)
9: f2 0f 10 45 f8 movsd -0x8(%rbp),%xmm0
e: f2 0f 2d c0 cvtsd2si %xmm0,%eax
12: 5d pop %rbp
13: c3 ret
It also works with target features (notice `vcvtss2si` instead of `cvtss2si`):
$ ldc2 -mattr=avx -c a.d && objdump a.o -D -j .text.foo
Disassembly of section .text.foo:
0000000000000000 <foo>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: c5 fa 11 45 fc vmovss %xmm0,-0x4(%rbp)
9: c5 fa 10 45 fc vmovss -0x4(%rbp),%xmm0
e: c4 e1 fa 2d c0 vcvtss2si %xmm0,%rax
13: 5d pop %rbp
14: c3 ret
Or without SSE:
$ ldc2 -mattr=-sse -c a.d && objdump a.o -D -j .text.foo
Disassembly of section .text.foo:
0000000000000000 <foo>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: d9 45 10 flds 0x10(%rbp)
7: dd d8 fstp %st(0)
9: d9 45 10 flds 0x10(%rbp)
c: df 7d f8 fistpll -0x8(%rbp)
f: 48 8b 45 f8 mov -0x8(%rbp),%rax
13: 5d pop %rbp
14: c3 ret
While we are at it, these intrinsics are missing:
llvm.roundeven
llvm.lround
llvm.llround
llvm.lrint
llvm.llrint
All are available since at least LLVM 11.
Someone on Discord needs `lrint` to generate `CVTSS2SI`. I was able to find that this is a missing intrinsic. It is not hooked up to core.stdc.math or std.math.