ldc-developers / ldc

The LLVM-based D Compiler.
http://wiki.dlang.org/LDC
Other
1.21k stars 263 forks source link

Missing intrinsic lrint #4754

Closed rikkimax closed 1 month ago

rikkimax commented 2 months ago

Someone on Discord needs lrint to generate CVTSS2SI.

pragma(LDC_intrinsic, "llvm.lrint.i64.f#")
    long llvm_lrint(T)(T val)
        if (__traits(isFloating, T));

I was able to find that this is a missing intrinsic. It is not hooked up to core.stdc.math or std.math.

rikkimax commented 2 months ago

i32 return type did not work when I tried it.

https://llvm.org/docs/LangRef.html#llvm-lrint-intrinsic

the-horo commented 2 months ago

> i32 return type did not work when I tried it.

It works fine for me:

pragma(LDC_intrinsic, "llvm.lrint.i64.f#")
    long llvm_lrint(T)(T val)
        if (__traits(isFloating, T));

pragma(LDC_intrinsic, "llvm.lrint.i32.f#")
    int llvm_lrint32(T)(T val)
        if (__traits(isFloating, T));

extern(C) auto foo (float f) {
    return llvm_lrint(f);
}
extern(C) auto foo32 (double f) {
    return llvm_lrint32(f);
}
$ ldc2 -c a.d && objdump a.o -D -j .text.foo
Disassembly of section .text.foo:
0000000000000000 <foo>:
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   f3 0f 11 45 fc          movss  %xmm0,-0x4(%rbp)
   9:   f3 0f 10 45 fc          movss  -0x4(%rbp),%xmm0
   e:   f3 48 0f 2d c0          cvtss2si %xmm0,%rax
  13:   5d                      pop    %rbp
  14:   c3                      ret
$ ldc2 -c a.d && objdump a.o -D -j .text.foo32
Disassembly of section .text.foo32:
0000000000000000 <foo32>:
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   f2 0f 11 45 f8          movsd  %xmm0,-0x8(%rbp)
   9:   f2 0f 10 45 f8          movsd  -0x8(%rbp),%xmm0
   e:   f2 0f 2d c0             cvtsd2si %xmm0,%eax
  12:   5d                      pop    %rbp
  13:   c3                      ret

It also works with target features (notice vcvtss2si instead of cvtss2si):

$ ldc2 -mattr=avx -c a.d && objdump a.o -D -j .text.foo
Disassembly of section .text.foo:
0000000000000000 <foo>:
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   c5 fa 11 45 fc          vmovss %xmm0,-0x4(%rbp)
   9:   c5 fa 10 45 fc          vmovss -0x4(%rbp),%xmm0
   e:   c4 e1 fa 2d c0          vcvtss2si %xmm0,%rax
  13:   5d                      pop    %rbp
  14:   c3                      ret

Or with SSE disabled (the conversion then falls back to the x87 flds/fistpll instructions):

$ ldc2 -mattr=-sse -c a.d && objdump a.o -D -j .text.foo
Disassembly of section .text.foo:
0000000000000000 <foo>:
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   d9 45 10                flds   0x10(%rbp)
   7:   dd d8                   fstp   %st(0)
   9:   d9 45 10                flds   0x10(%rbp)
   c:   df 7d f8                fistpll -0x8(%rbp)
   f:   48 8b 45 f8             mov    -0x8(%rbp),%rax
  13:   5d                      pop    %rbp
  14:   c3                      ret
JohanEngelen commented 2 months ago

While we are at it, these intrinsics are also missing: