[PowerPC] llvm generates NaN in __fixunsdfdi for PPC-32 e500mc

Long5hot commented 6 months ago

Set the FPSCR register so that the following exceptions are enabled: (1) FP invalid operation, (2) FP overflow, (3) FP underflow, (4) FP divide-by-zero.

Below test calls __fixunsdfdi from compiler-rt while typecasting from double to unsigned long long.

/*
 * Reproducer: converts two doubles that straddle 2^31 to uint64_t and prints
 * the results. On the reported target each cast is lowered to a call to
 * compiler-rt's __fixunsdfdi.
 *
 * fpErrorOptions is accepted for interface compatibility but is not read
 * here; the FPSCR exception enables are expected to be set up by the caller.
 */
void testIntrinsic (unsigned int fpErrorOptions) {

    double d = 2147483456.000000;            /* 2^31 - 192: below the signed 32-bit limit */
    uint64_t ui64_var = (uint64_t)(d);

    (void)fpErrorOptions;

    /*
     * Format fixes: a 64-bit value needs the ll length modifier, and sizeof
     * yields a size_t, so it is cast to int to match %d.
     * NOTE(review): assumes kprintf accepts printf-style %llX -- confirm.
     */
    kprintf("Double = %f, uint64_t = %16.16llX sizeof(uint64_t)=%d\n", d, (unsigned long long)ui64_var, (int)sizeof(uint64_t));
    d= 2147483712.000000;                    /* 2^31 + 64: the input that triggers the report */
    kprintf("Converting to uint64_t...\n");
    ui64_var = (uint64_t)(d);
    kprintf("Converted to uint64_t...\n");
    /* The original format had a trailing "count=%d" with no matching argument; removed. */
    kprintf("Double = %f, uint64_t = %16.16llX\n", d, (unsigned long long)ui64_var);
}

/*
 * Convert a double to an unsigned 64-bit integer, truncating toward zero.
 *
 * Inputs that compare <= 0.0 yield 0. Otherwise the value is split into two
 * 32-bit halves: the upper half is a / 2^32, and the lower half is what is
 * left of `a` after removing upper * 2^32. Both halves are produced by
 * double -> unsigned int conversions.
 */
long long unsigned __fixunsdfdi(double a) {
  const double two_pow_32 = 4294967296.f;   /* float literal, widened to double */
  unsigned int upper_word;
  unsigned int lower_word;
  long long unsigned result = 0;

  if (a <= 0.0)
    return result;

  upper_word = a / two_pow_32;
  lower_word = a - (double)upper_word * two_pow_32;

  result = upper_word;
  result = (result << 32) | lower_word;
  return result;
}

LLVM generates a NaN for the input 2147483712.000000 on 32-bit PowerPC e500mc, whereas GCC does not for the same compiler-rt __fixunsdfdi.

clang --target=ppc32 -mcpu=e500mc -mlong-double-64 -mno-spe

Assembly generated by llvm for __fixunsdfdi


        # Single-precision constant pool for the clang-generated __fixunsdfdi.
        # Each entry is loaded with lfs by the function body.
        .text
        .file   "llvm-fixunsdfdi.c"
        .section        .rodata.cst4,"aM",@progbits,4
        .p2align        2, 0x0                          # -- Begin function __fixunsdfdi
        # 0.0: compared against the argument for the "a <= 0.0" test.
.LCPI0_0:
        .long   0x00000000                      # float 0
        # 2^32: divisor for the high word, and multiplier in the fmadd that derives the low word.
.LCPI0_1:
        .long   0x4f800000                      # float 4.2949673E+9
        # 2^31: threshold and bias used around each fctiwz conversion.
.LCPI0_2:
        .long   0x4f000000                      # float 2.14748365E+9
        # 2^52: subtracted from the 0x43300000:<word> double to recover (double)high.
.LCPI0_3:
        .long   0x59800000                      # float 4.50359963E+15
        #-----------------------------------------------------------------------
        # __fixunsdfdi as emitted by clang 17 for the C source quoted above.
        # In:    f1 = a (spilled to 56(r31) on entry)
        # Out:   r3 = high 32 bits, r4 = low 32 bits of the result
        # Frame: 80 bytes, r31 used as frame pointer
        # Note:  each double -> unsigned int conversion runs BOTH fctiwz forms
        #        (biased by 2^31 and unbiased) and picks one afterwards with isel.
        #        The gdb session later in this thread shows the unbiased fctiwz
        #        raising SIGFPE for f0 = 2147483712 when FP invalid is enabled.
        #-----------------------------------------------------------------------
        .text
        .globl  __fixunsdfdi
        .p2align        4
        .type   __fixunsdfdi,@function
__fixunsdfdi:                                    # @__fixunsdfdi
.Lfunc_begin0:
# %bb.0:
        # Prologue: allocate the frame, save r31 and use it as frame pointer.
        stwu 1, -80(1)
        stw 31, 76(1)
        mr      31, 1
        # Spill the argument and reload it into f0.
        stfd 1, 56(31)
        lfd 0, 56(31)
        # f1 = 0.0; cr7 = compare(a, 0.0).
        lis 3, .LCPI0_0@ha
        lfs 1, .LCPI0_0@l(3)
        fcmpu 7, 0, 1
        # Save a rotated copy of CR to the stack and restore cr7 from it.
        mfcr 3                                  # cr7
        rotlwi  3, 3, 28
        stw 3, 20(31)
        mfcr 3                                  # cr7
        lwz 4, 20(31)
        rotlwi  4, 4, 4
        mtcrf 1, 4                              # cr7
        # r4 = lowest CR bit (cr7 "unordered"), r3 = cr7 "greater than";
        # r3 |= r4, so r3 != 0 means a > 0.0 or a is NaN.
        clrlwi  4, 3, 31
        mfcr 3                                  # cr7
        rlwinm 3, 3, 30, 31, 31
        or 3, 3, 4
        cmplwi  3, 0
        # Non-zero -> do the conversion; zero (a <= 0.0) -> return 0.
        bne     0, .LBB0_2
        b .LBB0_1
.LBB0_1:
        # a <= 0.0: store 0 into both result slots.
        li 3, 0
        stw 3, 68(31)
        stw 3, 64(31)
        b .LBB0_3
.LBB0_2:
        # f0 = a / 2^32 (f2 keeps 2^32 for the fmadd below).
        lfd 0, 56(31)
        lis 3, .LCPI0_1@ha
        lfs 2, .LCPI0_1@l(3)
        fdiv 0, 0, 2
        # f1 = 2^31.
        lis 3, .LCPI0_2@ha
        lfs 1, .LCPI0_2@l(3)
        # high word, biased path: fctiwz(f0 - 2^31) -> 36(r31).
        fsub 3, 0, 1
        fctiwz 3, 3
        addi 3, 31, 36
        stfiwx 3, 0, 3
        # high word, unbiased path: fctiwz(f0) -> 32(r31); executed unconditionally.
        fctiwz 3, 0
        addi 3, 31, 32
        stfiwx 3, 0, 3
        # cr0 = compare(f0, 2^31); select unbiased result if less, else biased ^ 0x80000000.
        fcmpu 0, 0, 1
        lwz 3, 36(31)
        xoris 4, 3, 32768
        lwz 3, 32(31)
        isellt  3, 3, 4
        stw 3, 52(31)                           # 52(r31) = high
        # f3 = a.
        lfd 3, 56(31)
        # Build the double 0x43300000:high in memory (2^52 + high) and load it into f0.
        lwz 3, 52(31)
        lis 4, 17200
        stw 4, 40(31)
        stw 3, 44(31)
        lfd 0, 40(31)
        # f0 = (2^52 + high) - 2^52 = (double)high.
        lis 3, .LCPI0_3@ha
        lfs 4, .LCPI0_3@l(3)
        fsub 0, 0, 4
        # f0 = -(double)high * 2^32 + a, i.e. the value to convert for the low word.
        fneg 0, 0
        fmadd 0, 0, 2, 3
        # low word, biased path: fctiwz(f0 - 2^31) -> 28(r31).
        fsub 2, 0, 1
        fctiwz 2, 2
        addi 3, 31, 28
        stfiwx 2, 0, 3
        # low word, unbiased path: fctiwz(f0) -> 24(r31); executed unconditionally.
        # This is the instruction flagged with SIGFPE in the gdb listing in this thread.
        fctiwz 2, 0
        addi 3, 31, 24
        stfiwx 2, 0, 3
        # Same select as for the high word.
        fcmpu 0, 0, 1
        lwz 3, 28(31)
        xoris 4, 3, 32768
        lwz 3, 24(31)
        isellt  3, 3, 4
        stw 3, 48(31)                           # 48(r31) = low
        # Copy high/low into the result slots read by the epilogue.
        lwz 3, 52(31)
        lwz 4, 48(31)
        stw 4, 68(31)
        stw 3, 64(31)
        b .LBB0_3
.LBB0_3:
        # Epilogue: r3 = high, r4 = low; restore r31 and release the frame.
        lwz 3, 64(31)
        lwz 4, 68(31)
        lwz 31, 76(1)
        addi 1, 1, 80
        blr
.Lfunc_end0:
        .size   __fixunsdfdi, .Lfunc_end0-.Lfunc_begin0
                                        # -- End function
        .ident  "clang version 17.0.6.1"
        .section        ".note.GNU-stack","",@progbits
        .addrsig

Assembly Generated by GNU for the same __fixunsdfdi


        #-----------------------------------------------------------------------
        # __fixunsdfdi as emitted by GCC 8.3.0 for the same C source.
        # In:    f1 = a
        # Out:   r3 = high 32 bits, r4 = low 32 bits of the result
        # Frame: 32 bytes, allocated only on the non-zero path
        # Note:  each double -> unsigned int conversion first compares the value
        #        with 2^31 and branches, so only one fctiwz is executed per word
        #        (on the value itself if below 2^31, otherwise on value - 2^31).
        #-----------------------------------------------------------------------
        .p2align 4,,15
        .globl __fixunsdfdi
        .type   __fixunsdfdi, @function
__fixunsdfdi:
        # f0 = 0.0; return 0 if a < 0.0 or a == 0.0 (cror folds lt into eq).
        lis 9,.LC1@ha
        lfs 0,.LC1@l(9)
        fcmpu 7,1,0
        cror 30,28,30
        beq- 7,.L7
        # f0 = a * 2^-32 (.LC3), f12 = 2^31 (.LC5); allocate the frame.
        lis 9,.LC3@ha
        stwu 1,-32(1)
        lfs 0,.LC3@l(9)
        lis 9,.LC5@ha
        lfs 12,.LC5@l(9)
        fmul 0,1,0
        # If f0 > 2^31 or f0 == 2^31, take the biased high-word path at .L3.
        fcmpu 7,0,12
        cror 30,29,30
        beq- 7,.L3
        # High word below 2^31: convert directly, result in 28(r1) -> r3.
        fctiwz 0,0
        addi 10,1,28
        lfs 12,.LC5@l(9)
        stfiwx 0,0,10
        # Build the double 0x43300000:high at 8(r1); f0 = 2^52 (.LC7), f11 = 2^32 (.LC9).
        lis 10,0x4330
        stw 10,8(1)
        lis 10,.LC7@ha
        lfs 0,.LC7@l(10)
        lis 10,.LC9@ha
        lwz 3,28(1)
        lfs 11,.LC9@l(10)
        stw 3,12(1)
        lfd 10,8(1)
        # f1 = -((2^52 + high) - 2^52) * 2^32 + a, the value to convert for the low word.
        fsub 0,10,0
        fneg 0,0
        fmadd 1,0,11,1
        # If that value is >= 2^31, take the biased low-word path at .L5.
        fcmpu 7,1,12
        cror 30,29,30
        beq- 7,.L5
.L13:
        # Low word below 2^31: convert directly, load into r4, release the frame.
        fctiwz 1,1
        addi 9,1,20
        stfiwx 1,0,9
        lwz 4,20(1)
        addi 1,1,32
        blr
        .p2align 4,,15
.L3:
        # High word >= 2^31: convert (f0 - 2^31), then add 0x80000000 back to the integer.
        fsub 0,0,12
        addi 10,1,24
        lfs 12,.LC5@l(9)
        fctiwz 0,0
        stfiwx 0,0,10
        lis 10,0x4330
        stw 10,8(1)
        lis 10,.LC7@ha
        lfs 0,.LC7@l(10)
        lis 10,.LC9@ha
        lwz 3,24(1)
        lfs 11,.LC9@l(10)
        addis 3,3,0x8000
        # Same (double)high reconstruction and low-word computation as above.
        stw 3,12(1)
        lfd 10,8(1)
        fsub 0,10,0
        fneg 0,0
        fmadd 1,0,11,1
        # Low-word value below 2^31 -> direct conversion at .L13; otherwise fall into .L5.
        fcmpu 7,1,12
        cror 30,29,30
        bne+ 7,.L13
.L5:
        # Low word >= 2^31: convert (f1 - 2^31), then add 0x80000000 back to r4.
        fsub 1,1,12
        addi 9,1,16
        fctiwz 1,1
        stfiwx 1,0,9
        lwz 4,16(1)
        addi 1,1,32
        addis 4,4,0x8000
        blr
        .p2align 4,,15
.L7:
        # a <= 0.0: return 0 (reached before the frame is allocated, so nothing to release).
        li 3,0
        li 4,0
        blr
        .size   __fixunsdfdi,.-__fixunsdfdi
        # Single-precision constant pool for the GCC-generated __fixunsdfdi.
        # Values are emitted as decimal bit patterns; each is loaded with lfs.
        .section        .rodata.cst4,"aM",@progbits,4
        .align 2
        # 0x00000000 = 0.0, used for the "a <= 0.0" test.
.LC1:
        .long   0
        # 0x2F800000 = 2^-32, multiplied with a to obtain the high-word value.
.LC3:
        .long   796917760
        # 0x4F000000 = 2^31, the threshold/bias around each fctiwz.
.LC5:
        .long   1325400064
        # 0x59800000 = 2^52, subtracted from the 0x43300000:<word> double.
.LC7:
        .long   1501560832
        # 0x4F800000 = 2^32, multiplier in the fmadd that derives the low word.
.LC9:
        .long   1333788672
        .ident  "GCC: (Wind River VxWorks GCC 8.3.0.5 - 2022.02.07) 8.3.0"
        .gnu_attribute 4, 9
        .section        .note.GNU-stack,"",@progbits

llvmbot commented 6 months ago

@llvm/issue-subscribers-backend-powerpc

Author: Kishan Parmar (Long5hot)

Set FPSCR register value accordingly (1) FP invalid exc. enable (2) FP overflow exc. enable (3) FP underflow exc. enable (4) FP divide by zero exc. enable Below test calls __fixunsdfdi from compiler-rt while typecasting from double to unsigned long long. ``` void testIntrinsic (unsigned int fpErrorOptions) { double d = 2147483456.000000; uint64_t ui64_var = (uint64_t)(d); kprintf("Double = %f, uint64_t = %16.16X sizeof(uint64_t)=%d\n", d, ui64_var, sizeof(uint64_t)); d= 2147483712.000000; kprintf("Converting to uint64_t...\n"); ui64_var = (uint64_t)(d); kprintf("Converted to uint64_t...\n"); kprintf("Double = %f, uint64_t = %16.16X count=%d\n", d, ui64_var); } ``` ``` long long unsigned __fixunsdfdi(double a) { if (a <= 0.0) return 0; unsigned int high = a / 4294967296.f; unsigned int low = a - (double)high * 4294967296.f; return ((long long unsigned int)high << 32) | low; } ``` **llvm generates NaN for input **2147483712.000000** on PowerPC-32 e500mc and GNU does not for the same compiler-rt __fixunsdfdi .** `clang --target=ppc32 -mcpu=e500mc -mlong-double-64 -mno-spe` Assembly generated by llvm for __fixunsdfdi ``` .text .file "llvm-fixunsdfdi.c" .section .rodata.cst4,"aM",@progbits,4 .p2align 2, 0x0 # -- Begin function __fixunsdfdi .LCPI0_0: .long 0x00000000 # float 0 .LCPI0_1: .long 0x4f800000 # float 4.2949673E+9 .LCPI0_2: .long 0x4f000000 # float 2.14748365E+9 .LCPI0_3: .long 0x59800000 # float 4.50359963E+15 .text .globl __fixunsdfdi .p2align 4 .type __fixunsdfdi,@function __fixunsdfdi: # @__fixunsdfdi .Lfunc_begin0: # %bb.0: stwu 1, -80(1) stw 31, 76(1) mr 31, 1 stfd 1, 56(31) lfd 0, 56(31) lis 3, .LCPI0_0@ha lfs 1, .LCPI0_0@l(3) fcmpu 7, 0, 1 mfcr 3 # cr7 rotlwi 3, 3, 28 stw 3, 20(31) mfcr 3 # cr7 lwz 4, 20(31) rotlwi 4, 4, 4 mtcrf 1, 4 # cr7 clrlwi 4, 3, 31 mfcr 3 # cr7 rlwinm 3, 3, 30, 31, 31 or 3, 3, 4 cmplwi 3, 0 bne 0, .LBB0_2 b .LBB0_1 .LBB0_1: li 3, 0 stw 3, 68(31) stw 3, 64(31) b .LBB0_3 .LBB0_2: lfd 0, 56(31) lis 3, .LCPI0_1@ha lfs 2, 
.LCPI0_1@l(3) fdiv 0, 0, 2 lis 3, .LCPI0_2@ha lfs 1, .LCPI0_2@l(3) fsub 3, 0, 1 fctiwz 3, 3 addi 3, 31, 36 stfiwx 3, 0, 3 fctiwz 3, 0 addi 3, 31, 32 stfiwx 3, 0, 3 fcmpu 0, 0, 1 lwz 3, 36(31) xoris 4, 3, 32768 lwz 3, 32(31) isellt 3, 3, 4 stw 3, 52(31) lfd 3, 56(31) lwz 3, 52(31) lis 4, 17200 stw 4, 40(31) stw 3, 44(31) lfd 0, 40(31) lis 3, .LCPI0_3@ha lfs 4, .LCPI0_3@l(3) fsub 0, 0, 4 fneg 0, 0 fmadd 0, 0, 2, 3 fsub 2, 0, 1 fctiwz 2, 2 addi 3, 31, 28 stfiwx 2, 0, 3 fctiwz 2, 0 addi 3, 31, 24 stfiwx 2, 0, 3 fcmpu 0, 0, 1 lwz 3, 28(31) xoris 4, 3, 32768 lwz 3, 24(31) isellt 3, 3, 4 stw 3, 48(31) lwz 3, 52(31) lwz 4, 48(31) stw 4, 68(31) stw 3, 64(31) b .LBB0_3 .LBB0_3: lwz 3, 64(31) lwz 4, 68(31) lwz 31, 76(1) addi 1, 1, 80 blr .Lfunc_end0: .size __fixunsdfdi, .Lfunc_end0-.Lfunc_begin0 # -- End function .ident "clang version 17.0.6.1" .section ".note.GNU-stack","",@progbits .addrsig ``` Assembly Generated by GNU for the same __fixunsdfdi ``` .p2align 4,,15 .globl __fixunsdfdi .type __fixunsdfdi, @function __fixunsdfdi: lis 9,.LC1@ha lfs 0,.LC1@l(9) fcmpu 7,1,0 cror 30,28,30 beq- 7,.L7 lis 9,.LC3@ha stwu 1,-32(1) lfs 0,.LC3@l(9) lis 9,.LC5@ha lfs 12,.LC5@l(9) fmul 0,1,0 fcmpu 7,0,12 cror 30,29,30 beq- 7,.L3 fctiwz 0,0 addi 10,1,28 lfs 12,.LC5@l(9) stfiwx 0,0,10 lis 10,0x4330 stw 10,8(1) lis 10,.LC7@ha lfs 0,.LC7@l(10) lis 10,.LC9@ha lwz 3,28(1) lfs 11,.LC9@l(10) stw 3,12(1) lfd 10,8(1) fsub 0,10,0 fneg 0,0 fmadd 1,0,11,1 fcmpu 7,1,12 cror 30,29,30 beq- 7,.L5 .L13: fctiwz 1,1 addi 9,1,20 stfiwx 1,0,9 lwz 4,20(1) addi 1,1,32 blr .p2align 4,,15 .L3: fsub 0,0,12 addi 10,1,24 lfs 12,.LC5@l(9) fctiwz 0,0 stfiwx 0,0,10 lis 10,0x4330 stw 10,8(1) lis 10,.LC7@ha lfs 0,.LC7@l(10) lis 10,.LC9@ha lwz 3,24(1) lfs 11,.LC9@l(10) addis 3,3,0x8000 stw 3,12(1) lfd 10,8(1) fsub 0,10,0 fneg 0,0 fmadd 1,0,11,1 fcmpu 7,1,12 cror 30,29,30 bne+ 7,.L13 .L5: fsub 1,1,12 addi 9,1,16 fctiwz 1,1 stfiwx 1,0,9 lwz 4,16(1) addi 1,1,32 addis 4,4,0x8000 blr .p2align 4,,15 .L7: li 3,0 li 4,0 blr .size 
__fixunsdfdi,.-__fixunsdfdi .section .rodata.cst4,"aM",@progbits,4 .align 2 .LC1: .long 0 .LC3: .long 796917760 .LC5: .long 1325400064 .LC7: .long 1501560832 .LC9: .long 1333788672 .ident "GCC: (Wind River VxWorks GCC 8.3.0.5 - 2022.02.07) 8.3.0" .gnu_attribute 4, 9 .section .note.GNU-stack,"",@progbits ```

chenzheng1030 commented 6 months ago

With the FP invalid exception enabled, and compiling with -mcpu=e500mc -mlong-double-64 -mno-spe on 32-bit AIX, I get:

   0x100006b0 <+208>:   fmadd   f0,f0,f2,f3
   0x100006b4 <+212>:   fsub    f2,f0,f1
   0x100006b8 <+216>:   fctiwz  f2,f2
   0x100006bc <+220>:   addi    r3,r1,-52
   0x100006c0 <+224>:   stfiwx  f2,0,r3
=> 0x100006c4 <+228>:   fctiwz  f2,f0. >>>>>>> SIGFPE

(gdb) info registers $f0
f0             2147483712          (raw 0x41e0000008000000)

So, because 2147483712 > 2^31 - 1 (the largest value fctiwz can represent), fctiwz sets VXCVI, which raises the FP invalid exception.

I don't think the FP invalid exception is taken into account when the PPC backend does instruction selection; i.e., when it generates instructions like fctiwz in convertFPToInt(), the input range is not checked to avoid a VXCVI exception.

Modeling this exception behavior in the PPC backend seems to require a huge effort.

zatrazz commented 5 months ago

Maybe set e500 to use the softfp version instead, as libgcc does?

llvm / llvm-project

[PowerPC] llvm generates NaN in __fixunsdfdi for PPC-32 e500mc #84316