Incorrect results of floating point multiplication and higher compiler optimization

neiling commented 3 years ago

Describe the bug

Keystone version: v1.0.0
riscv64-unknown-linux-musl-gcc version: 10.2.0

Floating-point multiplication produces different results when the code is compiled with a higher optimization level.

Screenshots or Error Log calc_pi

Additional context

Hello everyone,

I observed a strange behavior related to floating point multiplication and higher compiler optimization.

The following code should calculate an approximation of pi. If I compile the code with -O0, everything works fine and I get the same result every time. If I compile it with e.g. -O1, the result is either not a number or looks like a random value. I have not done any deeper debugging yet. Maybe this is a known issue and someone can explain to me what is happening here. Thanks a lot!

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Approximates pi numerically and prints it.
 *
 * The interval [0, 1] is split into N equal-width subintervals; the function
 * 4 / (1 + x^2) is evaluated at the midpoint of each one, the values are
 * accumulated in `sum`, and the total is scaled by the subinterval width.
 * The result is printed with 15 decimal places.
 *
 * Returns EXIT_SUCCESS.
 */
int main() {

  /* Number of subintervals and the width of each one. */
  const uint64_t N = 100000000;
  const double delta_x = 1. / N;
  double sum = 0., x, pi;
  for (uint64_t i = 0; i < N; i++) {
    /* Midpoint of the i-th subinterval. */
    x = (i + 0.5) * delta_x;
    sum += (4.0 / (1.0 + x * x));
    /* Overwritten on every iteration; only the assignment after the loop
     * determines the value that is printed. */
    pi = sum * delta_x;
  }
  /* Final estimate: accumulated function values times the subinterval width. */
  pi = sum * delta_x;
  printf("Pi=%.15lf\n", pi);

  return EXIT_SUCCESS;
}

dayeol commented 3 years ago

This might be related to how we manage fp in the runtime. But to make sure, can you try to run them outside of the enclave and see if they're fine?

neiling commented 3 years ago

If I run it outside the enclave, everything works fine, as expected.

dkohlbre commented 3 years ago

This is pretty odd. Best guess is that the FPU state isn't clean due to some issue with our context switch and the O1 assumes it is?

Can you post the assembly for your main for both the O0 and O1 binaries?

neiling commented 3 years ago

                             **************************************************************
                             *                            GCC -O1                         *
                             **************************************************************
                             int __stdcall main(void)
                               assume gp = 0x17800
             int               a0:4           <RETURN>
                             main                                            XREF[4]:     Entry Point(*), 
                                                                                          _start_c:0001016e(*), 
                                                                                          00017148(*), 
                                                                                          .debug_frame::00000040(*)  
        00010208 41 11           c.addi     sp,-0x10
             assume gp = <UNKNOWN>
        0001020a 06 e4           c.sdsp     ra,0x8(sp)
        0001020c 81 47           c.li       a5,0x0
        0001020e 53 07 00 f2     fmv.d.x    fa4,zero
        00010212 17 47 00 00     auipc      a4,0x4
        00010216 07 35 e7 66     fld        fa0,0x66e(a4=>DAT_00014880)                      = 3FE0000000000000h
        0001021a 17 47 00 00     auipc      a4,0x4
        0001021e 87 35 e7 66     fld        fa1,0x66e(a4=>DAT_00014888)                      = 3E45798EE2308C3Ah
        00010222 17 47 00 00     auipc      a4,0x4
        00010226 07 36 e7 66     fld        fa2,0x66e(a4=>DAT_00014890)                      = 3FF0000000000000h
        0001022a 17 47 00 00     auipc      a4,0x4
        0001022e 87 36 e7 66     fld        fa3,0x66e(a4=>DAT_00014898)                      = 4010000000000000h
        00010232 37 e7 f5 05     lui        a4,0x5f5e
        00010236 13 07 07 10     addi       a4,a4,0x100
                             LAB_0001023a                                    XREF[1]:     00010258(j)  
        0001023a d3 f7 37 d2     fcvt.d.lu  fa5,a5,dyn
        0001023e d3 f7 a7 02     fadd.d     fa5,fa5,fa0,dyn
        00010242 d3 f7 b7 12     fmul.d     fa5,fa5,fa1,dyn
        00010246 d3 f7 f7 12     fmul.d     fa5,fa5,fa5,dyn
        0001024a d3 f7 c7 02     fadd.d     fa5,fa5,fa2,dyn
        0001024e d3 f7 f6 1a     fdiv.d     fa5,fa3,fa5,dyn
        00010252 53 77 f7 02     fadd.d     fa4,fa4,fa5,dyn
        00010256 85 07           c.addi     a5,0x1
        00010258 e3 91 e7 fe     bne        a5,a4,LAB_0001023a
        0001025c 97 47 00 00     auipc      a5,0x4
        00010260 87 b7 c7 62     fld        fa5,0x62c(a5=>DAT_00014888)                      = 3E45798EE2308C3Ah
        00010264 d3 77 f7 12     fmul.d     fa5,fa4,fa5,dyn
        00010268 d3 85 07 e2     fmv.x.d    a1,fa5
        0001026c 17 45 00 00     auipc      a0,0x4
        00010270 13 05 45 60     addi       a0=>s_Pi=%.15lf_00014870,a0,0x604                = "Pi=%.15lf\n"
        00010274 ef 00 60 21     jal        ra,printf                                        int printf(char * __format, ...)
        00010278 01 45           c.li       a0,0x0
        0001027a a2 60           c.ldsp     ra,0x8(sp)
        0001027c 41 01           c.addi     sp,0x10
        0001027e 82 80           ret

                             **************************************************************
                             *                            GCC -O0                         *
                             **************************************************************
                             int __stdcall main(void)
                               assume gp = 0x17800
             int               a0:4           <RETURN>
             double            Stack[-0x10]:8 x
             double            Stack[-0x18]:8 pi                                      XREF[2]:     0001025e(*), 
                                                                                                   00010262(*)  
             double            Stack[-0x20]:8 delta_x                                 XREF[3]:     0001029e(*), 
                                                                                                   000102c4(*), 
                                                                                                   000102c8(*)  
             uint64_t          Stack[-0x28]:8 N                                       XREF[4]:     00010232(*), 
                                                                                                   00010256(*), 
                                                                                                   00010296(*), 
                                                                                                   000102bc(*)  
             uint64_t          Stack[-0x30]:8 i                                       XREF[3]:     00010218(*), 
                                                                                                   0001021c(*), 
                                                                                                   000102b0(*)  
             double            Stack[-0x38]:8 sum                                     XREF[5]:     0001023a(*), 
                                                                                                   00010240(*), 
                                                                                                   000102a2(*), 
                                                                                                   000102a8(*), 
                                                                                                   000102ac(*)  
             undefined8        Stack[-0x40]:8 local_40                                XREF[5]:     00010236(*), 
                                                                                                   00010286(*), 
                                                                                                   0001028e(*), 
                                                                                                   00010292(*), 
                                                                                                   000102b8(*)  
                             main                                            XREF[4]:     Entry Point(*), 
                                                                                          _start_c:0001016e(*), 
                                                                                          00017148(*), 
                                                                                          .debug_frame::00000040(*)  
        00010208 39 71           c.addi16sp sp,-0x40
             assume gp = <UNKNOWN>
        0001020a 06 fc           c.sdsp     ra,0x38(sp)
        0001020c 22 f8           c.sdsp     s0,0x30(sp)
        0001020e 80 00           c.addi4spn s0,sp,0x40
        00010210 b7 e7 f5 05     lui        a5,0x5f5e
        00010214 93 87 07 10     addi       a5,a5,0x100
        00010218 23 38 f4 fc     sd         a5,-0x30=>i(s0)
        0001021c 83 37 04 fd     ld         a5,-0x30=>i(s0)
        00010220 d3 f7 37 d2     fcvt.d.lu  fa5,a5,dyn
        00010224 97 47 00 00     auipc      a5,0x4
        00010228 93 87 c7 6b     addi       a5,a5,0x6bc
        0001022c 98 23           c.fld      fa4,0x0(a5=>DAT_000148e0)                        = 3FF0000000000000h
        0001022e d3 77 f7 1a     fdiv.d     fa5,fa4,fa5,dyn
        00010232 27 3c f4 fc     fsd        fa5,-0x28=>N(s0)
        00010236 23 30 04 fc     sd         zero,-0x40=>local_40(s0)
        0001023a 23 34 04 fc     sd         zero,-0x38=>sum(s0)
        0001023e bd a0           c.j        LAB_000102ac
                             LAB_00010240                                    XREF[1]:     000102b4(j)  
        00010240 83 37 84 fc     ld         a5,-0x38=>sum(s0)
        00010244 53 f7 37 d2     fcvt.d.lu  fa4,a5,dyn
        00010248 97 47 00 00     auipc      a5,0x4
        0001024c 93 87 07 6a     addi       a5,a5,0x6a0
        00010250 9c 23           c.fld      fa5,0x0(a5=>DAT_000148e8)                        = 3FE0000000000000h
        00010252 d3 77 f7 02     fadd.d     fa5,fa4,fa5,dyn
        00010256 07 37 84 fd     fld        fa4,-0x28=>N(s0)
        0001025a d3 77 f7 12     fmul.d     fa5,fa4,fa5,dyn
        0001025e 27 34 f4 fe     fsd        fa5,-0x18=>pi(s0)
        00010262 87 37 84 fe     fld        fa5,-0x18=>pi(s0)
        00010266 53 f7 f7 12     fmul.d     fa4,fa5,fa5,dyn
        0001026a 97 47 00 00     auipc      a5,0x4
        0001026e 93 87 67 67     addi       a5,a5,0x676
        00010272 9c 23           c.fld      fa5,0x0(a5=>DAT_000148e0)                        = 3FF0000000000000h
        00010274 d3 77 f7 02     fadd.d     fa5,fa4,fa5,dyn
        00010278 97 47 00 00     auipc      a5,0x4
        0001027c 93 87 87 67     addi       a5,a5,0x678
        00010280 98 23           c.fld      fa4,0x0(a5=>DAT_000148f0)                        = 4010000000000000h
        00010282 d3 77 f7 1a     fdiv.d     fa5,fa4,fa5,dyn
        00010286 07 37 04 fc     fld        fa4,-0x40=>local_40(s0)
        0001028a d3 77 f7 02     fadd.d     fa5,fa4,fa5,dyn
        0001028e 27 30 f4 fc     fsd        fa5,-0x40=>local_40(s0)
        00010292 07 37 04 fc     fld        fa4,-0x40=>local_40(s0)
        00010296 87 37 84 fd     fld        fa5,-0x28=>N(s0)
        0001029a d3 77 f7 12     fmul.d     fa5,fa4,fa5,dyn
        0001029e 27 30 f4 fe     fsd        fa5,-0x20=>delta_x(s0)
        000102a2 83 37 84 fc     ld         a5,-0x38=>sum(s0)
        000102a6 85 07           c.addi     a5,0x1
        000102a8 23 34 f4 fc     sd         a5,-0x38=>sum(s0)
                             LAB_000102ac                                    XREF[1]:     0001023e(j)  
        000102ac 03 37 84 fc     ld         a4,-0x38=>sum(s0)
        000102b0 83 37 04 fd     ld         a5,-0x30=>i(s0)
        000102b4 e3 66 f7 f8     bltu       a4,a5,LAB_00010240
        000102b8 07 37 04 fc     fld        fa4,-0x40=>local_40(s0)
        000102bc 87 37 84 fd     fld        fa5,-0x28=>N(s0)
        000102c0 d3 77 f7 12     fmul.d     fa5,fa4,fa5,dyn
        000102c4 27 30 f4 fe     fsd        fa5,-0x20=>delta_x(s0)
        000102c8 83 35 04 fe     ld         a1,-0x20=>delta_x(s0)
        000102cc 17 45 00 00     auipc      a0,0x4
        000102d0 13 05 45 60     addi       a0=>s_Pi=%.15lf_000148d0,a0,0x604                = "Pi=%.15lf\n"
        000102d4 ef 00 a0 21     jal        ra,printf                                        int printf(char * __format, ...)
        000102d8 81 47           c.li       a5,0x0
        000102da 3e 85           c.mv       a0,a5
        000102dc e2 70           c.ldsp     ra,0x38(sp)
        000102de 42 74           c.ldsp     s0,0x30(sp)
        000102e0 21 61           c.addi16sp sp,0x40
        000102e2 82 80           ret

keystone-enclave / keystone

Incorrect results of floating point multiplication and higher compiler optimization #247