gfoidl / Stochastics

Stochastic tools, distribution, analysis
MIT License
3 stars 0 forks source link

Sample.MinMax has better code on sequential part #20

Closed gfoidl closed 6 years ago

gfoidl commented 6 years ago

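The JIT produces much better code for Variant B (plain comparisons) than for Variant A (Math.Min / Math.Max): in the listings below, A makes two calls per element and keeps min/max on the stack, while B keeps everything in registers with no calls inside the loop (121 vs. 188 bytes of code).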

Variant A

// Variant A: single pass over `array` that tracks the smallest and largest
// element by feeding every element through Math.Min and Math.Max.
//
// Returns the pair (min, max). For an empty array the loop body never runs,
// so the initial values (double.MaxValue, double.MinValue) are returned as-is.
//
// NOTE(review): Math.Min/Math.Max are documented to return NaN when either
// argument is NaN, so a NaN element would presumably propagate into the
// result here -- confirm against the .NET documentation.
private static (double min, double max) A(double[] array)
{
    // Start from the extremes so that any element replaces them.
    double min = double.MaxValue;
    double max = double.MinValue;

    for (int i = 0; i < array.Length; ++i)
    {
        // array[i] is read once for each of the two calls.
        min = Math.Min(min, array[i]);
        max = Math.Max(max, array[i]);
    }

    return (min, max);
}
; Assembly listing for method ConsoleApplication.Program:A(ref):struct
; Emitting BLENDED_CODE for X64 CPU with AVX
; optimized code
; rbp based frame
; partially interruptible
; Final local variable assignments
;
;  V00 arg0         [V00,T02] (  9, 21   )     ref  ->  rbx         class-hnd
;  V01 loc0         [V01,T04] (  6, 18   )  double  ->  [rbp-0x28]  
;  V02 loc1         [V02,T05] (  6, 18   )  double  ->  [rbp-0x30]  
;  V03 loc2         [V03,T00] (  8, 26   )     int  ->  r14        
;  V04 tmp0         [V04    ] (  3,  6   )  struct (16) [rbp-0x40]   do-not-enreg[XSBR] multireg-ret addr-exposed
;# V05 OutArgs      [V05    ] (  1,  1   )  lclBlk ( 0) [rsp+0x00]  
;  V06 cse0         [V06,T03] (  8, 14   )     int  ->  r15        
;  V07 cse1         [V07,T01] (  6, 24   )    long  ->  r12        
;
; Lcl frame size = 32

G_M64830_IG01:
; Prolog. Saves rbp plus the callee-saved registers used below (r15, r14, r12,
; rbx), reserves 32 bytes of locals and points rbp at the saved-rbp slot
; (5 pushes = 40 bytes, +32 locals, so rsp+40H is the first push).
; The array reference arrives in rdi and is kept in rbx for the whole method.
       55                   push     rbp
       4157                 push     r15
       4156                 push     r14
       4154                 push     r12
       53                   push     rbx
       4883EC20             sub      rsp, 32
       C5F877               vzeroupper 
       488D6C2440           lea      rbp, [rsp+40H]
       488BDF               mov      rbx, rdi

G_M64830_IG02:
; Loop setup. xmm0 / xmm1 receive the initial values of loc0 (min) and loc1
; (max) from the method's read-only data (presumably double.MaxValue and
; double.MinValue, matching the C# initializers). Only max is written to its
; stack home [rbp-30H] here; min stays in xmm0 as the first argument of the
; first Math.Min call. r14d = i = 0, r15d = array length read from [rbx+8].
; A length <= 0 skips the loop via IG07.
       C4E17B1005A0000000   vmovsd   xmm0, qword ptr [reloc @RWD00]
       C4E17B100D9F000000   vmovsd   xmm1, qword ptr [reloc @RWD08]
       C4E17B114DD0         vmovsd   qword ptr [rbp-30H], xmm1
       4533F6               xor      r14d, r14d
       448B7B08             mov      r15d, dword ptr [rbx+8]
       4585FF               test     r15d, r15d
       7E79                 jle      SHORT G_M64830_IG07

G_M64830_IG03:
; Loop body; on entry xmm0 holds the current min.
; r12 = (long)i, and array[i] is read from [rbx+8*r12+16].
; Math.Min and Math.Max are both emitted as real calls, so min ([rbp-28H]) and
; max ([rbp-30H]) are stored to / reloaded from the stack around them and
; array[i] is loaded from memory twice per iteration.
; The back edge goes through IG06, which reloads min into xmm0.
       4D63E6               movsxd   r12, r14d
       C4A17B104CE310       vmovsd   xmm1, qword ptr [rbx+8*r12+16]
       E8265D63FF           call     System.Math:Min(double,double):double
       C4E17B1145D8         vmovsd   qword ptr [rbp-28H], xmm0
       C4A17B104CE310       vmovsd   xmm1, qword ptr [rbx+8*r12+16]
       C4E17B1045D0         vmovsd   xmm0, qword ptr [rbp-30H]
       E88E5863FF           call     System.Math:Max(double,double):double
       C4E17B1145D0         vmovsd   qword ptr [rbp-30H], xmm0
       41FFC6               inc      r14d
       453BFE               cmp      r15d, r14d
       7F3C                 jg       SHORT G_M64830_IG06

G_M64830_IG04:
; Build the result. The 16-byte tuple temporary at [rbp-40H] is zeroed, then
; the ValueTuple constructor is called with rdi = address of the temporary,
; xmm0 = min, xmm1 = max. Afterwards the two fields are loaded back into
; xmm0 / xmm1, which carry the struct return value.
       488D7DC0             lea      rdi, bword ptr [rbp-40H]
       C4E17157C9           vxorpd   xmm1, xmm1
       C4E17A7F0F           vmovdqu  qword ptr [rdi], xmm1
       488D7DC0             lea      rdi, bword ptr [rbp-40H]
       C4E17B1045D8         vmovsd   xmm0, qword ptr [rbp-28H]
       C4E17B104DD0         vmovsd   xmm1, qword ptr [rbp-30H]
       E885F4FFFF           call     System.ValueTuple`2[Double,Double][System.Double,System.Double]:.ctor(double,double):this
       C4E17B1045C0         vmovsd   xmm0, qword ptr [rbp-40H]
       C4E17B104DC8         vmovsd   xmm1, qword ptr [rbp-38H]

G_M64830_IG05:
; Epilog. rbp-20H is the address of the last register pushed (rbx); the
; registers are restored in reverse push order before returning.
       488D65E0             lea      rsp, [rbp-20H]
       5B                   pop      rbx
       415C                 pop      r12
       415E                 pop      r14
       415F                 pop      r15
       5D                   pop      rbp
       C3                   ret      

G_M64830_IG06:
; Loop back edge: reload the current min into xmm0 (the first argument of the
; next Math.Min call) and continue with the loop body.
       C4E17B1045D8         vmovsd   xmm0, qword ptr [rbp-28H]
       EB87                 jmp      SHORT G_M64830_IG03

G_M64830_IG07:
; Empty-array path: the loop never ran, so store the initial min to its stack
; home so that IG04 can read it from [rbp-28H] like in the normal path.
       C4E17B1145D8         vmovsd   qword ptr [rbp-28H], xmm0
       EBB4                 jmp      SHORT G_M64830_IG04

; Total bytes of code 188, prolog size 20 for method ConsoleApplication.Program:A(ref):struct

Variant B

// Variant B: single pass over `array` that tracks the smallest and largest
// element with plain `<` / `>` comparisons instead of Math.Min / Math.Max.
//
// Returns the pair (min, max). For an empty array the loop body never runs,
// so the initial values (double.MaxValue, double.MinValue) are returned as-is.
//
// An element for which both comparisons are false (e.g. NaN, since every
// ordered comparison with NaN is false) leaves min and max unchanged.
private static (double min, double max) B(double[] array)
{
    // Start from the extremes so that any element replaces them.
    double min = double.MaxValue;
    double max = double.MinValue;

    for (int i = 0; i < array.Length; ++i)
    {
        // The two tests are independent: one element may update both values.
        if (array[i] < min) min = array[i];
        if (array[i] > max) max = array[i];
    }

    return (min, max);
}
; Assembly listing for method ConsoleApplication.Program:B(ref):struct
; Emitting BLENDED_CODE for X64 CPU with AVX
; optimized code
; rbp based frame
; fully interruptible
; Final local variable assignments
;
;  V00 arg0         [V00,T01] (  7, 13   )     ref  ->  rdi         class-hnd
;  V01 loc0         [V01,T04] (  6, 14   )  double  ->  mm0        
;  V02 loc1         [V02,T05] (  6, 14   )  double  ->  mm1        
;  (console output "(0.030366990729406, 0.999972697813051)", printed twice, was interleaved into the row above in the original dump)
;  V03 loc2         [V03,T00] (  8, 26   )     int  ->  rax        
;  V04 tmp0         [V04    ] (  3,  6   )  struct (16) [rbp-0x10]   do-not-enreg[XSBR] multireg-ret addr-exposed
;# V05 OutArgs      [V05    ] (  1,  1   )  lclBlk ( 0) [rsp+0x00]  
;  V06 cse0         [V06,T03] ( 10, 32   )  double  ->  mm2        
;  V07 cse1         [V07,T02] (  8, 14   )     int  ->  rsi        
;
; Lcl frame size = 16

G_M64829_IG01:
; Prolog. Only rbp is saved and 16 bytes are reserved for the tuple temporary;
; rbp is pointed at the saved-rbp slot. The array reference stays in rdi,
; where it arrived.
       55                   push     rbp
       4883EC10             sub      rsp, 16
       C5F877               vzeroupper 
       488D6C2410           lea      rbp, [rsp+10H]

G_M64829_IG02:
; Loop setup. xmm0 = min and xmm1 = max are initialised from the method's
; read-only data (presumably double.MaxValue and double.MinValue, matching the
; C# initializers) and stay in registers for the whole loop.
; eax = i = 0, esi = array length read from [rdi+8].
; A length <= 0 skips the loop and goes straight to IG06.
       C4E17B10056A000000   vmovsd   xmm0, qword ptr [reloc @RWD00]
       C4E17B100D69000000   vmovsd   xmm1, qword ptr [reloc @RWD08]
       33C0                 xor      eax, eax
       8B7708               mov      esi, dword ptr [rdi+8]
       85F6                 test     esi, esi
       7E28                 jle      SHORT G_M64829_IG06

G_M64829_IG03:
; Loop body, part 1: array[i] is loaded once into xmm2 and reused for both
; comparisons. vucomisd compares min with array[i]; jbe skips the move unless
; min > array[i], otherwise min = array[i].
       4863D0               movsxd   rdx, eax
       C4E17B1054D710       vmovsd   xmm2, qword ptr [rdi+8*rdx+16]
       C4E1792EC2           vucomisd xmm0, xmm2
       7605                 jbe      SHORT G_M64829_IG04
       C4E17828C2           vmovaps  xmm0, xmm2

G_M64829_IG04:
; Loop body, part 2: compares array[i] with max; jbe skips the move unless
; array[i] > max, otherwise max = array[i].
       C4E1792ED1           vucomisd xmm2, xmm1
       7605                 jbe      SHORT G_M64829_IG05
       C4E17828CA           vmovaps  xmm1, xmm2

G_M64829_IG05:
; ++i; loop again while length > i.
       FFC0                 inc      eax
       3BF0                 cmp      esi, eax
       7FD8                 jg       SHORT G_M64829_IG03

G_M64829_IG06:
; Build the result. The 16-byte tuple temporary at [rbp-10H] is zeroed through
; xmm2, so xmm0 / xmm1 still hold min / max and are passed unchanged to the
; ValueTuple constructor (rdi = address of the temporary). Afterwards the two
; fields are loaded back into xmm0 / xmm1, which carry the struct return value.
       488D7DF0             lea      rdi, bword ptr [rbp-10H]
       C4E16957D2           vxorpd   xmm2, xmm2
       C4E17A7F17           vmovdqu  qword ptr [rdi], xmm2
       488D7DF0             lea      rdi, bword ptr [rbp-10H]
       E829FFFFFF           call     System.ValueTuple`2[Double,Double][System.Double,System.Double]:.ctor(double,double):this
       C4E17B1045F0         vmovsd   xmm0, qword ptr [rbp-10H]
       C4E17B104DF8         vmovsd   xmm1, qword ptr [rbp-08H]

G_M64829_IG07:
; Epilog: release the locals, restore rbp and return.
       488D6500             lea      rsp, [rbp]
       5D                   pop      rbp
       C3                   ret      

; Total bytes of code 121, prolog size 13 for method ConsoleApplication.Program:B(ref):struct