Closed gfoidl closed 6 years ago
JIT produces much better code.
private static (double min, double max) A(double[] array) { double min = double.MaxValue; double max = double.MinValue; for (int i = 0; i < array.Length; ++i) { min = Math.Min(min, array[i]); max = Math.Max(max, array[i]); } return (min, max); }
; Assembly listing for method ConsoleApplication.Program:A(ref):struct ; Emitting BLENDED_CODE for X64 CPU with AVX ; optimized code ; rbp based frame ; partially interruptible ; Final local variable assignments ; ; V00 arg0 [V00,T02] ( 9, 21 ) ref -> rbx class-hnd ; V01 loc0 [V01,T04] ( 6, 18 ) double -> [rbp-0x28] ; V02 loc1 [V02,T05] ( 6, 18 ) double -> [rbp-0x30] ; V03 loc2 [V03,T00] ( 8, 26 ) int -> r14 ; V04 tmp0 [V04 ] ( 3, 6 ) struct (16) [rbp-0x40] do-not-enreg[XSBR] multireg-ret addr-exposed ;# V05 OutArgs [V05 ] ( 1, 1 ) lclBlk ( 0) [rsp+0x00] ; V06 cse0 [V06,T03] ( 8, 14 ) int -> r15 ; V07 cse1 [V07,T01] ( 6, 24 ) long -> r12 ; ; Lcl frame size = 32 G_M64830_IG01: 55 push rbp 4157 push r15 4156 push r14 4154 push r12 53 push rbx 4883EC20 sub rsp, 32 C5F877 vzeroupper 488D6C2440 lea rbp, [rsp+40H] 488BDF mov rbx, rdi G_M64830_IG02: C4E17B1005A0000000 vmovsd xmm0, qword ptr [reloc @RWD00] C4E17B100D9F000000 vmovsd xmm1, qword ptr [reloc @RWD08] C4E17B114DD0 vmovsd qword ptr [rbp-30H], xmm1 4533F6 xor r14d, r14d 448B7B08 mov r15d, dword ptr [rbx+8] 4585FF test r15d, r15d 7E79 jle SHORT G_M64830_IG07 G_M64830_IG03: 4D63E6 movsxd r12, r14d C4A17B104CE310 vmovsd xmm1, qword ptr [rbx+8*r12+16] E8265D63FF call System.Math:Min(double,double):double C4E17B1145D8 vmovsd qword ptr [rbp-28H], xmm0 C4A17B104CE310 vmovsd xmm1, qword ptr [rbx+8*r12+16] C4E17B1045D0 vmovsd xmm0, qword ptr [rbp-30H] E88E5863FF call System.Math:Max(double,double):double C4E17B1145D0 vmovsd qword ptr [rbp-30H], xmm0 41FFC6 inc r14d 453BFE cmp r15d, r14d 7F3C jg SHORT G_M64830_IG06 G_M64830_IG04: 488D7DC0 lea rdi, bword ptr [rbp-40H] C4E17157C9 vxorpd xmm1, xmm1 C4E17A7F0F vmovdqu qword ptr [rdi], xmm1 488D7DC0 lea rdi, bword ptr [rbp-40H] C4E17B1045D8 vmovsd xmm0, qword ptr [rbp-28H] C4E17B104DD0 vmovsd xmm1, qword ptr [rbp-30H] E885F4FFFF call System.ValueTuple`2[Double,Double][System.Double,System.Double]:.ctor(double,double):this C4E17B1045C0 vmovsd xmm0, qword ptr [rbp-40H] C4E17B104DC8 vmovsd xmm1, qword ptr [rbp-38H] G_M64830_IG05: 488D65E0 lea rsp, [rbp-20H] 5B pop rbx 415C pop r12 415E pop r14 415F pop r15 5D pop rbp C3 ret G_M64830_IG06: C4E17B1045D8 vmovsd xmm0, qword ptr [rbp-28H] EB87 jmp SHORT G_M64830_IG03 G_M64830_IG07: C4E17B1145D8 vmovsd qword ptr [rbp-28H], xmm0 EBB4 jmp SHORT G_M64830_IG04 ; Total bytes of code 188, prolog size 20 for method ConsoleApplication.Program:A(ref):struct
private static (double min, double max) B(double[] array) { double min = double.MaxValue; double max = double.MinValue; for (int i = 0; i < array.Length; ++i) { if (array[i] < min) min = array[i]; if (array[i] > max) max = array[i]; } return (min, max); }
; Assembly listing for method ConsoleApplication.Program:B(ref):struct ; Emitting BLENDED_CODE for X64 CPU with AVX ; optimized code ; rbp based frame ; fully interruptible ; Final local variable assignments ; ; V00 arg0 [V00,T01] ( 7, 13 ) ref -> rdi class-hnd ; V01 loc0 [V01,T04] ( 6, 14 ) double -> mm0 ; V02 loc1 (0.030366990729406, 0.999972697813051) (0.030366990729406, 0.999972697813051) [V02,T05] ( 6, 14 ) double -> mm1 ; V03 loc2 [V03,T00] ( 8, 26 ) int -> rax ; V04 tmp0 [V04 ] ( 3, 6 ) struct (16) [rbp-0x10] do-not-enreg[XSBR] multireg-ret addr-exposed ;# V05 OutArgs [V05 ] ( 1, 1 ) lclBlk ( 0) [rsp+0x00] ; V06 cse0 [V06,T03] ( 10, 32 ) double -> mm2 ; V07 cse1 [V07,T02] ( 8, 14 ) int -> rsi ; ; Lcl frame size = 16 G_M64829_IG01: 55 push rbp 4883EC10 sub rsp, 16 C5F877 vzeroupper 488D6C2410 lea rbp, [rsp+10H] G_M64829_IG02: C4E17B10056A000000 vmovsd xmm0, qword ptr [reloc @RWD00] C4E17B100D69000000 vmovsd xmm1, qword ptr [reloc @RWD08] 33C0 xor eax, eax 8B7708 mov esi, dword ptr [rdi+8] 85F6 test esi, esi 7E28 jle SHORT G_M64829_IG06 G_M64829_IG03: 4863D0 movsxd rdx, eax C4E17B1054D710 vmovsd xmm2, qword ptr [rdi+8*rdx+16] C4E1792EC2 vucomisd xmm0, xmm2 7605 jbe SHORT G_M64829_IG04 C4E17828C2 vmovaps xmm0, xmm2 G_M64829_IG04: C4E1792ED1 vucomisd xmm2, xmm1 7605 jbe SHORT G_M64829_IG05 C4E17828CA vmovaps xmm1, xmm2 G_M64829_IG05: FFC0 inc eax 3BF0 cmp esi, eax 7FD8 jg SHORT G_M64829_IG03 G_M64829_IG06: 488D7DF0 lea rdi, bword ptr [rbp-10H] C4E16957D2 vxorpd xmm2, xmm2 C4E17A7F17 vmovdqu qword ptr [rdi], xmm2 488D7DF0 lea rdi, bword ptr [rbp-10H] E829FFFFFF call System.ValueTuple`2[Double,Double][System.Double,System.Double]:.ctor(double,double):this C4E17B1045F0 vmovsd xmm0, qword ptr [rbp-10H] C4E17B104DF8 vmovsd xmm1, qword ptr [rbp-08H] G_M64829_IG07: 488D6500 lea rsp, [rbp] 5D pop rbp C3 ret ; Total bytes of code 121, prolog size 13 for method ConsoleApplication.Program:B(ref):struct
JIT produces much better code.
Variant A
Variant B