MihuBot / runtime-utils

0 stars 0 forks source link

[JitDiff X64] xtqqczze/dotnet-runtime/UInt128Division #489

Open MihuBot opened 1 week ago

MihuBot commented 1 week ago

Job completed in 14 minutes.

Diffs

Found 261 files with textual diffs.

Summary of Code Size diffs:
(Lower is better)

Total bytes of base: 41569936
Total bytes of diff: 41569869
Total bytes of delta: -67 (-0.00 % of base)
Total relative delta: -1.06
    diff is an improvement.
    relative diff is an improvement.

Top file improvements (bytes):
         -59 : System.Net.Sockets.dasm (-0.02 % of base)
          -8 : System.Private.CoreLib.dasm (-0.00 % of base)

2 total files with Code Size differences (2 improved, 0 regressed), 257 unchanged.

Top method improvements (bytes):
         -59 (-100.00 % of base) : System.Net.Sockets.dasm - System.Net.Sockets.SocketAsyncEngine+<>c:<.ctor>b__15_0(System.Object):this (FullOpts)
          -8 (-5.76 % of base) : System.Private.CoreLib.dasm - System.UInt128:op_Division(System.UInt128,System.UInt128):System.UInt128 (FullOpts)

Top method improvements (percentages):
         -59 (-100.00 % of base) : System.Net.Sockets.dasm - System.Net.Sockets.SocketAsyncEngine+<>c:<.ctor>b__15_0(System.Object):this (FullOpts)
          -8 (-5.76 % of base) : System.Private.CoreLib.dasm - System.UInt128:op_Division(System.UInt128,System.UInt128):System.UInt128 (FullOpts)

2 total methods with Code Size differences (2 improved, 0 regressed), 252747 unchanged.

--------------------------------------------------------------------------------

Artifacts:

MihuBot commented 1 week ago

Top method improvements

-8 (-5.76 % of base) - System.UInt128:op_Division(System.UInt128,System.UInt128):System.UInt128

```diff
 ; Assembly listing for method System.UInt128:op_Division(System.UInt128,System.UInt128):System.UInt128 (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; partially interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 5 single block inlinees; 2 inlinees without PGO data
+; 0 inlinees with PGO data; 5 single block inlinees; 0 inlinees without PGO data
 ; Final local variable assignments
 ;
 ;* V00 arg0 [V00 ] ( 0, 0 ) struct (16) zero-ref multireg-arg single-def
 ;* V01 arg1 [V01 ] ( 0, 0 ) struct (16) zero-ref multireg-arg single-def
 ;# V02 OutArgs [V02 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
 ;* V03 tmp1 [V03 ] ( 0, 0 ) struct (16) zero-ref do-not-enreg[SBR] multireg-ret "Return value temp for multireg return"
 ;* V04 tmp2 [V04 ] ( 0, 0 ) struct (16) zero-ref do-not-enreg[SBR] multireg-ret "Return value temp for multireg return"
 ;* V05 tmp3 [V05 ] ( 0, 0 ) struct (16) zero-ref do-not-enreg[SBR] multireg-ret "Return value temp for multireg return"
 ;* V06 tmp4 [V06 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
 ;* V07 tmp5 [V07 ] ( 0, 0 ) struct (16) zero-ref multireg-ret ld-addr-op "NewObj constructor temp"
-;* V08 tmp6 [V08,T04] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V09 tmp7 [V09,T12] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V10 tmp8 [V10 ] ( 0, 0 ) struct (16) zero-ref multireg-ret ld-addr-op "Inline ldloca(s) first use temp"
-;* V11 tmp9 [V11 ] ( 0, 0 ) struct (16) zero-ref multireg-ret ld-addr-op "NewObj constructor temp"
-; V12 tmp10 [V12,T03] ( 5, 3 ) long -> rdi single-def "field V00._lower (fldOffset=0x0)" P-INDEP
-; V13 tmp11 [V13,T01] ( 6, 3.50) long -> rsi single-def "field V00._upper (fldOffset=0x8)" P-INDEP
-; V14 tmp12 [V14,T02] ( 6, 3.50) long -> r8 single-def "field V01._lower (fldOffset=0x0)" P-INDEP
-; V15 tmp13 [V15,T00] ( 6, 4 ) long -> rcx single-def "field V01._upper (fldOffset=0x8)" P-INDEP
-;* V16 tmp14 [V16 ] ( 0, 0 ) long -> zero-ref "field V03._lower (fldOffset=0x0)" P-DEP
-;* V17 tmp15 [V17 ] ( 0, 0 ) long -> zero-ref "field V03._upper (fldOffset=0x8)" P-DEP
-;* V18 tmp16 [V18 ] ( 0, 0 ) long -> zero-ref "field V04._lower (fldOffset=0x0)" P-DEP
-;* V19 tmp17 [V19 ] ( 0, 0 ) long -> zero-ref "field V04._upper (fldOffset=0x8)" P-DEP
-;* V20 tmp18 [V20 ] ( 0, 0 ) long -> zero-ref "field V05._lower (fldOffset=0x0)" P-DEP
-;* V21 tmp19 [V21 ] ( 0, 0 ) long -> zero-ref "field V05._upper (fldOffset=0x8)" P-DEP
-; V22 tmp20 [V22,T05] ( 2, 1 ) long -> rax "field V07._lower (fldOffset=0x0)" P-INDEP
-; V23 tmp21 [V23,T06] ( 2, 1 ) long -> rdx "field V07._upper (fldOffset=0x8)" P-INDEP
-; V24 tmp22 [V24,T07] ( 2, 1 ) long -> rax "field V10._lower (fldOffset=0x0)" P-INDEP
-; V25 tmp23 [V25,T08] ( 2, 1 ) long -> rdx "field V10._upper (fldOffset=0x8)" P-INDEP
-; V26 tmp24 [V26,T09] ( 2, 1 ) long -> rax "field V11._lower (fldOffset=0x0)" P-INDEP
-; V27 tmp25 [V27,T10] ( 2, 1 ) long -> rdx "field V11._upper (fldOffset=0x8)" P-INDEP
-; V28 tmp26 [V28,T11] ( 2, 1 ) struct (16) [rbp-0x10] do-not-enreg[SR] multireg-ret "Return value temp for multi-reg return (rejected tail call)."
+;* V08 tmp6 [V08 ] ( 0, 0 ) struct (16) zero-ref multireg-ret ld-addr-op "Inline ldloca(s) first use temp"
+;* V09 tmp7 [V09 ] ( 0, 0 ) struct (16) zero-ref multireg-ret ld-addr-op "NewObj constructor temp"
+; V10 tmp8 [V10,T03] ( 4, 2.50) long -> rdi single-def "field V00._lower (fldOffset=0x0)" P-INDEP
+; V11 tmp9 [V11,T01] ( 5, 3 ) long -> rsi single-def "field V00._upper (fldOffset=0x8)" P-INDEP
+; V12 tmp10 [V12,T02] ( 5, 3 ) long -> r8 single-def "field V01._lower (fldOffset=0x0)" P-INDEP
+; V13 tmp11 [V13,T00] ( 5, 3.50) long -> rcx single-def "field V01._upper (fldOffset=0x8)" P-INDEP
+;* V14 tmp12 [V14 ] ( 0, 0 ) long -> zero-ref "field V03._lower (fldOffset=0x0)" P-DEP
+;* V15 tmp13 [V15 ] ( 0, 0 ) long -> zero-ref "field V03._upper (fldOffset=0x8)" P-DEP
+;* V16 tmp14 [V16 ] ( 0, 0 ) long -> zero-ref "field V04._lower (fldOffset=0x0)" P-DEP
+;* V17 tmp15 [V17 ] ( 0, 0 ) long -> zero-ref "field V04._upper (fldOffset=0x8)" P-DEP
+;* V18 tmp16 [V18 ] ( 0, 0 ) long -> zero-ref "field V05._lower (fldOffset=0x0)" P-DEP
+;* V19 tmp17 [V19 ] ( 0, 0 ) long -> zero-ref "field V05._upper (fldOffset=0x8)" P-DEP
+; V20 tmp18 [V20,T04] ( 2, 1 ) long -> rax "field V07._lower (fldOffset=0x0)" P-INDEP
+; V21 tmp19 [V21,T05] ( 2, 1 ) long -> rdx "field V07._upper (fldOffset=0x8)" P-INDEP
+; V22 tmp20 [V22,T06] ( 2, 1 ) long -> rax "field V08._lower (fldOffset=0x0)" P-INDEP
+; V23 tmp21 [V23,T07] ( 2, 1 ) long -> rdx "field V08._upper (fldOffset=0x8)" P-INDEP
+; V24 tmp22 [V24,T08] ( 2, 1 ) long -> rax "field V09._lower (fldOffset=0x0)" P-INDEP
+; V25 tmp23 [V25,T09] ( 2, 1 ) long -> rdx "field V09._upper (fldOffset=0x8)" P-INDEP
+; V26 tmp24 [V26,T10] ( 2, 1 ) struct (16) [rbp-0x10] do-not-enreg[SR] multireg-ret "Return value temp for multi-reg return (rejected tail call)."
 ;
 ; Lcl frame size = 16
 G_M54494_IG01:
        push rbp
        sub rsp, 16
        lea rbp, [rsp+0x10]
        mov r8, rdx
        ;; size=13 bbWeight=1 PerfScore 2.00
 G_M54494_IG02:
        test rcx, rcx
-       jne SHORT G_M54494_IG05
+       jne SHORT G_M54494_IG04
        ;; size=5 bbWeight=1 PerfScore 1.25
 G_M54494_IG03:
        test r8, r8
-       je SHORT G_M54494_IG13
-       test rsi, rsi
+       jne SHORT G_M54494_IG09
+       jmp SHORT G_M54494_IG13
+       ;; size=7 bbWeight=0.50 PerfScore 1.62
+G_M54494_IG04:
+       cmp rsi, rcx
+       ja SHORT G_M54494_IG11
        jne SHORT G_M54494_IG05
-       mov rax, rdi
-       xor edx, edx
-       div rdx:rax, r8
+       cmp rdi, r8
+       jae SHORT G_M54494_IG07
+       ;; size=12 bbWeight=0.50 PerfScore 1.75
+G_M54494_IG05:
+       xor eax, eax
        xor edx, edx
-       ;; size=20 bbWeight=0.50 PerfScore 32.12
-G_M54494_IG04:
+       ;; size=4 bbWeight=0.50 PerfScore 0.25
+G_M54494_IG06:
        add rsp, 16
        pop rbp
        ret
        ;; size=6 bbWeight=0.50 PerfScore 0.88
-G_M54494_IG05:
-       cmp rcx, rsi
-       ja SHORT G_M54494_IG06
-       jne SHORT G_M54494_IG09
-       cmp r8, rdi
-       jb SHORT G_M54494_IG09
-       ;; size=12 bbWeight=0.50 PerfScore 1.75
-G_M54494_IG06:
-       cmp r8, rdi
-       jne SHORT G_M54494_IG07
-       cmp rcx, rsi
-       je SHORT G_M54494_IG11
-       ;; size=10 bbWeight=0.50 PerfScore 1.25
 G_M54494_IG07:
-       xor eax, eax
+       mov eax, 1
        xor edx, edx
-       ;; size=4 bbWeight=0.50 PerfScore 0.25
+       ;; size=7 bbWeight=0.50 PerfScore 0.25
 G_M54494_IG08:
        add rsp, 16
        pop rbp
        ret
        ;; size=6 bbWeight=0.50 PerfScore 0.88
 G_M54494_IG09:
+       test rsi, rsi
+       jne SHORT G_M54494_IG11
+       mov rax, rdi
+       xor edx, edx
+       div rdx:rax, r8
+       xor edx, edx
+       ;; size=15 bbWeight=0.50 PerfScore 31.50
+G_M54494_IG10:
+       add rsp, 16
+       pop rbp
+       ret
+       ;; size=6 bbWeight=0.50 PerfScore 0.88
+G_M54494_IG11:
        mov rdx, r8
        mov rax, 0xD1FFAB1E ; code for System.UInt128:g__DivideSlow|111_2(System.UInt128,System.UInt128):System.UInt128
        call [rax]System.UInt128:g__DivideSlow|111_2(System.UInt128,System.UInt128):System.UInt128
        mov qword ptr [rbp-0x10], rax
        mov qword ptr [rbp-0x08], rdx
        mov rax, qword ptr [rbp-0x10]
        mov rdx, qword ptr [rbp-0x08]
        ;; size=31 bbWeight=0.50 PerfScore 3.75
-G_M54494_IG10:
-       add rsp, 16
-       pop rbp
-       ret
-       ;; size=6 bbWeight=0.50 PerfScore 0.88
-G_M54494_IG11:
-       mov eax, 1
-       xor edx, edx
-       ;; size=7 bbWeight=0.50 PerfScore 0.25
 G_M54494_IG12:
        add rsp, 16
        pop rbp
        ret
        ;; size=6 bbWeight=0.50 PerfScore 0.88
 G_M54494_IG13:
        mov rax, 0xD1FFAB1E ; code for System.ThrowHelper:ThrowDivideByZeroException()
        call [rax]System.ThrowHelper:ThrowDivideByZeroException()
        int3
        ;; size=13 bbWeight=0 PerfScore 0.00
-; Total bytes of code 139, prolog size 10, PerfScore 46.12, instruction count 49, allocated bytes for code 139 (MethodHash=8dc72b21) for method System.UInt128:op_Division(System.UInt128,System.UInt128):System.UInt128 (FullOpts)
+; Total bytes of code 131, prolog size 10, PerfScore 45.88, instruction count 46, allocated bytes for code 131 (MethodHash=8dc72b21) for method System.UInt128:op_Division(System.UInt128,System.UInt128):System.UInt128 (FullOpts)
```

MihuBot commented 1 week ago

@xtqqczze