dotnet / runtime

.NET is a cross-platform runtime for cloud, mobile, desktop, and IoT apps.
https://docs.microsoft.com/dotnet/core/
MIT License
15.27k stars 4.73k forks source link

Inlined Struct wrapper .ctors are non-zero cost #10540

Closed benaadams closed 3 years ago

benaadams commented 6 years ago

(Some now fixed by https://github.com/dotnet/coreclr/pull/19429)

This is problematic for example when adding formatting and/or buffering capabilities over Span<T>

As seen in "Add support for BufferWriter<T> to the JsonWriter" https://github.com/dotnet/corefxlab/pull/2366

When a struct wraps a struct (but has no additional fields) it becomes a non-zero cost abstraction for its constructor (but not method calls) e.g. sharplab.io

Given

using System.Runtime.CompilerServices;

/// <summary>
/// Repro entry points. Each method creates a <see cref="C"/> and calls <c>M()</c> on it,
/// either directly or through one to five levels of nested single-field struct wrappers
/// (<see cref="S1"/> .. <see cref="S5"/>), so the generated code for each depth can be compared.
/// </summary>
public static class Program
{  
   // Baseline: no struct wrapper, M() is called directly on the new C instance.
   public static void ViaClass()
   {
       var c = new C();
       c.M();
   }

   // One wrapper level: S1 -> C.
   public static void ViaStruct1()
   {
       var s1 = new S1(new C());
       s1.M();
   }

   // Two wrapper levels: S2 -> S1 -> C.
   public static void ViaStruct2()
   {
       var s2 = new S2(new S1(new C()));
       s2.M();
   }

   // Three wrapper levels: S3 -> S2 -> S1 -> C.
   public static void ViaStruct3()
   {
       var s3 = new S3(new S2(new S1(new C())));
       s3.M();
   }

   // Four wrapper levels: S4 -> S3 -> S2 -> S1 -> C.
   public static void ViaStruct4()
   {
       var s4 = new S4(new S3(new S2(new S1(new C()))));
       s4.M();
   }

   // Five wrapper levels: S5 -> S4 -> S3 -> S2 -> S1 -> C.
   public static void ViaStruct5()
   {
       var s5 = new S5(new S4(new S3(new S2(new S1(new C())))));
       s5.M();
   }
}

/// <summary>
/// Reference type that sits at the bottom of every wrapper chain in this repro.
/// </summary>
public class C 
{
    /// <summary>
    /// Intentionally empty. Marked <c>NoInlining</c> so the call is not inlined
    /// into the caller and remains visible as a call in the generated code.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void M()
    {
    }
}

/// <summary>
/// First-level wrapper: a struct whose only field is a <see cref="C"/> reference.
/// </summary>
public struct S1
{
    // Sole field; the struct carries no data beyond the wrapped reference.
    private C _c;

    // Stores the supplied reference in the single field.
    public S1(C c) => _c = c;    
    // Forwards to M() on the wrapped C instance.
    public void M() => _c.M();
}

/// <summary>
/// Second-level wrapper: a struct whose only field is an <see cref="S1"/>.
/// </summary>
public struct S2
{
    // Sole field; the struct carries no data beyond the wrapped S1.
    private S1 _s;

    // Copies the supplied S1 value into the single field.
    public S2(S1 s) => _s = s;    
    // Forwards to M() on the wrapped S1.
    public void M() => _s.M();
}

/// <summary>
/// Third-level wrapper: a struct whose only field is an <see cref="S2"/>.
/// </summary>
public struct S3
{
    // Sole field; the struct carries no data beyond the wrapped S2.
    private S2 _s;

    // Copies the supplied S2 value into the single field.
    public S3(S2 s) => _s = s;    
    // Forwards to M() on the wrapped S2.
    public void M() => _s.M();
}

/// <summary>
/// Fourth-level wrapper: a struct whose only field is an <see cref="S3"/>.
/// </summary>
public struct S4
{
    // Sole field; the struct carries no data beyond the wrapped S3.
    private S3 _s;

    // Copies the supplied S3 value into the single field.
    public S4(S3 s) => _s = s;    
    // Forwards to M() on the wrapped S3.
    public void M() => _s.M();
}

/// <summary>
/// Fifth-level (outermost) wrapper: a struct whose only field is an <see cref="S4"/>.
/// </summary>
public struct S5
{
    // Sole field; the struct carries no data beyond the wrapped S4.
    private S4 _s;

    // Copies the supplied S4 value into the single field.
    public S5(S4 s) => _s = s;    
    // Forwards to M() on the wrapped S4.
    public void M() => _s.M();
}

Each additional wrapper introduces redundant work in the asm e.g.

Program.ViaClass()
    L0000: sub rsp, 0x28
    L0004: mov rcx, 0x7ffa88b77008
    L000e: call 0x7ffadf232520
    L0013: mov rcx, rax
    L0016: call C.M()
    L001b: nop
    L001c: add rsp, 0x28
    L0020: ret

Program.ViaStruct1()
    L0000: sub rsp, 0x28
    L0004: mov rcx, 0x7ffa88b77008
    L000e: call 0x7ffadf232520
    L0013: mov rcx, rax
    L0016: call C.M()
    L001b: nop
    L001c: add rsp, 0x28
    L0020: ret

Program.ViaStruct2()
    L0000: sub rsp, 0x28
    L0004: mov rcx, 0x7ffa88b77008
    L000e: call 0x7ffadf232520
    L0013: mov rcx, rax
    L0016: mov rax, rcx
    L0019: mov rcx, rax
    L001c: mov [rsp+0x20], rcx
    L0021: mov rcx, [rsp+0x20]
    L0026: cmp [rcx], ecx
    L0028: call C.M()
    L002d: nop
    L002e: add rsp, 0x28
    L0032: ret

Program.ViaStruct3()
    L0000: sub rsp, 0x28
    L0004: xor eax, eax
    L0006: mov [rsp+0x20], rax
    L000b: mov rcx, 0x7ffa88b77008
    L0015: call 0x7ffadf232520
    L001a: mov rcx, rax
    L001d: mov rax, rcx
    L0020: mov rcx, rax
    L0023: mov rax, rcx
    L0026: mov rcx, rax
    L0029: lea rax, [rsp+0x20]
    L002e: mov [rax], rcx
    L0031: mov rcx, [rsp+0x20]
    L0036: cmp [rcx], ecx
    L0038: call C.M()
    L003d: nop
    L003e: add rsp, 0x28
    L0042: ret

Program.ViaStruct4()
    L0000: push rdi
    L0001: sub rsp, 0x40
    L0005: lea rdi, [rsp+0x28]
    L000a: mov ecx, 0x6
    L000f: xor eax, eax
    L0011: rep stosd
    L0013: mov rcx, 0x7ffa88b77008
    L001d: call 0x7ffadf232520
    L0022: mov rcx, rax
    L0025: mov rax, rcx
    L0028: mov rcx, rax
    L002b: mov rax, rcx
    L002e: xor ecx, ecx
    L0030: mov [rsp+0x30], rcx
    L0035: mov rcx, rax
    L0038: lea rax, [rsp+0x30]
    L003d: mov [rax], rcx
    L0040: mov rcx, [rsp+0x30]
    L0045: mov [rsp+0x28], rcx
    L004a: mov rcx, [rsp+0x28]
    L004f: lea rax, [rsp+0x38]
    L0054: mov [rax], rcx
    L0057: mov rcx, [rsp+0x38]
    L005c: cmp [rcx], ecx
    L005e: call C.M()
    L0063: nop
    L0064: add rsp, 0x40
    L0068: pop rdi
    L0069: ret

Program.ViaStruct5()
    L0000: push rdi
    L0001: sub rsp, 0x50
    L0005: lea rdi, [rsp+0x28]
    L000a: mov ecx, 0xa
    L000f: xor eax, eax
    L0011: rep stosd
    L0013: mov rcx, 0x7ffa88b77008
    L001d: call 0x7ffadf232520
    L0022: mov rcx, rax
    L0025: mov rax, rcx
    L0028: mov rcx, rax
    L002b: mov rax, rcx
    L002e: xor ecx, ecx
    L0030: mov [rsp+0x40], rcx
    L0035: mov rcx, rax
    L0038: lea rax, [rsp+0x40]
    L003d: mov [rax], rcx
    L0040: xor ecx, ecx
    L0042: mov [rsp+0x38], rcx
    L0047: mov rcx, [rsp+0x40]
    L004c: mov [rsp+0x30], rcx
    L0051: mov rcx, [rsp+0x30]
    L0056: lea rax, [rsp+0x38]
    L005b: mov [rax], rcx
    L005e: mov rcx, [rsp+0x38]
    L0063: mov [rsp+0x28], rcx
    L0068: mov rcx, [rsp+0x28]
    L006d: lea rax, [rsp+0x48]
    L0072: mov [rax], rcx
    L0075: mov rcx, [rsp+0x48]
    L007a: cmp [rcx], ecx
    L007c: call C.M()
    L0081: nop
    L0082: add rsp, 0x50
    L0086: pop rdi
    L0087: ret

category:cq theme:structs skill-level:expert cost:medium

benaadams commented 5 years ago

Looking at StreamReader.ReadBufferAsync() in coreclr they are all marked as do-not-enreg

`NewObj constructor temp` is one of the reasons, and `must-init addr-exposed ld-addr-op` the other (could be due to `in`, which reduced an additional copy? https://github.com/dotnet/coreclr/pull/22738)

; Assembly listing for method <ReadBufferAsync>d__69:MoveNext():this
       call     qword ptr [rax+32]Stream:ReadAsync(struct,struct):struct:this
       mov      rcx, gword ptr [rbp-58H]
       mov      eax, dword ptr [rbp-50H]
       mov      edx, dword ptr [rbp-4CH]
       movsx    rdx, dx
       xor      r8d, r8d
       lea      r9, bword ptr [rbp-B8H] ;  do-not-enreg[XSB] must-init addr-exposed ld-addr-op "Inline stloc first use temp"
       mov      gword ptr [r9], rcx
       mov      dword ptr [r9+8], eax
       mov      word  ptr [r9+12], dx
       mov      byte  ptr [r9+14], r8b

G_M42774_IG33:
       movdqu   xmm0, qword ptr [rbp-B8H] 
       movdqu   qword ptr [rbp-C8H], xmm0 ;  do-not-enreg[SFB] must-init "NewObj constructor temp"

G_M42774_IG34:
       movdqu   xmm0, qword ptr [rbp-C8H]
       movdqu   qword ptr [rbp-68H], xmm0 ;  do-not-enreg[XSFB] must-init addr-exposed ld-addr-op

G_M42774_IG35:
       movdqu   xmm0, qword ptr [rbp-68H]
       movdqu   qword ptr [rbp-D8H], xmm0 ;  do-not-enreg[SFB] must-init "NewObj constructor temp"

G_M42774_IG36:
       movdqu   xmm0, qword ptr [rbp-D8H]
       movdqu   qword ptr [rbp-78H], xmm0  ; do-not-enreg[XSFB] must-init addr-exposed ld-addr-op

G_M42774_IG37:
       mov      rsi, gword ptr [rbp-78H]
benaadams commented 5 years ago
***** BB41, stmt 109
     ( 10, 10) [001137] ------------  *  STMT      void  (IL 0x187...  ???)
N005 ( 10, 10) [001136] -A--G---R---  \--*  ASG       struct (copy) $VN.Void
N004 (  6,  7) [001135] n-----------     +--*  BLK(16)   struct
N003 (  3,  5) [001134] ------------     |  \--*  ADDR      byref  $19d
N002 (  3,  4) [001129] D------N----     |     \--*  LCL_FLD   struct V51 tmp37        d:2[+0] Fseq[_value]
N001 (  3,  2) [001131] ----G--N----     \--*  LCL_VAR   struct(AX) V50 tmp36         $345

***** BB41, stmt 110                  
     (  7,  5) [000142] ------------  *  STMT      void  (IL   ???...  ???)
N003 (  7,  5) [001091] -A------R---  \--*  ASG       struct (copy) $VN.Void
N002 (  3,  2) [001090] D------N----     +--*  LCL_VAR   struct V10 loc9         d:2
N001 (  3,  2) [001088] ------------     \--*  LCL_VAR   struct V51 tmp37        u:2 (last use) $383

***** BB41, stmt 111                  
     ( 13, 15) [001168] ------------  *  STMT      void  (IL 0x191...  ???)
N007 ( 13, 15) [001167] -A--G---R---  \--*  ASG       struct (copy) $VN.Void
N006 (  6,  7) [001166] n-----------     +--*  BLK(16)   struct
N005 (  3,  5) [001165] ------------     |  \--*  ADDR      byref  $19f
N004 (  3,  4) [001158] D------N----     |     \--*  LCL_FLD   struct V55 tmp41        d:2[+0] Fseq[_value]
N003 (  6,  7) [001163] n---G-------     \--*  IND       struct $3c3
N002 (  3,  5) [001159] ----G-------        \--*  ADDR      byref  $19e
N001 (  3,  4) [001162] -------N----           \--*  LCL_FLD   struct V10 loc9         u:2[+0] Fseq[_value] (last use) $3c3

***** BB41, stmt 112                  
     (  7,  5) [000151] ------------  *  STMT      void  (IL   ???...  ???)
N003 (  7,  5) [001155] -A--G---R---  \--*  ASG       struct (copy) $VN.Void
N002 (  3,  2) [001154] D---G--N----     +--*  LCL_VAR   struct(AX) V12 loc11        
N001 (  3,  2) [001152] ------------     \--*  LCL_VAR   struct V55 tmp41        u:2 (last use) $346

***** BB41, stmt 113                  
     (  3,  4) [001186] ------------  *  STMT      void  (IL 0x19A...  ???)
N003 (  3,  4) [001185] -A--G---R---  \--*  ASG       ref    $31d
N002 (  1,  1) [001184] D------N----     +--*  LCL_VAR   ref    V57 tmp43        d:2 $31d
N001 (  3,  4) [001182] ----G-------     \--*  LCL_FLD   ref    V12 loc11        [+0] Fseq[_value, _obj] $31d
  N1223.                    IL_OFFSET IL offset: 0x187
  N1225.                    V50 MEM
  N1227.                    LCL_FLD_ADDR V51 tmp37        d:2[+0] Fseq[_value] NA
  N1229.                    STORE_BLK(16)
  N1231.                    V51 MEM
  N1233.                    LCL_VAR_ADDR V10 loc9         d:2 NA
  N1235.                    STORE_BLK(16)
  N1237.                    IL_OFFSET IL offset: 0x191
  N1239.                    LCL_FLD_ADDR V10 loc9         u:2[+0] Fseq[_value] NA (last use)
  N1241.                    IND      
  N1243.                    LCL_FLD_ADDR V55 tmp41        d:2[+0] Fseq[_value] NA
  N1245.                    STORE_BLK(16)
  N1247.                    V55 MEM
  N1249.                    LCL_VAR_ADDR V12 loc11         NA
  N1251.                    STORE_BLK(16)
  N1253.                    IL_OFFSET IL offset: 0x19a
  N1255. rsi             =  V12 MEM
IN00e3:        mov      byte  ptr [r9+14], r8b
genIPmappingAdd: ignoring duplicate IL offset 0x187
Generating: N1223 ( 10, 10) [001137] ------------                 IL_OFFSET void   IL offset: 0x187 REG NA
Generating: N1225 (  3,  2) [001131] -c-----N----      t1131 =    LCL_VAR   struct(AX) V50 tmp36         NA REG NA $345
Generating: N1227 (  3,  4) [001129] Dc-----N----      t1129 =    LCL_FLD_ADDR byref  V51 tmp37        d:2[+0] Fseq[_value] NA REG NA
                                                             /--*  t1129  byref  
                                                             +--*  t1131  struct 
Generating: N1229 (  6,  7) [001135] nA--G-------              *  STORE_BLK(16) struct (copy) (Unroll) REG NA

      G_M33302_IG32:        ; offs=000353H, funclet=00
IN00e4:        movdqu   xmm0, qword ptr [V50 rbp-B8H]
IN00e5:        movdqu   qword ptr [V51 rbp-C8H], xmm0
Generating: N1231 (  3,  2) [001088] -c----------      t1088 =    LCL_VAR   struct V51 tmp37        u:2 NA (last use) REG NA $383
Generating: N1233 (  3,  2) [001090] Dc-----N----      t1090 =    LCL_VAR_ADDR byref  V10 loc9         d:2 NA REG NA
                                                             /--*  t1090  byref  
                                                             +--*  t1088  struct 
Generating: N1235 (???,???) [002170] nA----------              *  STORE_BLK(16) struct (copy) (Unroll) REG NA

      G_M33302_IG33:        ; offs=0003A5H, funclet=00
IN00e6:        movdqu   xmm0, qword ptr [V51 rbp-C8H]
IN00e7:        movdqu   qword ptr [V10 rbp-68H], xmm0
Added IP mapping: 0x0191 STACK_EMPTY (G_M33302_IG34,ins#2,ofs#13)
Generating: N1237 ( 13, 15) [001168] ------------                 IL_OFFSET void   IL offset: 0x191 REG NA
Generating: N1239 (  3,  4) [001162] -c--G--N----      t1162 =    LCL_FLD_ADDR byref  V10 loc9         u:2[+0] Fseq[_value] NA (last use) REG NA
                                                             /--*  t1162  byref  
Generating: N1241 (  6,  7) [001163] nc--G-------      t1163 = *  IND       struct REG NA $3c3
Generating: N1243 (  3,  4) [001158] Dc-----N----      t1158 =    LCL_FLD_ADDR byref  V55 tmp41        d:2[+0] Fseq[_value] NA REG NA
                                                             /--*  t1158  byref  
                                                             +--*  t1163  struct 
Generating: N1245 (  6,  7) [001166] nA--G-------              *  STORE_BLK(16) struct (copy) (Unroll) REG NA

      G_M33302_IG34:        ; offs=0003B5H, funclet=00
IN00e8:        movdqu   xmm0, qword ptr [V10 rbp-68H]
IN00e9:        movdqu   qword ptr [V55 rbp-D8H], xmm0
Generating: N1247 (  3,  2) [001152] -c----------      t1152 =    LCL_VAR   struct V55 tmp41        u:2 NA (last use) REG NA $346
Generating: N1249 (  3,  2) [001154] Dc-----N----      t1154 =    LCL_VAR_ADDR byref  V12 loc11         NA REG NA
                                                             /--*  t1154  byref  
                                                             +--*  t1152  struct 
Generating: N1251 (???,???) [002171] nA----------              *  STORE_BLK(16) struct (copy) (Unroll) REG NA

      G_M33302_IG35:        ; offs=0003C2H, funclet=00
IN00ea:        movdqu   xmm0, qword ptr [V55 rbp-D8H]
IN00eb:        movdqu   qword ptr [V12 rbp-78H], xmm0
Added IP mapping: 0x019A STACK_EMPTY (G_M33302_IG36,ins#2,ofs#13)
Generating: N1253 (  3,  4) [001186] ------------                 IL_OFFSET void   IL offset: 0x19a REG NA
Generating: N1255 (  3,  4) [001182] ------------      t1182 =    LCL_FLD   ref    V12 loc11        [+0] Fseq[_value, _obj] rsi REG rsi $31d

      G_M33302_IG36:        ; offs=0003CFH, funclet=00
IN00ec:        mov      rsi, gword ptr [V12 rbp-78H]
mikedn commented 5 years ago

Probably because there are a bunch of struct typed LCL_FLD nodes that the JIT cannot handle well.

benaadams commented 5 years ago

> struct typed LCL_FLD nodes that the JIT cannot handle well.

Hmm, because it's in the IL?

With https://github.com/dotnet/coreclr/blob/b8d5b7b760f64d39e00554189ea0e5c66ed6bd62/src/System.Private.CoreLib/shared/System/IO/StreamReader.cs#L1280

await tmpStream.ReadAsync(new Memory<byte>(tmpByteBuffer)).ConfigureAwait(false)

Becoming stloc.s, ldloca.s chains

.locals init (
    ...
    [8] valuetype System.Threading.Tasks.ValueTask`1<int32>,
    [9] valuetype System.Runtime.CompilerServices.ConfiguredValueTaskAwaitable`1<int32>,
    ...
    [11] valuetype System.Runtime.CompilerServices.ConfiguredValueTaskAwaitable`1/ConfiguredValueTaskAwaiter<int32>,
    ...
)

IL_0180: callvirt instance valuetype System.Threading.Tasks.ValueTask`1<int32> System.IO.Stream::ReadAsync(valuetype System.Memory`1<uint8>, valuetype System.Threading.CancellationToken)
IL_0185: stloc.s 8
IL_0187: ldloca.s 8
IL_0189: ldc.i4.0
IL_018a: call instance valuetype System.Runtime.CompilerServices.ConfiguredValueTaskAwaitable`1<!0> valuetype System.Threading.Tasks.ValueTask`1<int32>::ConfigureAwait(bool)
IL_018f: stloc.s 9
IL_0191: ldloca.s 9
IL_0193: call instance valuetype System.Runtime.CompilerServices.ConfiguredValueTaskAwaitable`1/ConfiguredValueTaskAwaiter<!0> valuetype System.Runtime.CompilerServices.ConfiguredValueTaskAwaitable`1<int32>::GetAwaiter()
IL_0198: stloc.s 11
IL_019a: ldloca.s 11
IL_019c: call instance bool valuetype System.Runtime.CompilerServices.ConfiguredValueTaskAwaitable`1/ConfiguredValueTaskAwaiter<int32>::get_IsCompleted()
IL_01a1: brtrue.s IL_01e4

So the JIT is doing what it's asked to do, in a way?

mikedn commented 5 years ago

Well, I don't know where those LCL_FLDs come from; presumably they were present in some IL - FSeq shows a field named _value. What's for sure is that the JIT's handling of struct typed fields needs improvement. Hopefully we'll get there sooner or later; I have a pending PR that's supposed to provide the preliminaries for that.

benaadams commented 5 years ago

It might not help that they are different types, even though they are exactly the same size and contain the same data (as they are wrappers over each other)?

benaadams commented 5 years ago

If I fold ConfiguredValueTaskAwaitable<TResult>.ConfiguredValueTaskAwaiter into ConfiguredValueTaskAwaitable<TResult> and change GetAwaiter from

public ConfiguredValueTaskAwaiter GetAwaiter() => new ConfiguredValueTaskAwaiter(in _value);

to

public ConfiguredValueTaskAwaitable<TResult> GetAwaiter() => this;

Then one of the `; do-not-enreg[SFB] must-init "NewObj constructor temp"` entries drops out and it's down to 3 copies rather than 4:

    call     qword ptr [rax+32]Stream:ReadAsync(struct,struct):struct:this
    mov      rcx, gword ptr [rbp-58H]
    mov      eax, dword ptr [rbp-50H]
    mov      edx, dword ptr [rbp-4CH]
    movsx    rdx, dx
    xor      r8d, r8d
    lea      r9, bword ptr [rbp-A8H] ;  do-not-enreg[XSB] must-init addr-exposed ld-addr-op "Inline stloc first use temp"
    mov      gword ptr [r9], rcx
    mov      dword ptr [r9+8], eax
    mov      word  ptr [r9+12], dx
    mov      byte  ptr [r9+14], r8b

G_M42772_IG32:
    movdqu   xmm0, qword ptr [rbp-A8H]
    movdqu   qword ptr [rbp-B8H], xmm0 ;  do-not-enreg[SFB] must-init "NewObj constructor temp"

G_M42772_IG33:
    movdqu   xmm0, qword ptr [rbp-B8H]
    movdqu   qword ptr [rbp-68H], xmm0 ;  do-not-enreg[SB] must-init ld-addr-op

G_M42772_IG34:
    movdqu   xmm0, qword ptr [rbp-68H] ;  do-not-enreg[XSFB] must-init addr-exposed ld-addr-op
    movdqu   qword ptr [rbp-78H], xmm0

G_M42772_IG35:
    mov      rsi, gword ptr [rbp-78H]
    test     rsi, rsi

Guessing that's due to `new` no longer being called, rather than the types now matching, though?

AndyAyersMS commented 5 years ago

Here's a derived example with similar results (3 back to back copies):

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

/// <summary>
/// Innermost struct of the derived repro: holds an object reference plus a
/// <see cref="short"/> token and a <see cref="bool"/> flag.
/// </summary>
[StructLayout(LayoutKind.Auto)]
public readonly struct xValueTask
{
    // Payload reference; inspected by IsCompleted.
    internal readonly object _obj;
    // Always set to 0 by the constructor in this repro.
    internal readonly short _token;
    // Always set to true by the constructor in this repro.
    internal readonly bool _continueOnCapturedContext;

    /// <summary>
    /// Initializes the struct from <paramref name="o"/>.
    /// </summary>
    /// <param name="o">Payload object; a null value routes through <see cref="ThrowHelper"/>.</param>
    /// <exception cref="Exception">Thrown (via <see cref="ThrowHelper"/>) when <paramref name="o"/> is null.</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public xValueTask(object o)
    {
        if (o == null)
        {
            ThrowHelper();
        }

        _obj = o;
        _continueOnCapturedContext = true;
        _token = 0;
    }

    // Throw lives in a separate method rather than inline in the
    // AggressiveInlining constructor above.
    static void ThrowHelper()
    {
        throw new Exception();
    }

    /// <summary>
    /// Returns true when <c>_obj</c> is null or is a <see cref="string"/>; false otherwise.
    /// </summary>
    public bool IsCompleted
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            object obj = _obj;

            // The constructor rejects null, so this branch is reached for default(xValueTask).
            if (obj == null)
            {
                return true;
            }

            // The pattern variable 's' is not used; only the type test matters here.
            if (obj is string s)
            {
                return true;
            }

            return false;
        }
    }
}

/// <summary>
/// Wrapper struct whose only field is an <see cref="xValueTask"/>.
/// </summary>
[StructLayout(LayoutKind.Auto)]
public readonly struct xConfiguredValueTaskAwaitable
{
    // Sole field: the wrapped value.
    private readonly xValueTask _value;

    // Takes the value by 'in' reference and copies it into the field.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal xConfiguredValueTaskAwaitable(in xValueTask value) => _value = value;

    /// <summary>
    /// Creates a new <see cref="xConfiguredValueTaskAwaiter"/> over the wrapped value,
    /// passing the field by 'in' reference.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public xConfiguredValueTaskAwaiter GetAwaiter() => new xConfiguredValueTaskAwaiter(in _value);
}

/// <summary>
/// Second wrapper struct whose only field is an <see cref="xValueTask"/>;
/// produced by <see cref="xConfiguredValueTaskAwaitable.GetAwaiter"/>.
/// </summary>
[StructLayout(LayoutKind.Auto)]
public readonly struct xConfiguredValueTaskAwaiter
{
    // Sole field: the wrapped value.
    private readonly xValueTask _value;

    // Takes the value by 'in' reference and copies it into the field.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal xConfiguredValueTaskAwaiter(in xValueTask value) => _value = value;

    /// <summary>
    /// Forwards to <see cref="xValueTask.IsCompleted"/> on the wrapped value.
    /// </summary>
    public bool IsCompleted
    {
        // Unlike the other members in this repro, this getter carries no MethodImpl attribute.
        get => _value.IsCompleted;
    }
}

/// <summary>
/// Driver for the derived repro.
/// </summary>
class X
{
    /// <summary>
    /// Builds an <see cref="xValueTask"/> over the string "xyz", wraps it in an
    /// <see cref="xConfiguredValueTaskAwaitable"/>, fetches the awaiter and queries IsCompleted.
    /// </summary>
    /// <returns>
    /// 100 when IsCompleted is true, 0 otherwise. The payload is a string, so
    /// <see cref="xValueTask.IsCompleted"/> takes its string branch and the result is 100.
    /// </returns>
    public static int Main()
    {
        xValueTask xvt = new xValueTask("xyz");
        xConfiguredValueTaskAwaitable xcvta = new xConfiguredValueTaskAwaitable(in xvt);
        return xcvta.GetAwaiter().IsCompleted ? 100 : 0;
    }
}

we end up with

G_M52876_IG04:
       C5FA6F442430         vmovdqu  xmm0, qword ptr [rsp+30H]
       C5FA7F442450         vmovdqu  qword ptr [rsp+50H], xmm0

G_M52876_IG05:
       C5FA6F442450         vmovdqu  xmm0, qword ptr [rsp+50H]
       C5FA7F442420         vmovdqu  qword ptr [rsp+20H], xmm0

G_M52876_IG06:
       C5FA6F442420         vmovdqu  xmm0, qword ptr [rsp+20H]
       C5FA7F442440         vmovdqu  qword ptr [rsp+40H], xmm0
benaadams commented 5 years ago

Pipeline use in Kestrel is this variation on the above

+ using System.Buffers;

+ [Flags]
+ internal enum ResultFlags : byte
+ {
+     None = 0x0,
+     Canceled = 0x1,
+     Completed = 0x2
+ }

+ public readonly struct ReadResult
+ {
+     internal readonly ReadOnlySequence<byte> _resultBuffer;
+     internal readonly ResultFlags _resultFlags;        
+ }

 [StructLayout(LayoutKind.Auto)]
 public readonly struct xValueTask
 {
+    internal readonly ReadResult _result;
...

Which has 3 xmm0 shuffles per copy sharplab.io:

    L005e: vmovdqu xmm0, [rsp+0xe0]
    L0067: vmovdqu [rsp+0x50], xmm0
    L006d: vmovdqu xmm0, [rsp+0xf0]
    L0076: vmovdqu [rsp+0x60], xmm0
    L007c: vmovdqu xmm0, [rsp+0x100]
    L0085: vmovdqu [rsp+0x70], xmm0

    L008b: vmovdqu xmm0, [rsp+0x50]
    L0091: vmovdqu [rsp+0xb0], xmm0
    L009a: vmovdqu xmm0, [rsp+0x60]
    L00a0: vmovdqu [rsp+0xc0], xmm0
    L00a9: vmovdqu xmm0, [rsp+0x70]
    L00af: vmovdqu [rsp+0xd0], xmm0

    L00b8: vmovdqu xmm0, [rsp+0xb0]
    L00c1: vmovdqu [rsp+0x20], xmm0
    L00c7: vmovdqu xmm0, [rsp+0xc0]
    L00d0: vmovdqu [rsp+0x30], xmm0
    L00d6: vmovdqu xmm0, [rsp+0xd0]
    L00df: vmovdqu [rsp+0x40], xmm0

    L00e5: vmovdqu xmm0, [rsp+0x20]
    L00eb: vmovdqu [rsp+0x80], xmm0
    L00f4: vmovdqu xmm0, [rsp+0x30]
    L00fa: vmovdqu [rsp+0x90], xmm0
    L0103: vmovdqu xmm0, [rsp+0x40]
    L0109: vmovdqu [rsp+0xa0], xmm0
benaadams commented 5 years ago

Guess would be copyprop? Currently outputs this

Copy Assertion for BB02
  curSsaName stack: { }

    Live vars: {V00} => {}
    Live vars: {} => {V04}
    Live vars: {V04} => {}
    Live vars: {} => {V01}
    Live vars: {V01} => {}
    Live vars: {} => {V05}
    Live vars: {V05} => {}
    Live vars: {} => {V02}
    Live vars: {V02} => {}
    Live vars: {} => {V07}

But doesn't then do any copyprop

benaadams commented 5 years ago

Adding in the loop exit conditions

Copy Assertion for BB02
  curSsaName stack: { }

     !tree->IsLocal() 
     !tree->IsLocal() 
     tree->OperGet() == GT_PHI_ARG || tree->OperGet() == GT_LCL_FLD 
     !tree->IsLocal() 
     !tree->IsLocal() 
     tree->OperGet() == GT_PHI_ARG || tree->OperGet() == GT_LCL_FLD 
     !tree->IsLocal() 
     !tree->IsLocal() 
     tree->OperGet() == GT_PHI_ARG || tree->OperGet() == GT_LCL_FLD 
     !tree->IsLocal() 
     !tree->IsLocal() 
     tree->OperGet() == GT_PHI_ARG || tree->OperGet() == GT_LCL_FLD 
     !tree->IsLocal() 
     !tree->IsLocal() 
     !tree->IsLocal() 
                            Live vars: {V00} => {}
     lclNum == newLclNum 
                            Live vars: {} => {V04}
     tree->OperGet() == GT_PHI_ARG || tree->OperGet() == GT_LCL_FLD 
     !tree->IsLocal() 
     !tree->IsLocal() 
     !tree->IsLocal() 
                            Live vars: {V04} => {}
     opVN != tree->gtVNPair.GetConservative() 
     lclNum == newLclNum 
                            Live vars: {} => {V01}
     tree->gtFlags & GTF_VAR_DEF 
     !tree->IsLocal() 
                            Live vars: {V01} => {}
     tree->OperGet() == GT_PHI_ARG || tree->OperGet() == GT_LCL_FLD 
     !tree->IsLocal() 
     !tree->IsLocal() 
                            Live vars: {} => {V05}
     tree->OperGet() == GT_PHI_ARG || tree->OperGet() == GT_LCL_FLD 
     !tree->IsLocal() 
     !tree->IsLocal() 
     !tree->IsLocal() 
                            Live vars: {V05} => {}
     opVN != tree->gtVNPair.GetConservative() 
     opVN == ValueNumStore::NoVN 
     opVN == ValueNumStore::NoVN 
     lclNum == newLclNum 
                            Live vars: {} => {V02}
     tree->gtFlags & GTF_VAR_DEF 
     !tree->IsLocal() 
                            Live vars: {V02} => {}
     tree->OperGet() == GT_PHI_ARG || tree->OperGet() == GT_LCL_FLD 
                            Live vars: {} => {V07}
     tree->gtFlags & GTF_VAR_DEF 
     !tree->IsLocal() 
     op->TypeGet() != tree->TypeGet() 
     opVN == ValueNumStore::NoVN 
     opVN == ValueNumStore::NoVN 
     opVN == ValueNumStore::NoVN 
     opVN == ValueNumStore::NoVN 
     lclNum == newLclNum 
     !tree->IsLocal() 
     !tree->IsLocal() 
     !tree->IsLocal() 
benaadams commented 5 years ago

And stage before

***** BB02, stmt 5 (before)
N005 (  8,  9) [000071] IA------R---              *  ASG       struct (init)
N004 (  6,  7) [000070] n------N----              +--*  BLK(32)   struct
N003 (  3,  5) [000068] ------------              |  \--*  ADDR      byref 
N002 (  3,  4) [000066] U------N----              |     \--*  LCL_FLD   struct V00 loc0         ud:4->5[+16] Fseq[_result]
N001 (  1,  1) [000069] ------------              \--*  CNS_INT   int    0

N001 [000069]   CNS_INT   0 => $40 {IntCns 0}
  VNApplySelectors:
    VNForHandle(_result) is $c4, fieldType is struct, size = 32
      AX2: $c4 != $c3 ==> select([$201]store($1c1, $c3, $40), $c4) ==> select($1c1, $c4).
      AX2: $c4 != $c2 ==> select([$1c1]store($102, $c2, $43), $c4) ==> select($102, $c4).
      AX2: $c4 != $c1 ==> select([$102]store($1, $c1, $100), $c4) ==> select($1, $c4).
    VNForMapSelect($201, $c4):struct returns $VN.ZeroMap
    *** Mismatched types in VNApplySelectorsTypeCheck (indType is TYP_STRUCT)
  VNApplySelectors:
    VNForHandle(_result) is $c4, fieldType is struct, size = 32
      AX2: $c4 != $c3 ==> select([$200]store($1c0, $c3, $40), $c4) ==> select($1c0, $c4).
      AX2: $c4 != $c2 ==> select([$1c0]store($101, $c2, $43), $c4) ==> select($101, $c4).
      AX2: $c4 != $c1 ==> select([$101]store($1, $c1, $142), $c4) ==> select($1, $c4).
    VNForMapSelect($200, $c4):struct returns $VN.ZeroMap
    *** Mismatched types in VNApplySelectorsTypeCheck (indType is TYP_STRUCT)
N002 [000066]   LCL_FLD   V00 loc0         ud:4->5[+16] Fseq[_result] => <l:$143 {143}, c:$144 {144}>
    FieldSeq {_result} is $240
N003 [000068]   ADDR      => $280 {PtrToLoc($40, $240)}
N005 [000071]   ASG       V00/5 => $2c0 {2c0}
N005 [000071]   ASG       => $VN.Void

***** BB02, stmt 5 (after)
N005 (  8,  9) [000071] IA------R---              *  ASG       struct (init) $VN.Void
N004 (  6,  7) [000070] n------N----              +--*  BLK(32)   struct
N003 (  3,  5) [000068] ------------              |  \--*  ADDR      byref  $280
N002 (  3,  4) [000066] U------N----              |     \--*  LCL_FLD   struct V00 loc0         ud:4->5[+16] Fseq[_result] <l:$143, c:$144>
N001 (  1,  1) [000069] ------------              \--*  CNS_INT   int    0 $40

---------

***** BB02, stmt 6 (before)
N005 ( 10, 10) [000084] -A------R---              *  ASG       struct (copy)
N004 (  6,  7) [000083] n-----------              +--*  BLK(48)   struct
N003 (  3,  5) [000082] ------------              |  \--*  ADDR      byref 
N002 (  3,  4) [000077] D------N----              |     \--*  LCL_FLD   struct V04 tmp1         d:2[+0] Fseq[_value]
N001 (  3,  2) [000079] -------N----              \--*  LCL_VAR   struct V00 loc0         u:5 (last use)

N001 [000079]   LCL_VAR   V00 loc0         u:5 (last use) => $2c0 {2c0}
    FieldSeq {_value} is $241
N003 [000082]   ADDR      => $281 {PtrToLoc($44, $241)}
  VNApplySelectorsAssign:
    VNForHandle(_value) is $c5, fieldType is struct
    VNForMapStore($2c1, $c5, $2c0):struct returns $300 {$2c1[$c5 := $2c0]}
  VNApplySelectorsAssign:
    VNForHandle(_value) is $c5, fieldType is struct
    VNForMapStore($2c1, $c5, $2c0):struct returns $300 {$2c1[$c5 := $2c0]}
Tree [000084] assigned VN to local var V04/2: $300 {$2c1[$c5 := $2c0]}
N005 [000084]   ASG       => $VN.Void

***** BB02, stmt 6 (after)
N005 ( 10, 10) [000084] -A------R---              *  ASG       struct (copy) $VN.Void
N004 (  6,  7) [000083] n-----------              +--*  BLK(48)   struct
N003 (  3,  5) [000082] ------------              |  \--*  ADDR      byref  $281
N002 (  3,  4) [000077] D------N----              |     \--*  LCL_FLD   struct V04 tmp1         d:2[+0] Fseq[_value]
N001 (  3,  2) [000079] -------N----              \--*  LCL_VAR   struct V00 loc0         u:5 (last use) $2c0

---------

***** BB02, stmt 7 (before)
N003 (  7,  5) [000016] -A------R---              *  ASG       struct (copy)
N002 (  3,  2) [000014] D------N----              +--*  LCL_VAR   struct V01 loc1         d:2
N001 (  3,  2) [000013] ------------              \--*  LCL_VAR   struct V04 tmp1         u:2 (last use)

N001 [000013]   LCL_VAR   V04 tmp1         u:2 (last use) => $300 {$2c1[$c5 := $2c0]}
Tree [000016] assigned VN to local var V01/2: $300 {$2c1[$c5 := $2c0]}
N003 [000016]   ASG       => $VN.Void

***** BB02, stmt 7 (after)
N003 (  7,  5) [000016] -A------R---              *  ASG       struct (copy) $VN.Void
N002 (  3,  2) [000014] D------N----              +--*  LCL_VAR   struct V01 loc1         d:2
N001 (  3,  2) [000013] ------------              \--*  LCL_VAR   struct V04 tmp1         u:2 (last use) $300

---------

***** BB02, stmt 8 (before)
N007 ( 13, 15) [000115] -A--G---R---              *  ASG       struct (copy)
N006 (  6,  7) [000114] n-----------              +--*  BLK(48)   struct
N005 (  3,  5) [000113] ------------              |  \--*  ADDR      byref 
N004 (  3,  4) [000106] D------N----              |     \--*  LCL_FLD   struct V05 tmp2         d:2[+0] Fseq[_value]
N003 (  6,  7) [000111] n---G-------              \--*  IND       struct
N002 (  3,  5) [000107] ----G-------                 \--*  ADDR      byref 
N001 (  3,  4) [000110] -------N----                    \--*  LCL_FLD   struct V01 loc1         u:2[+0] Fseq[_value] (last use)

  VNApplySelectors:
    VNForHandle(_value) is $c5, fieldType is struct, size = 48
      AX1: select([$2c1]store($300, $c5, $2c0), $c5) ==> $2c0.
    VNForMapSelect($300, $c5):struct returns $2c0 {2c0}
  VNApplySelectors:
    VNForHandle(_value) is $c5, fieldType is struct, size = 48
      AX1: select([$2c1]store($300, $c5, $2c0), $c5) ==> $2c0.
    VNForMapSelect($300, $c5):struct returns $2c0 {2c0}
N001 [000110]   LCL_FLD   V01 loc1         u:2[+0] Fseq[_value] (last use) => $2c0 {2c0}
    FieldSeq {_value} is $241
N002 [000107]   ADDR      => $282 {PtrToLoc($43, $241)}
  VNApplySelectors:
    VNForHandle(_value) is $c5, fieldType is struct, size = 48
      AX1: select([$2c1]store($300, $c5, $2c0), $c5) ==> $2c0.
    VNForMapSelect($300, $c5):struct returns $2c0 {2c0}
  VNApplySelectors:
    VNForHandle(_value) is $c5, fieldType is struct, size = 48
      AX1: select([$2c1]store($300, $c5, $2c0), $c5) ==> $2c0.
    VNForMapSelect($300, $c5):struct returns $2c0 {2c0}
N003 [000111]   IND       => $2c0 {2c0}
    FieldSeq {_value} is $242
N005 [000113]   ADDR      => $283 {PtrToLoc($46, $242)}
  VNApplySelectors:
    VNForHandle(_value) is $c5, fieldType is struct, size = 48
      AX1: select([$2c1]store($300, $c5, $2c0), $c5) ==> $2c0.
    VNForMapSelect($300, $c5):struct returns $2c0 {2c0}
  VNApplySelectors:
    VNForHandle(_value) is $c5, fieldType is struct, size = 48
      AX1: select([$2c1]store($300, $c5, $2c0), $c5) ==> $2c0.
    VNForMapSelect($300, $c5):struct returns $2c0 {2c0}
  VNApplySelectorsAssign:
    VNForHandle(_value) is $c6, fieldType is struct
    VNForMapStore($2c3, $c6, $2c0):struct returns $301 {$2c3[$c6 := $2c0]}
  VNApplySelectorsAssign:
    VNForHandle(_value) is $c6, fieldType is struct
    VNForMapStore($2c3, $c6, $2c0):struct returns $301 {$2c3[$c6 := $2c0]}
Tree [000115] assigned VN to local var V05/2: $301 {$2c3[$c6 := $2c0]}
N007 [000115]   ASG       => $VN.Void

***** BB02, stmt 8 (after)
N007 ( 13, 15) [000115] -A--G---R---              *  ASG       struct (copy) $VN.Void
N006 (  6,  7) [000114] n-----------              +--*  BLK(48)   struct
N005 (  3,  5) [000113] ------------              |  \--*  ADDR      byref  $283
N004 (  3,  4) [000106] D------N----              |     \--*  LCL_FLD   struct V05 tmp2         d:2[+0] Fseq[_value]
N003 (  6,  7) [000111] n---G-------              \--*  IND       struct $2c0
N002 (  3,  5) [000107] ----G-------                 \--*  ADDR      byref  $282
N001 (  3,  4) [000110] -------N----                    \--*  LCL_FLD   struct V01 loc1         u:2[+0] Fseq[_value] (last use) $2c0

---------

***** BB02, stmt 9 (before)
N003 (  7,  5) [000103] -A------R---              *  ASG       struct (copy)
N002 (  3,  2) [000102] D------N----              +--*  LCL_VAR   struct V02 loc2         d:2
N001 (  3,  2) [000100] ------------              \--*  LCL_VAR   struct V05 tmp2         u:2 (last use)

N001 [000100]   LCL_VAR   V05 tmp2         u:2 (last use) => $301 {$2c3[$c6 := $2c0]}
Tree [000103] assigned VN to local var V02/2: $301 {$2c3[$c6 := $2c0]}
N003 [000103]   ASG       => $VN.Void

***** BB02, stmt 9 (after)
N003 (  7,  5) [000103] -A------R---              *  ASG       struct (copy) $VN.Void
N002 (  3,  2) [000102] D------N----              +--*  LCL_VAR   struct V02 loc2         d:2
N001 (  3,  2) [000100] ------------              \--*  LCL_VAR   struct V05 tmp2         u:2 (last use) $301

---------

***** BB02, stmt 10 (before)
N003 (  3,  4) [000133] -A--G---R---              *  ASG       ref   
N002 (  1,  1) [000132] D------N----              +--*  LCL_VAR   ref    V07 tmp4         d:2
N001 (  3,  4) [000130] ------------              \--*  LCL_FLD   ref    V02 loc2         u:2[+0] Fseq[_value, _obj] (last use)

  VNApplySelectors:
    VNForHandle(_value) is $c6, fieldType is struct, size = 48
      AX1: select([$2c3]store($301, $c6, $2c0), $c6) ==> $2c0.
    VNForMapSelect($301, $c6):struct returns $2c0 {2c0}
  VNApplySelectors:
    VNForHandle(_obj) is $c1, fieldType is ref
    VNForMapSelect($2c0, $c1):ref returns $243 {$2c0[$c1]}
  VNApplySelectors:
    VNForHandle(_value) is $c6, fieldType is struct, size = 48
      AX1: select([$2c3]store($301, $c6, $2c0), $c6) ==> $2c0.
    VNForMapSelect($301, $c6):struct returns $2c0 {2c0}
  VNApplySelectors:
    VNForHandle(_obj) is $c1, fieldType is ref
    VNForMapSelect($2c0, $c1):ref returns $243 {$2c0[$c1]}
N001 [000130]   LCL_FLD   V02 loc2         u:2[+0] Fseq[_value, _obj] (last use) => $243 {$2c0[$c1]}
N002 [000132]   LCL_VAR   V07 tmp4         d:2 => $243 {$2c0[$c1]}
N003 [000133]   ASG       => $243 {$2c0[$c1]}

***** BB02, stmt 10 (after)
N003 (  3,  4) [000133] -A--G---R---              *  ASG       ref    $243
N002 (  1,  1) [000132] D------N----              +--*  LCL_VAR   ref    V07 tmp4         d:2 $243
N001 (  3,  4) [000130] ------------              \--*  LCL_FLD   ref    V02 loc2         u:2[+0] Fseq[_value, _obj] (last use) $243

Or

***** BB02, stmt 5         
N005 (  8,  9) [000071] IA------R---  *  ASG       struct (init) $VN.Void
N004 (  6,  7) [000070] n------N----  +--*  BLK(32)   struct
N003 (  3,  5) [000068] ------------  |  \--*  ADDR      byref  $280
N002 (  3,  4) [000066] U------N----  |     \--*  LCL_FLD   struct V00 loc0  ud:4->5[+16] Fseq[_result] <l:$143, c:$144>
N001 (  1,  1) [000069] ------------  \--*  CNS_INT   int    0 $40

***** BB02, stmt 6         
N005 ( 10, 10) [000084] -A------R---  *  ASG       struct (copy) $VN.Void
N004 (  6,  7) [000083] n-----------  +--*  BLK(48)   struct
N003 (  3,  5) [000082] ------------  |  \--*  ADDR      byref  $281
N002 (  3,  4) [000077] D------N----  |     \--*  LCL_FLD   struct V04 tmp1   d:2[+0] Fseq[_value]
N001 (  3,  2) [000079] -------N----  \--*  LCL_VAR   struct V00 loc0         u:5 (last use) $2c0

***** BB02, stmt 7           
N003 (  7,  5) [000016] -A------R---  *  ASG       struct (copy) $VN.Void
N002 (  3,  2) [000014] D------N----  +--*  LCL_VAR   struct V01 loc1         d:2
N001 (  3,  2) [000013] ------------  \--*  LCL_VAR   struct V04 tmp1         u:2 (last use) $300

***** BB02, stmt 8          
N007 ( 13, 15) [000115] -A--G---R---  *  ASG       struct (copy) $VN.Void
N006 (  6,  7) [000114] n-----------  +--*  BLK(48)   struct
N005 (  3,  5) [000113] ------------  |  \--*  ADDR      byref  $283
N004 (  3,  4) [000106] D------N----  |     \--*  LCL_FLD   struct V05 tmp2   d:2[+0] Fseq[_value]
N003 (  6,  7) [000111] n---G-------  \--*  IND       struct $2c0
N002 (  3,  5) [000107] ----G-------     \--*  ADDR      byref  $282
N001 (  3,  4) [000110] -------N----        \--*  LCL_FLD   struct V01 loc1   u:2[+0] Fseq[_value] (last use) $2c0

***** BB02, stmt 9           
N003 (  7,  5) [000103] -A------R---  *  ASG       struct (copy) $VN.Void
N002 (  3,  2) [000102] D------N----  +--*  LCL_VAR   struct V02 loc2         d:2
N001 (  3,  2) [000100] ------------  \--*  LCL_VAR   struct V05 tmp2         u:2 (last use) $301

***** BB02, stmt 10                   
N003 (  3,  4) [000133] -A--G---R---  *  ASG       ref    $243
N002 (  1,  1) [000132] D------N----  +--*  LCL_VAR   ref    V07 tmp4         d:2 $243
N001 (  3,  4) [000130] ------------  \--*  LCL_FLD   ref    V02 loc2         u:2[+0] Fseq[_value, _obj] (last use) $243
benaadams commented 5 years ago

Hmm not sure how to do this.

What I'm envisaging is: if there is a copy into a variable (its first use), followed by a copy out of that same variable (its last use):

var1            -> var2 (first use)
var2 (last use) -> var3

Then that intermediate copy can be skipped becoming

var1 -> var3

Or in the Live vars output

Live vars: {V00} => {}
Live vars: {} => {V04}
Live vars: {V04} => {}
Live vars: {} => {V01}
Live vars: {V01} => {}
Live vars: {} => {V05}
Live vars: {V05} => {}
Live vars: {} => {V02}
Live vars: {V02} => {}
Live vars: {} => {V07}

becomes

Live vars: {V00} => {}
Live vars: {} => {V07}
benaadams commented 5 years ago

Might have something...

benaadams commented 5 years ago

https://github.com/dotnet/coreclr/commit/6b3131f1c3b94da36405693f7e8015f7157e72fb identifies the items to change, but does not update the tree correctly, as it crashes in the optOptimizeValnumCSEs step

CopyBlk based copy assertion for [000106] V05 @00000003 by [000102] V02 @00000003.

***** BB02 (before)
N007 ( 13, 15) [000115] -A--G---R---  *  ASG       struct (copy) $VN.Void
N006 (  6,  7) [000114] n-----------  +--*  BLK(48)   struct
N005 (  3,  5) [000113] ------------  |  \--*  ADDR      byref  $283
N004 (  3,  4) [000106] D------N----  |     \--*  LCL_FLD   struct V05 tmp2         d:2[+0] Fseq[_value]
N003 (  6,  7) [000111] n---G-------  \--*  IND       struct $2c0
N002 (  3,  5) [000107] ----G-------     \--*  ADDR      byref  $282
N001 (  3,  4) [000110] -------N----        \--*  LCL_FLD   struct V01 loc1         u:2[+0] Fseq[_value] (last use) $2c0

N003 (  7,  5) [000103] -A------R---  *  ASG       struct (copy) $VN.Void
N002 (  3,  2) [000102] D------N----  +--*  LCL_VAR   struct V02 loc2         d:2
N001 (  3,  2) [000100] ------------  \--*  LCL_VAR   struct V05 tmp2         u:2 (last use) $301

Copy propagated to:

***** BB02 (after)
N007 ( 13, 15) [000115] -A--G---R---  *  ASG       struct (copy) $VN.Void
     (  3,  2) [000228] D------N----  +--*  LCL_VAR   struct V02 loc2         d:2
N003 (  6,  7) [000111] n---G-------  \--*  IND       struct $2c0
N002 (  3,  5) [000107] ----G-------     \--*  ADDR      byref  $282
N001 (  3,  4) [000110] -------N----        \--*  LCL_FLD   struct V01 loc1         u:2[+0] Fseq[_value] (last use) $2c0

N003 (  7,  5) [000103] ------------  *  NOP       void  

CopyBlk based copy assertion for [000014] V01 @00000003 by [000228] V02 @00000003.

***** BB02 (before)
N003 (  7,  5) [000016] -A------R---  *  ASG       struct (copy) $VN.Void
N002 (  3,  2) [000014] D------N----  +--*  LCL_VAR   struct V01 loc1         d:2
N001 (  3,  2) [000013] ------------  \--*  LCL_VAR   struct V04 tmp1         u:2 (last use) $300

N007 ( 13, 15) [000115] -A--G---R---  *  ASG       struct (copy) $VN.Void
     (  3,  2) [000228] D------N----  +--*  LCL_VAR   struct V02 loc2         d:2
N003 (  6,  7) [000111] n---G-------  \--*  IND       struct $2c0
N002 (  3,  5) [000107] ----G-------     \--*  ADDR      byref  $282
N001 (  3,  4) [000110] -------N----        \--*  LCL_FLD   struct V01 loc1         u:2[+0] Fseq[_value] (last use) $2c0

Copy propagated to:

***** BB02 (after)
N003 (  7,  5) [000016] -A------R---  *  ASG       struct (copy) $VN.Void
     (  3,  2) [000229] D------N----  +--*  LCL_VAR   struct V02 loc2         d:2
N001 (  3,  2) [000013] ------------  \--*  LCL_VAR   struct V04 tmp1         u:2 (last use) $300

N007 ( 13, 15) [000115] ------------  *  NOP       void  

CopyBlk based copy assertion for [000077] V04 @00000003 by [000229] V02 @00000003.

***** BB02 (before)
N005 ( 10, 10) [000084] -A------R---  *  ASG       struct (copy) $VN.Void
N004 (  6,  7) [000083] n-----------  +--*  BLK(48)   struct
N003 (  3,  5) [000082] ------------  |  \--*  ADDR      byref  $281
N002 (  3,  4) [000077] D------N----  |     \--*  LCL_FLD   struct V04 tmp1         d:2[+0] Fseq[_value]
N001 (  3,  2) [000079] -------N----  \--*  LCL_VAR   struct V00 loc0         u:5 (last use) $2c0

N003 (  7,  5) [000016] -A------R---  *  ASG       struct (copy) $VN.Void
     (  3,  2) [000229] D------N----  +--*  LCL_VAR   struct V02 loc2         d:2
N001 (  3,  2) [000013] ------------  \--*  LCL_VAR   struct V04 tmp1         u:2 (last use) $300

Copy propagated to:

***** BB02 (after)
N005 ( 10, 10) [000084] -A------R---  *  ASG       struct (copy) $VN.Void
     (  3,  2) [000230] D------N----  +--*  LCL_VAR   struct V02 loc2         d:2
N001 (  3,  2) [000079] -------N----  \--*  LCL_VAR   struct V00 loc0         u:5 (last use) $2c0

N003 (  7,  5) [000016] ------------  *  NOP       void  
------------ BB02 [000..01D) -> BB04 (cond), preds={BB01} succs={BB03,BB04}

***** BB02, stmt 2
     (  9, 17) [000052] ------------  *  STMT      void  (IL 0x000...  ???)
N004 (  9, 17) [000051] -A--G---R---  \--*  ASG       ref    <l:$100, c:$142>
N003 (  3,  4) [000048] U------N----     +--*  LCL_FLD   ref    V00 loc0         ud:1->2[+0] Fseq[_obj] <l:$102, c:$101>
N002 (  5, 12) [000193] n---G-------     \--*  IND       ref    <l:$100, c:$142>
N001 (  3, 10) [000192] ------------        \--*  CNS_INT(h) long   0x914133C0 "xyz" $c0

***** BB02, stmt 3
     (  6,  7) [000058] ------------  *  STMT      void  (IL 0x000...  ???)
N003 (  6,  7) [000057] -A------R---  \--*  ASG       bool   $43
N002 (  4,  5) [000054] U------N----     +--*  LCL_FLD   bool   V00 loc0         ud:2->3[+10] Fseq[_continueOnCapturedContext] <l:$1c1, c:$1c0>
N001 (  1,  1) [000055] ------------     \--*  CNS_INT   int    1 $43

***** BB02, stmt 4
     (  6,  7) [000064] ------------  *  STMT      void  (IL 0x000...  ???)
N003 (  6,  7) [000063] -A------R---  \--*  ASG       short  $40
N002 (  4,  5) [000060] U------N----     +--*  LCL_FLD   short  V00 loc0         ud:3->4[+8] Fseq[_token] <l:$201, c:$200>
N001 (  1,  1) [000061] ------------     \--*  CNS_INT   int    0 $40

***** BB02, stmt 5
     (  8,  9) [000072] ------------  *  STMT      void  (IL 0x000...  ???)
N005 (  8,  9) [000071] IA------R---  \--*  ASG       struct (init) $VN.Void
N004 (  6,  7) [000070] n------N----     +--*  BLK(32)   struct
N003 (  3,  5) [000068] ------------     |  \--*  ADDR      byref  $280
N002 (  3,  4) [000066] U------N----     |     \--*  LCL_FLD   struct V00 loc0         ud:4->5[+16] Fseq[_result] <l:$143, c:$144>
N001 (  1,  1) [000069] ------------     \--*  CNS_INT   int    0 $40

***** BB02, stmt 6
     ( 10, 10) [000085] ------------  *  STMT      void  (IL 0x00C...  ???)
N005 ( 10, 10) [000084] -A------R---  \--*  ASG       struct (copy) $VN.Void
     (  3,  2) [000230] D------N----     +--*  LCL_VAR   struct V02 loc2         d:2
N001 (  3,  2) [000079] -------N----     \--*  LCL_VAR   struct V00 loc0         u:5 (last use) $2c0

***** BB02, stmt 7
     (  7,  5) [000017] ------------  *  STMT      void  (IL 0x013...  ???)
N003 (  7,  5) [000016] ------------  \--*  NOP       void  

***** BB02, stmt 8
     ( 13, 15) [000116] ------------  *  STMT      void  (IL 0x014...  ???)
N007 ( 13, 15) [000115] ------------  \--*  NOP       void  

***** BB02, stmt 9
     (  7,  5) [000026] ------------  *  STMT      void  (IL   ???...  ???)
N003 (  7,  5) [000103] ------------  \--*  NOP       void  

***** BB02, stmt 10
     (  3,  4) [000134] ------------  *  STMT      void  (IL 0x01C...  ???)
N003 (  3,  4) [000133] -A--G---R---  \--*  ASG       ref    $243
N002 (  1,  1) [000132] D------N----     +--*  LCL_VAR   ref    V07 tmp4         d:2 $243
N001 (  3,  4) [000130] ------------     \--*  LCL_FLD   ref    V02 loc2         u:2[+0] Fseq[_value, _obj] (last use) $243

***** BB02, stmt 11
     (  5,  5) [000139] ------------  *  STMT      void  (IL 0x01C...  ???)
N004 (  5,  5) [000138] ------------  \--*  JTRUE     void  
N003 (  3,  3) [000137] J------N----     \--*  NE        int    $182
N001 (  1,  1) [000135] ------------        +--*  LCL_VAR   ref    V07 tmp4         u:2 $243
N002 (  1,  1) [000136] ------------        \--*  CNS_INT   ref    null $VN.Null

------------ BB03 [01C..01D) -> BB10 (always), preds={BB02} succs={BB10}
benaadams commented 5 years ago

Have a change that looks like it addresses it; just need to clean it up. It makes the following diff:

-;  V00 loc0         [V00,T00] (  5,  5   )  struct (48) [rsp+0xE0]   do-not-enreg[SFB] must-init ld-addr-op
+;  V00 loc0         [V00,T00] (  5,  5   )  struct (48) [rsp+0x20]   do-not-enreg[SFB] must-init ld-addr-op
-;  V01 loc1         [V01,T06] (  2,  2   )  struct (48) [rsp+0xB0]   do-not-enreg[SFB] must-init ld-addr-op
+;* V01 loc1         [V01    ] (  0,  0   )  struct (48) zero-ref    do-not-enreg[SFB] ld-addr-op
-;  V02 loc2         [V02,T07] (  2,  2   )  struct (48) [rsp+0x80]   do-not-enreg[SFB] must-init ld-addr-op
+;* V02 loc2         [V02    ] (  0,  0   )  struct (48) zero-ref    do-not-enreg[SFB] ld-addr-op
 ;  V03 OutArgs      [V03    ] (  1,  1   )  lclBlk (32) [rsp+0x00]   "OutgoingArgSpace"
-;  V04 tmp1         [V04,T01] (  2,  4   )  struct (48) [rsp+0x50]   do-not-enreg[SFB] must-init "NewObj constructor temp"
+;* V04 tmp1         [V04    ] (  0,  0   )  struct (48) zero-ref    do-not-enreg[SFB] "NewObj constructor temp"
-;  V05 tmp2         [V05,T02] (  2,  4   )  struct (48) [rsp+0x20]   do-not-enreg[SFB] must-init "NewObj constructor temp"
+;* V05 tmp2         [V05    ] (  0,  0   )  struct (48) zero-ref    do-not-enreg[SFB] "NewObj constructor temp"
 ;  V06 tmp3         [V06,T04] (  4,  2.50)    bool  ->  rdx         "Inline return value spill temp"
 ;  V07 tmp4         [V07,T05] (  3,  2.25)     ref  ->  rax         class-hnd "Inline stloc first use temp"
 ;  V08 tmp5         [V08,T08] (  4,  1.62)     ref  ->  rax         class-hnd "spilling QMark2"
 ;  V09 cse0         [V09,T03] (  3,  3   )     ref  ->  rax         "ValNumCSE"
 ;
-; Lcl frame size = 272
+; Lcl frame size = 80

 G_M11412_IG01:
        57                   push     rdi
-       4881EC10010000       sub      rsp, 272
+       4883EC50             sub      rsp, 80
        C5F877               vzeroupper 
        488D7C2420           lea      rdi, [rsp+20H]
-       B93C000000           mov      ecx, 60
+       B90C000000           mov      ecx, 12
        33C0                 xor      rax, rax
        F3AB                 rep stosd 

 G_M11412_IG02:
-       48B8C033BD566E010000 mov      rax, 0x16E56BD33C0
+       48B8C033001016020000 mov      rax, 0x216100033C0
        488B00               mov      rax, gword ptr [rax]
        4885C0               test     rax, rax
        0F844E010000         je       G_M11412_IG16

 G_M11412_IG03:
-       48898424E0000000     mov      gword ptr [rsp+E0H], rax
-       C68424EA00000001     mov      byte  ptr [rsp+EAH], 1
-       66C78424E80000000000 mov      word  ptr [rsp+E8H], 0
-       488D8424F0000000     lea      rax, bword ptr [rsp+F0H]
+       4889442420           mov      gword ptr [rsp+20H], rax
+       C644242A01           mov      byte  ptr [rsp+2AH], 1
+       66C74424280000       mov      word  ptr [rsp+28H], 0
+       488D442430           lea      rax, bword ptr [rsp+30H]
        C5F857C0             vxorps   xmm0, xmm0
        C5FA7F00             vmovdqu  qword ptr [rax], xmm0
        C5FA7F4010           vmovdqu  qword ptr [rax+16], xmm0

-G_M11412_IG04:
-       C5FA6F8424E0000000   vmovdqu  xmm0, qword ptr [rsp+E0H]
-       C5FA7F442450         vmovdqu  qword ptr [rsp+50H], xmm0
-       C5FA6F8424F0000000   vmovdqu  xmm0, qword ptr [rsp+F0H]
-       C5FA7F442460         vmovdqu  qword ptr [rsp+60H], xmm0
-       C5FA6F842400010000   vmovdqu  xmm0, qword ptr [rsp+100H]
-       C5FA7F442470         vmovdqu  qword ptr [rsp+70H], xmm0
-
-G_M11412_IG05:
-       C5FA6F442450         vmovdqu  xmm0, qword ptr [rsp+50H]
-       C5FA7F8424B0000000   vmovdqu  qword ptr [rsp+B0H], xmm0
-       C5FA6F442460         vmovdqu  xmm0, qword ptr [rsp+60H]
-       C5FA7F8424C0000000   vmovdqu  qword ptr [rsp+C0H], xmm0
-       C5FA6F442470         vmovdqu  xmm0, qword ptr [rsp+70H]
-       C5FA7F8424D0000000   vmovdqu  qword ptr [rsp+D0H], xmm0
-
-G_M11412_IG06:
-       C5FA6F8424B0000000   vmovdqu  xmm0, qword ptr [rsp+B0H]
-       C5FA7F442420         vmovdqu  qword ptr [rsp+20H], xmm0
-       C5FA6F8424C0000000   vmovdqu  xmm0, qword ptr [rsp+C0H]
-       C5FA7F442430         vmovdqu  qword ptr [rsp+30H], xmm0
-       C5FA6F8424D0000000   vmovdqu  xmm0, qword ptr [rsp+D0H]
-       C5FA7F442440         vmovdqu  qword ptr [rsp+40H], xmm0
-
-G_M11412_IG07:
-       C5FA6F442420         vmovdqu  xmm0, qword ptr [rsp+20H]
-       C5FA7F842480000000   vmovdqu  qword ptr [rsp+80H], xmm0
-       C5FA6F442430         vmovdqu  xmm0, qword ptr [rsp+30H]
-       C5FA7F842490000000   vmovdqu  qword ptr [rsp+90H], xmm0
-       C5FA6F442440         vmovdqu  xmm0, qword ptr [rsp+40H]
-       C5FA7F8424A0000000   vmovdqu  qword ptr [rsp+A0H], xmm0

 G_M11412_IG08:
-       488B842480000000     mov      rax, gword ptr [rsp+80H]
+       488B442420           mov      rax, gword ptr [rsp+20H]
        4885C0               test     rax, rax
CarolEidt commented 4 years ago

This was previously issue 18542 in dotnet/coreclr, and the repro is captured as JIT\Regression\JitBlue\GitHub_18542.

benaadams commented 3 years ago

This is much closer now, as this chunk (sharplab.io):

    L0051: vxorps xmm0, xmm0, xmm0
    L0055: vmovdqu [rax], xmm0
    L0059: vmovdqu [rax+0x10], xmm0
    L005e: vmovdqu xmm0, [rsp+0xe0]
    L0067: vmovdqu [rsp+0x50], xmm0
    L006d: vmovdqu xmm0, [rsp+0xf0]
    L0076: vmovdqu [rsp+0x60], xmm0
    L007c: vmovdqu xmm0, [rsp+0x100]
    L0085: vmovdqu [rsp+0x70], xmm0
    L008b: vmovdqu xmm0, [rsp+0x50]
    L0091: vmovdqu [rsp+0xb0], xmm0
    L009a: vmovdqu xmm0, [rsp+0x60]
    L00a0: vmovdqu [rsp+0xc0], xmm0
    L00a9: vmovdqu xmm0, [rsp+0x70]
    L00af: vmovdqu [rsp+0xd0], xmm0
    L00b8: vmovdqu xmm0, [rsp+0xb0]
    L00c1: vmovdqu [rsp+0x20], xmm0
    L00c7: vmovdqu xmm0, [rsp+0xc0]
    L00d0: vmovdqu [rsp+0x30], xmm0
    L00d6: vmovdqu xmm0, [rsp+0xd0]
    L00df: vmovdqu [rsp+0x40], xmm0
    L00e5: vmovdqu xmm0, [rsp+0x20]
    L00eb: vmovdqu [rsp+0x80], xmm0
    L00f4: vmovdqu xmm0, [rsp+0x30]
    L00fa: vmovdqu [rsp+0x90], xmm0
    L0103: vmovdqu xmm0, [rsp+0x40]
    L0109: vmovdqu [rsp+0xa0], xmm0
    L0112: mov rax, [rsp+0x80]
    L011a: test rax, rax

has trimmed down significantly to the following, which looks to be copying to the output stack?

       vxorps   xmm0, xmm0
       vmovdqu  xmmword ptr [rsp+48H], xmm0
       vmovdqu  xmmword ptr [rsp+58H], xmm0

G_M7880_IG03:
       vmovdqu  xmm0, xmmword ptr [rsp+38H]
       vmovdqu  xmmword ptr [rsp+08H], xmm0
       vmovdqu  xmm0, xmmword ptr [rsp+48H]
       vmovdqu  xmmword ptr [rsp+18H], xmm0
       vmovdqu  xmm0, xmmword ptr [rsp+58H]
       vmovdqu  xmmword ptr [rsp+28H], xmm0

G_M7880_IG04:
       mov      rax, gword ptr [rsp+08H]
       test     rax, rax

So I will close this issue.