Open AndyAyersMS opened 4 months ago
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch See info in area-owners.md if you want to be subscribed.
For the delegate case, if we add
index 89e28c5978c..a069d98503e 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -719,12 +719,23 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parent
case GT_CALL:
{
- GenTreeCall* asCall = parent->AsCall();
+ GenTreeCall* const call = parent->AsCall();
- if (asCall->IsHelperCall())
+ if (call->IsHelperCall())
{
canLclVarEscapeViaParentStack =
- !Compiler::s_helperCallProperties.IsNoEscape(comp->eeGetHelperNum(asCall->gtCallMethHnd));
+ !Compiler::s_helperCallProperties.IsNoEscape(comp->eeGetHelperNum(call->gtCallMethHnd));
+ }
+ else if (call->gtCallType == CT_USER_FUNC)
+ {
+ // Delegate invoke won't escape the delegate which is passed as "this"
+ // And gets expanded inline later.
+ //
+ if ((call->gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) != 0)
+ {
+ GenTree* const thisArg = call->gtArgs.GetThisArg()->GetNode();
+ canLclVarEscapeViaParentStack = thisArg != tree;
+ }
Then the example above becomes
; Method Y:Test():int (FullOpts)
G_M53607_IG01: ;; offset=0x0000
sub rsp, 88
vxorps xmm4, xmm4, xmm4
vmovdqu ymmword ptr [rsp+0x20], ymm4
vmovdqa xmmword ptr [rsp+0x40], xmm4
xor eax, eax
mov qword ptr [rsp+0x50], rax
;; size=27 bbWeight=1 PerfScore 5.83
G_M53607_IG02: ;; offset=0x001B
mov rcx, 0x7FFD4BB04580 ; Y+<>c__DisplayClass0_0
call CORINFO_HELP_NEWSFAST
mov dword ptr [rax+0x08], 100
mov rcx, 0x7FFD4BB04B30 ; System.Func`1[int]
mov qword ptr [rsp+0x20], rcx
mov gword ptr [rsp+0x28], rax
mov rcx, 0x7FFD4B8783F0 ; code for Y+<>c__DisplayClass0_0:<Test>b__0():int:this
mov qword ptr [rsp+0x38], rcx
lea rax, [rsp+0x20]
mov rcx, gword ptr [rax+0x08]
call [rax+0x18]System.Func`1[int]:Invoke():int:this
nop
;; size=70 bbWeight=1 PerfScore 11.50
G_M53607_IG03: ;; offset=0x0061
add rsp, 88
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 102
where the closure is still on the heap and we're invoking the delegate func "directly", but via a convoluted path where we store the func's (indirection cell) address to the stack-allocated delegate and then fetch it back and indirect through it.
Ideally we'd like to be able to inline and perhaps realize the closure doesn't escape either, but that seems far off. Perhaps we can just summarily claim the closure can't escape. I am not sure.
Moving delegate invoke expansion earlier does not look to be simple -- currently there is some prep work in morph and then the actual expansion in lower, and tail calls are a complication.
@AndyAyersMS With the array (non-gc elems) support + my field analysis prototype + the above delegate handling (branch at https://github.com/hez2010/runtime/tree/field-stackalloc), the codegen becomes:
G_M30166_IG01: ;; offset=0x0000
sub rsp, 104
vxorps xmm4, xmm4, xmm4
vmovdqu ymmword ptr [rsp+0x20], ymm4
vmovdqa xmmword ptr [rsp+0x40], xmm4
xor eax, eax
mov qword ptr [rsp+0x50], rax
;; size=27 bbWeight=1 PerfScore 5.83
G_M30166_IG02: ;; offset=0x001B
xor ecx, ecx
mov qword ptr [rsp+0x58], rcx
mov dword ptr [rsp+0x60], ecx
mov rcx, 0x7FFEBB4211B8 ; Y+<>c__DisplayClass7_0
mov qword ptr [rsp+0x58], rcx
mov dword ptr [rsp+0x60], 100
mov rcx, 0x7FFEBB420EE8 ; System.Func`1[int]
mov qword ptr [rsp+0x20], rcx
lea rcx, [rsp+0x58]
mov qword ptr [rsp+0x28], rcx
mov rcx, 0x7FFEBB3F0678 ; code for Y+<>c__DisplayClass7_0:<Test>b__0():int:this
mov qword ptr [rsp+0x38], rcx
lea rax, [rsp+0x20]
mov rcx, gword ptr [rax+0x08]
call [rax+0x18]System.Func`1[int]:Invoke():int:this
nop
;; size=87 bbWeight=1 PerfScore 14.25
G_M30166_IG03: ;; offset=0x0072
add rsp, 104
ret
Added Diagnostics
section
More useful links:
An overview of the implementation of escape analysis, including interprocedural analysis, in the JVM: https://cr.openjdk.org/~cslucas/escape-analysis/EscapeAnalysis.html
Some insights on allocating objects in a loop, etc.: https://devblogs.microsoft.com/java/improving-openjdk-scalar-replacement-part-2-3/
Stack allocation of non-escaping ref classes and boxed value classes was enabled in #103361, but only works in limited cases. This issue tracks further enhancements (see also #11192).
Abilities:
a small tweak to escape analysis gets the delegate on the stack, but the invoke expansion currently happens in lower so we don't get any physical promotion. We would need to move this earlier.
See note below.
localloc
instead of fixed allocations (at least for non-GC types; for GC types there's currently no way to do proper GC reporting)
Analysis:
84872
61455
Implementation:
ALLOCOBJ
ALLOCOBJ
assigned to single-def temp in importer
NAOT:
Advanced:
Diagnostics:
FYI @dotnet/jit-contrib