From c2e7016916241e435fa1dc58309d8e937a4393a0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 9 Jun 2026 10:36:37 +0200 Subject: [PATCH 1/5] JIT: Clear out return value references from continuations Async1 deterministically clears out awaiters on resumption, meaning that the callee's `Task` and its result do not stay alive. This change similarly makes it so that we do not keep results alive in async2. Async1 has similar clearing for locals based on lexical scope. I am hoping we can get away with not implementing something similar for runtime async (it would be expensive and impossible to guarantee similar behavior as async1). --- src/coreclr/jit/async.cpp | 71 ++++++++++++++++++++++++++++++++++++++- src/coreclr/jit/async.h | 3 ++ 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index f5cff305322e8a..7dccc6019a511c 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -3050,7 +3050,8 @@ BasicBlock* AsyncTransformation::RethrowExceptionOnResumption(BasicBlock* //------------------------------------------------------------------------ // AsyncTransformation::CopyReturnValueOnResumption: // Create IR that copies the return value from the continuation object to the -// right local. +// right local. When continuations may be reused, also clears out any GC +// references in the return value from the continuation afterwards. // // Parameters: // call - The async call. @@ -3149,6 +3150,74 @@ void AsyncTransformation::CopyReturnValueOnResumption(GenTreeCall* LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_compiler, storeResult)); } + + if (ReuseContinuations()) + { + ClearReturnValueOnResumption(retInfo, resultOffset, storeResultBB); + } +} + +//------------------------------------------------------------------------ +// AsyncTransformation::ClearReturnValueOnResumption: +// Create IR that clears out any GC references in the return value from the +// continuation object. This is used after the return value has been copied +// out to ensure that a reused continuation does not keep those references +// alive. +// +// Parameters: +// retInfo - Information about the return value in the continuation. +// resultOffset - Offset of the return value from the start of the continuation object. +// storeResultBB - Basic block to append IR to. +// +void AsyncTransformation::ClearReturnValueOnResumption(const ReturnInfo* retInfo, + unsigned resultOffset, + BasicBlock* storeResultBB) +{ + auto clearGCRef = [=](unsigned offset) { + GenTree* base = m_compiler->gtNewLclvNode(m_compiler->lvaAsyncContinuationArg, TYP_REF); + GenTree* null = m_compiler->gtNewNull(); + GenTree* clear = StoreAtOffset(base, offset, null, TYP_REF); + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_compiler, clear)); + }; + + if (retInfo->Type.ReturnType == TYP_STRUCT) + { + ClassLayout* retLayout = retInfo->Type.ReturnLayout; + unsigned gcPtrCount = retLayout->GetGCPtrCount(); + if (gcPtrCount == 0) + { + return; + } + + // If there are few GC references, and at most half of the struct is + // made up of GC references, then clear the individual GC pointers + // instead of zeroing out the whole struct. + if ((gcPtrCount <= 4) && ((gcPtrCount * 2) <= retLayout->GetSlotCount())) + { + for (unsigned i = 0; i < retLayout->GetSlotCount(); i++) + { + if (retLayout->IsGCPtr(i)) + { + clearGCRef(resultOffset + (i * TARGET_POINTER_SIZE)); + } + } + } + else + { + GenTree* base = m_compiler->gtNewLclvNode(m_compiler->lvaAsyncContinuationArg, TYP_REF); + GenTree* offset = m_compiler->gtNewIconNode((ssize_t)resultOffset, TYP_I_IMPL); + GenTree* addr = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, base, offset); + GenTreeFlags indirFlags = + GTF_IND_NONFAULTING | (retInfo->HeapAlignment() < retInfo->Alignment ? GTF_IND_UNALIGNED : GTF_EMPTY); + GenTree* zero = m_compiler->gtNewIconNode(0); + GenTree* store = m_compiler->gtNewStoreValueNode(retLayout, addr, zero, indirFlags); + LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_compiler, store)); + } + } + else if (retInfo->Type.ReturnType == TYP_REF) + { + clearGCRef(resultOffset); + } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/async.h b/src/coreclr/jit/async.h index 6e194fef03019b..eb7240ee6fceb4 100644 --- a/src/coreclr/jit/async.h +++ b/src/coreclr/jit/async.h @@ -476,6 +476,9 @@ class AsyncTransformation const CallDefinitionInfo& callDefInfo, const ContinuationLayout& layout, BasicBlock* storeResultBB); + void ClearReturnValueOnResumption(const ReturnInfo* retInfo, + unsigned resultOffset, + BasicBlock* storeResultBB); GenTreeIndir* LoadFromOffset(GenTree* base, unsigned offset, From ac3208042f40729041e77fbf6d2ea3e678073fe4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 9 Jun 2026 10:56:54 +0200 Subject: [PATCH 2/5] Feedback --- src/coreclr/jit/async.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 7dccc6019a511c..5ebfb6dd5acd5c 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -3173,10 +3173,10 @@ void AsyncTransformation::ClearReturnValueOnResumption(const ReturnInfo* retInfo unsigned resultOffset, BasicBlock* storeResultBB) { - auto clearGCRef = [=](unsigned offset) { + auto clearGCRef = [=](unsigned offset, var_types type) { GenTree* base = m_compiler->gtNewLclvNode(m_compiler->lvaAsyncContinuationArg, TYP_REF); - GenTree* null = m_compiler->gtNewNull(); - GenTree* clear = StoreAtOffset(base, offset, null, TYP_REF); + GenTree* zero = m_compiler->gtNewZeroConNode(type); + GenTree* clear = StoreAtOffset(base, offset, zero, type); LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_compiler, clear)); }; @@ -3198,7 +3198,7 @@ void AsyncTransformation::ClearReturnValueOnResumption(const ReturnInfo* retInfo { if (retLayout->IsGCPtr(i)) { - clearGCRef(resultOffset + (i * TARGET_POINTER_SIZE)); + clearGCRef(resultOffset + (i * TARGET_POINTER_SIZE), retLayout->GetGCPtrType(i)); } } } @@ -3216,7 +3216,7 @@ void AsyncTransformation::ClearReturnValueOnResumption(const ReturnInfo* retInfo } else if (retInfo->Type.ReturnType == TYP_REF) { - clearGCRef(resultOffset); + clearGCRef(resultOffset, TYP_REF); } } From 6574b37e2eca2377efcbe38d143c2edcad99c5db Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 9 Jun 2026 10:57:57 +0200 Subject: [PATCH 3/5] Run jit-format --- src/coreclr/jit/async.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/coreclr/jit/async.h b/src/coreclr/jit/async.h index eb7240ee6fceb4..fb9abf2d8be779 100644 --- a/src/coreclr/jit/async.h +++ b/src/coreclr/jit/async.h @@ -476,9 +476,7 @@ class AsyncTransformation const CallDefinitionInfo& callDefInfo, const ContinuationLayout& layout, BasicBlock* storeResultBB); - void ClearReturnValueOnResumption(const ReturnInfo* retInfo, - unsigned resultOffset, - BasicBlock* storeResultBB); + void ClearReturnValueOnResumption(const ReturnInfo* retInfo, unsigned resultOffset, BasicBlock* storeResultBB); GenTreeIndir* LoadFromOffset(GenTree* base, unsigned offset, From 5ed5cbd172fe210c9f8dd7023546d870ea2c69d9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 9 Jun 2026 21:03:13 +0200 Subject: [PATCH 4/5] Feedback --- src/coreclr/jit/async.cpp | 36 ++++++++++++++++++++----- src/coreclr/jit/layout.cpp | 52 +++++++++++++++++++++++++++++++++++ src/coreclr/jit/layout.h | 2 ++ src/coreclr/jit/lower.cpp | 55 +++----------------------------------- src/coreclr/jit/lower.h | 11 ++++---- 5 files changed, 92 insertions(+), 64 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index 5ebfb6dd5acd5c..a38bca80d34c69 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -3189,12 +3189,29 @@ void AsyncTransformation::ClearReturnValueOnResumption(const ReturnInfo* retInfo return; } - // If there are few GC references, and at most half of the struct is - // made up of GC references, then clear the individual GC pointers - // instead of zeroing out the whole struct. - if ((gcPtrCount <= 4) && ((gcPtrCount * 2) <= retLayout->GetSlotCount())) + // Find the range of slots spanning the first to the last GC reference. A block store only + // needs to cover this range, since everything outside it is non-GC. + unsigned firstSlot = 0; + while (!retLayout->IsGCPtr(firstSlot)) { - for (unsigned i = 0; i < retLayout->GetSlotCount(); i++) + firstSlot++; + } + + unsigned lastSlot = retLayout->GetSlotCount() - 1; + while (!retLayout->IsGCPtr(lastSlot)) + { + lastSlot--; + } + + unsigned sliceSlotCount = lastSlot - firstSlot + 1; + + // If there are few GC references, and at most half of the slice is made up of GC references, + // then clear the individual GC pointers instead of zeroing out the slice. + // Otherwise we prefer to clear the entire slice of GC references as a TYP_STRUCT store to allow + // the backend to use SIMD instructions. + if ((gcPtrCount <= 4) && ((gcPtrCount * 2) <= sliceSlotCount)) + { + for (unsigned i = firstSlot; i <= lastSlot; i++) { if (retLayout->IsGCPtr(i)) { @@ -3204,13 +3221,18 @@ void AsyncTransformation::ClearReturnValueOnResumption(const ReturnInfo* retInfo } else { + unsigned sliceOffset = firstSlot * TARGET_POINTER_SIZE; + unsigned sliceSize = sliceSlotCount * TARGET_POINTER_SIZE; + + ClassLayout* sliceLayout = retLayout->SliceLayout(m_compiler, sliceOffset, sliceSize); + GenTree* base = m_compiler->gtNewLclvNode(m_compiler->lvaAsyncContinuationArg, TYP_REF); - GenTree* offset = m_compiler->gtNewIconNode((ssize_t)resultOffset, TYP_I_IMPL); + GenTree* offset = m_compiler->gtNewIconNode((ssize_t)(resultOffset + sliceOffset), TYP_I_IMPL); GenTree* addr = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, base, offset); GenTreeFlags indirFlags = GTF_IND_NONFAULTING | (retInfo->HeapAlignment() < retInfo->Alignment ? GTF_IND_UNALIGNED : GTF_EMPTY); GenTree* zero = m_compiler->gtNewIconNode(0); - GenTree* store = m_compiler->gtNewStoreValueNode(retLayout, addr, zero, indirFlags); + GenTree* store = m_compiler->gtNewStoreValueNode(sliceLayout, addr, zero, indirFlags); LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_compiler, store)); } } diff --git a/src/coreclr/jit/layout.cpp b/src/coreclr/jit/layout.cpp index ae42615b878df8..b0965462a39159 100644 --- a/src/coreclr/jit/layout.cpp +++ b/src/coreclr/jit/layout.cpp @@ -698,6 +698,58 @@ const SegmentList& ClassLayout::GetNonPadding(Compiler* comp) return *m_nonPadding; } +//------------------------------------------------------------------------ +// SliceLayout: +// Slice this class layout into the specified range. +// +// Parameters: +// compiler - The compiler instance +// offset - Start offset of the slice +// size - Size of the slice +// +// Returns: +// New layout of size 'size' +// +ClassLayout* ClassLayout::SliceLayout(Compiler* compiler, unsigned offset, unsigned size) +{ + if (offset == 0 && size == GetSize()) + { + return this; + } + + ClassLayoutBuilder builder(compiler, size); + INDEBUG(builder.SetName(compiler->printfAlloc("%s[%03u..%03u)", GetClassName(), offset, offset + size), + compiler->printfAlloc("%s[%03u..%03u)", GetShortClassName(), offset, offset + size))); + + if (((offset % TARGET_POINTER_SIZE) == 0) && ((size % TARGET_POINTER_SIZE) == 0) && HasGCPtr()) + { + for (unsigned i = 0; i < size; i += TARGET_POINTER_SIZE) + { + builder.SetGCPtrType(i / TARGET_POINTER_SIZE, GetGCPtrType((offset + i) / TARGET_POINTER_SIZE)); + } + } + else + { + assert(!HasGCPtr()); + } + + builder.AddPadding(SegmentList::Segment(0, size)); + + for (const SegmentList::Segment& nonPadding : GetNonPadding(compiler)) + { + if ((nonPadding.End <= offset) || (nonPadding.Start >= offset + size)) + { + continue; + } + + unsigned start = nonPadding.Start <= offset ? 0 : (nonPadding.Start - offset); + unsigned end = nonPadding.End >= (offset + size) ? size : (nonPadding.End - offset); + + builder.RemovePadding(SegmentList::Segment(start, end)); + } + return compiler->typGetCustomLayout(builder); +} + //------------------------------------------------------------------------ // AreCompatible: check if 2 layouts are the same for copying. // diff --git a/src/coreclr/jit/layout.h b/src/coreclr/jit/layout.h index 0c79cce85f305c..42d4ce2917e2d9 100644 --- a/src/coreclr/jit/layout.h +++ b/src/coreclr/jit/layout.h @@ -266,6 +266,8 @@ class ClassLayout const SegmentList& GetNonPadding(Compiler* comp); + ClassLayout* SliceLayout(Compiler* compiler, unsigned offset, unsigned size); + static bool AreCompatible(const ClassLayout* layout1, const ClassLayout* layout2); bool CanAssignFrom(const ClassLayout* sourceLayout); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index b122b8de775259..a48e1198b706ea 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1703,9 +1703,10 @@ void Lowering::SplitArgumentBetweenRegistersAndStack(GenTreeCall* call, CallArg* JITDUMP("Dividing split arg [%06u] with %u registers, %u stack space into two arguments\n", Compiler::dspTreeID(arg), numRegs, stackSeg.Size); - ClassLayout* registersLayout = SliceLayout(callArg->GetSignatureLayout(), 0, stackSeg.Offset); - ClassLayout* stackLayout = SliceLayout(callArg->GetSignatureLayout(), stackSeg.Offset, - callArg->GetSignatureLayout()->GetSize() - stackSeg.Offset); + ClassLayout* registersLayout = callArg->GetSignatureLayout()->SliceLayout(m_compiler, 0, stackSeg.Offset); + ClassLayout* stackLayout = + callArg->GetSignatureLayout()->SliceLayout(m_compiler, stackSeg.Offset, + callArg->GetSignatureLayout()->GetSize() - stackSeg.Offset); GenTree* stackNode = nullptr; GenTree* registersNode = nullptr; @@ -1898,54 +1899,6 @@ void Lowering::SplitArgumentBetweenRegistersAndStack(GenTreeCall* call, CallArg* DISPTREERANGE(BlockRange(), call); } -//------------------------------------------------------------------------ -// SliceLayout: -// Slice a class layout into the specified range. -// -// Parameters: -// layout - The layout -// offset - Start offset of the slice -// size - Size of the slice -// -// Returns: -// New layout of size 'size' -// -ClassLayout* Lowering::SliceLayout(ClassLayout* layout, unsigned offset, unsigned size) -{ - ClassLayoutBuilder builder(m_compiler, size); - INDEBUG( - builder.SetName(m_compiler->printfAlloc("%s[%03u..%03u)", layout->GetClassName(), offset, offset + size), - m_compiler->printfAlloc("%s[%03u..%03u)", layout->GetShortClassName(), offset, offset + size))); - - if (((offset % TARGET_POINTER_SIZE) == 0) && ((size % TARGET_POINTER_SIZE) == 0) && layout->HasGCPtr()) - { - for (unsigned i = 0; i < size; i += TARGET_POINTER_SIZE) - { - builder.SetGCPtrType(i / TARGET_POINTER_SIZE, layout->GetGCPtrType((offset + i) / TARGET_POINTER_SIZE)); - } - } - else - { - assert(!layout->HasGCPtr()); - } - - builder.AddPadding(SegmentList::Segment(0, size)); - - for (const SegmentList::Segment& nonPadding : layout->GetNonPadding(m_compiler)) - { - if ((nonPadding.End <= offset) || (nonPadding.Start >= offset + size)) - { - continue; - } - - unsigned start = nonPadding.Start <= offset ? 0 : (nonPadding.Start - offset); - unsigned end = nonPadding.End >= (offset + size) ? size : (nonPadding.End - offset); - - builder.RemovePadding(SegmentList::Segment(start, end)); - } - return m_compiler->typGetCustomLayout(builder); -} - //------------------------------------------------------------------------ // InsertBitCastIfNecessary: // Insert a bitcast if a primitive argument being passed in a register is not diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index e7c59dca277742..d20b3c39034c3a 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -217,12 +217,11 @@ class Lowering final : public Phase void LowerSpecialCopyArgs(GenTreeCall* call); void InsertSpecialCopyArg(GenTreePutArgStk* putArgStk, CORINFO_CLASS_HANDLE argType, unsigned lclNum); #endif // defined(TARGET_X86) && defined(FEATURE_IJW) - void LowerArg(GenTreeCall* call, CallArg* callArg); - void SplitArgumentBetweenRegistersAndStack(GenTreeCall* call, CallArg* callArg); - ClassLayout* SliceLayout(ClassLayout* layout, unsigned offset, unsigned size); - void InsertBitCastIfNecessary(GenTree** argNode, const ABIPassingSegment& registerSegment); - void InsertPutArgReg(GenTree** node, const ABIPassingSegment& registerSegment); - void LegalizeArgPlacement(GenTreeCall* call); + void LowerArg(GenTreeCall* call, CallArg* callArg); + void SplitArgumentBetweenRegistersAndStack(GenTreeCall* call, CallArg* callArg); + void InsertBitCastIfNecessary(GenTree** argNode, const ABIPassingSegment& registerSegment); + void InsertPutArgReg(GenTree** node, const ABIPassingSegment& registerSegment); + void LegalizeArgPlacement(GenTreeCall* call); void InsertPInvokeCallProlog(GenTreeCall* call); void InsertPInvokeCallEpilog(GenTreeCall* call); From 70079966d196916c5ecf17c32d21532556d230f1 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 10 Jun 2026 13:32:58 +0200 Subject: [PATCH 5/5] Another piece of feedback --- src/coreclr/jit/async.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/async.cpp b/src/coreclr/jit/async.cpp index a38bca80d34c69..df2abfb88f5df7 100644 --- a/src/coreclr/jit/async.cpp +++ b/src/coreclr/jit/async.cpp @@ -3236,9 +3236,9 @@ void AsyncTransformation::ClearReturnValueOnResumption(const ReturnInfo* retInfo LIR::AsRange(storeResultBB).InsertAtEnd(LIR::SeqTree(m_compiler, store)); } } - else if (retInfo->Type.ReturnType == TYP_REF) + else if (varTypeIsGC(retInfo->Type.ReturnType)) { - clearGCRef(resultOffset, TYP_REF); + clearGCRef(resultOffset, retInfo->Type.ReturnType); } }