From cb9785df2fc7451d1d04e9d5caf6b10ecc38177e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Tue, 11 Feb 2025 13:17:21 +0100 Subject: [PATCH 01/23] Generate calls to interface methods through resolve helper --- src/coreclr/inc/corinfo.h | 1 + src/coreclr/inc/jithelpers.h | 1 + src/coreclr/jit/importer.cpp | 6 ++ src/coreclr/jit/importercalls.cpp | 4 +- .../System/Runtime/CachedInterfaceDispatch.cs | 7 --- src/coreclr/nativeaot/Runtime/AsmOffsets.h | 2 + src/coreclr/nativeaot/Runtime/CMakeLists.txt | 8 +++ src/coreclr/nativeaot/Runtime/EHHelpers.cpp | 6 ++ .../Runtime/amd64/DispatchResolve.asm | 55 +++++++++++++++++++ src/coreclr/nativeaot/Runtime/inc/rhbinder.h | 2 + .../Common/JitInterface/CorInfoHelpFunc.cs | 1 + .../JitInterface/CorInfoImpl.RyuJit.cs | 8 ++- 12 files changed, 90 insertions(+), 11 deletions(-) create mode 100644 src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 2128b8cabe7c32..e2c0c33d228594 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -574,6 +574,7 @@ enum CorInfoHelpFunc CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT_TRACK_TRANSITIONS, // Transition to preemptive mode and track transitions in reverse P/Invoke prolog. CORINFO_HELP_GVMLOOKUP_FOR_SLOT, // Resolve a generic virtual method target from this pointer and runtime method handle + CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, // Resolve a non-generic interface method from this pointer and dispatch cell CORINFO_HELP_STACK_PROBE, // Probes each page of the allocated stack frame diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index 38114a9bbfcada..e5ce70be0221f3 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -309,6 +309,7 @@ JITHELPER(CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT_TRACK_TRANSITIONS, JIT_ReversePInvokeExitTrackTransitions, METHOD__NIL) JITHELPER(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, NULL, METHOD__NIL) + JITHELPER(CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, NULL, METHOD__NIL) #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) JITHELPER(CORINFO_HELP_STACK_PROBE, JIT_StackProbe, METHOD__NIL) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index d3de5273c0b613..491a4949517e93 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -2721,6 +2721,12 @@ GenTree* Compiler::impImportLdvirtftn(GenTree* thisPtr, impLookupToTree(pResolvedToken, &pCallInfo->codePointerLookup, GTF_ICON_METHOD_HDL, pCallInfo->hMethod); call = gtNewHelperCallNode(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, TYP_I_IMPL, thisPtr, runtimeMethodHandle); } + else if (isInterface && IsTargetAbi(CORINFO_NATIVEAOT_ABI)) + { + GenTree* dispatchCell = + impLookupToTree(pResolvedToken, &pCallInfo->codePointerLookup, GTF_ICON_GLOBAL_PTR, pCallInfo->hMethod); + call = gtNewHelperCallNode(CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, TYP_I_IMPL, thisPtr, dispatchCell); + } #ifdef FEATURE_READYTORUN else if (opts.IsReadyToRun()) diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 319466fd94071d..5c95dbd449026a 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -386,7 +386,7 @@ var_types Compiler::impImportCall(OPCODE opcode, case CORINFO_VIRTUALCALL_LDVIRTFTN: { - if (compIsForInlining()) + if (compIsForInlining() && !IsTargetAbi(CORINFO_NATIVEAOT_ABI)) { compInlineResult->NoteFatal(InlineObservation::CALLSITE_HAS_CALL_VIA_LDVIRTFTN); return TYP_UNDEF; 
@@ -430,7 +430,7 @@ var_types Compiler::impImportCall(OPCODE opcode, addFatPointerCandidate(call->AsCall()); } #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun()) + if (opts.IsReadyToRun() && !IsTargetAbi(CORINFO_NATIVEAOT_ABI)) { // Null check is needed for ready to run to handle // non-virtual <-> virtual changes between versions diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CachedInterfaceDispatch.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CachedInterfaceDispatch.cs index 9a1b1ad7ef985a..3c217ab8d9e5f5 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CachedInterfaceDispatch.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CachedInterfaceDispatch.cs @@ -49,13 +49,6 @@ private static IntPtr RhpCidResolve_Worker(object pObject, IntPtr pCell) [RuntimeExport("RhpResolveInterfaceMethod")] private static IntPtr RhpResolveInterfaceMethod(object pObject, IntPtr pCell) { - if (pObject == null) - { - // Optimizer may perform code motion on dispatch such that it occurs independant of - // null check on "this" pointer. Allow for this case by returning back an invalid pointer. - return IntPtr.Zero; - } - MethodTable* pInstanceType = pObject.GetMethodTable(); // This method is used for the implementation of LOAD_VIRT_FUNCTION and in that case the mapping we want diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsets.h b/src/coreclr/nativeaot/Runtime/AsmOffsets.h index 0284daba94d541..8afde6ca76fea3 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsets.h +++ b/src/coreclr/nativeaot/Runtime/AsmOffsets.h @@ -71,8 +71,10 @@ ASM_OFFSET( 4, 8, InterfaceDispatchCell, m_pCache) #ifdef INTERFACE_DISPATCH_CACHE_HAS_CELL_BACKPOINTER ASM_OFFSET( 8, 0, InterfaceDispatchCache, m_pCell) #endif +ASM_OFFSET( C, 18, InterfaceDispatchCache, m_cEntries) ASM_OFFSET( 10, 20, InterfaceDispatchCache, m_rgEntries) ASM_SIZEOF( 8, 10, InterfaceDispatchCacheEntry) +ASM_CONST( 3, 3, IDC_CACHE_POINTER_MASK) #endif // Undefine macros that are only used in this header for convenience. diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 8cf45e0018bfa8..034b810432576a 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -213,6 +213,14 @@ list(APPEND RUNTIME_SOURCES_ARCH_ASM ${ARCH_SOURCES_DIR}/WriteBarriers.${ASM_SUFFIX} ) +if(CLR_CMAKE_TARGET_WIN32) + if (CLR_CMAKE_TARGET_ARCH_AMD64) + list(APPEND RUNTIME_SOURCES_ARCH_ASM + ${ARCH_SOURCES_DIR}/DispatchResolve.${ASM_SUFFIX} + ) + endif() +endif() + # Add architecture specific folder for looking up headers. 
convert_to_absolute_path(ARCH_SOURCES_DIR ${ARCH_SOURCES_DIR}) include_directories(${ARCH_SOURCES_DIR}) diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index a3643e32f5eaf7..bb52fa4554227d 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -266,6 +266,9 @@ EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation8; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation16; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation32; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation64; +#if defined(HOST_AMD64) && defined(HOST_WINDOWS) +EXTERN_C CODE_LOCATION RhpResolveInterfaceMethodFast; +#endif static bool InInterfaceDispatchHelper(uintptr_t faultingIP) { @@ -280,6 +283,9 @@ static bool InInterfaceDispatchHelper(uintptr_t faultingIP) (uintptr_t)&RhpInterfaceDispatchAVLocation16, (uintptr_t)&RhpInterfaceDispatchAVLocation32, (uintptr_t)&RhpInterfaceDispatchAVLocation64, +#if defined(HOST_AMD64) && defined(HOST_WINDOWS) + (uintptr_t)&RhpResolveInterfaceMethodFast, +#endif }; // compare the IP against the list of known possible AV locations in the interface dispatch helpers diff --git a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm new file mode 100644 index 00000000000000..9610d3c4c49c38 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm @@ -0,0 +1,55 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + + +ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + +EXTERN RhpResolveInterfaceMethod : PROC + +;; Fast version of RhpResolveInterfaceMethod +LEAF_ENTRY RhpResolveInterfaceMethodFast, _TEXT + + ;; Load the MethodTable from the object instance in rcx. + ;; Trigger an AV if we're dispatching on a null this. + ;; The exception handling infrastructure is aware of the fact that this is the first + ;; instruction of RhpResolveInterfaceMethodFast and uses it to translate an AV here + ;; to a NullReferenceException at the callsite. + mov r9, [rcx] + + ;; rdx currently contains the indirection cell address. + ;; load r8 to point to the cache block. 
+ mov r8, [rdx + OFFSETOF__InterfaceDispatchCell__m_pCache] + test r8b, IDC_CACHE_POINTER_MASK + jne RhpResolveInterfaceMethodFast_SlowPath + + lea rax, [r8 + OFFSETOF__InterfaceDispatchCache__m_rgEntries] + cmp qword ptr [rax], r9 + jne RhpResolveInterfaceMethodFast_Polymorphic + mov rax, qword ptr [rax + 8] + ret + + RhpResolveInterfaceMethodFast_Polymorphic: + mov r8d, dword ptr [r8 + OFFSETOF__InterfaceDispatchCache__m_cEntries] + + RhpResolveInterfaceMethodFast_NextEntry: + add rax, SIZEOF__InterfaceDispatchCacheEntry + dec r8d + jz RhpResolveInterfaceMethodFast_SlowPath + + cmp qword ptr [rax], r9 + jne RhpResolveInterfaceMethodFast_NextEntry + + mov rax, qword ptr [rax + 8] + ret + + RhpResolveInterfaceMethodFast_SlowPath: + jmp RhpResolveInterfaceMethod + +LEAF_END RhpResolveInterfaceMethodFast, _TEXT + +endif ;; FEATURE_CACHED_INTERFACE_DISPATCH + +end diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index b33f4376cbd893..319a7b2628f415 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -236,6 +236,8 @@ struct InterfaceDispatchCell } }; +#define IDC_CACHE_POINTER_MASK (InterfaceDispatchCell::Flags::IDC_CachePointerMask) + #endif // FEATURE_CACHED_INTERFACE_DISPATCH #ifdef TARGET_ARM diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 04bca41e476671..7f1697d813953f 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -264,6 +264,7 @@ which is the right helper to use to allocate an object of a given type. */ CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT_TRACK_TRANSITIONS, // Transition to preemptive mode and track transitions in reverse P/Invoke prolog. CORINFO_HELP_GVMLOOKUP_FOR_SLOT, // Resolve a generic virtual method target from this pointer and runtime method handle + CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, // Resolve a non-generic interface method from this pointer and dispatch cell CORINFO_HELP_STACK_PROBE, // Probes each page of the allocated stack frame diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index b4933c28b25afc..aa0eace27ab225 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -761,6 +761,8 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_GVMLOOKUP_FOR_SLOT: id = ReadyToRunHelper.GVMLookupForSlot; break; + case CorInfoHelpFunc.CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT: + return _compilation.NodeFactory.ExternSymbol(_compilation.TypeSystemContext.Target.IsWindows && _compilation.TypeSystemContext.Target.Architecture == TargetArchitecture.X64 ? 
"RhpResolveInterfaceMethodFast" : "RhpResolveInterfaceMethod"); case CorInfoHelpFunc.CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL: id = ReadyToRunHelper.TypeHandleToRuntimeType; @@ -1588,7 +1590,8 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO else if ((flags & CORINFO_CALLINFO_FLAGS.CORINFO_CALLINFO_LDFTN) == 0 && targetMethod.OwningType.IsInterface) { - pResult->kind = CORINFO_CALL_KIND.CORINFO_VIRTUALCALL_STUB; + pResult->kind = CORINFO_CALL_KIND.CORINFO_VIRTUALCALL_LDVIRTFTN; + //pResult->kind = CORINFO_CALL_KIND.CORINFO_VIRTUALCALL_STUB; if (pResult->exactContextNeedsRuntimeLookup) { @@ -1602,7 +1605,8 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO else { pResult->codePointerOrStubLookup.lookupKind.needsRuntimeLookup = false; - pResult->codePointerOrStubLookup.constLookup.accessType = InfoAccessType.IAT_PVALUE; + pResult->codePointerOrStubLookup.constLookup.accessType = InfoAccessType.IAT_VALUE; + //pResult->codePointerOrStubLookup.constLookup.accessType = InfoAccessType.IAT_PVALUE; #pragma warning disable SA1001, SA1113, SA1115 // Commas should be spaced correctly pResult->codePointerOrStubLookup.constLookup.addr = (void*)ObjectToHandle( _compilation.NodeFactory.InterfaceDispatchCell(targetMethod From 6bdcdb17e91819523725488e91dcfb41d22b9fda Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 12 Feb 2025 13:49:21 +0100 Subject: [PATCH 02/23] Expand resolver during CFG expansion --- src/coreclr/jit/codegencommon.cpp | 5 + src/coreclr/jit/gentree.cpp | 41 ++++++++ src/coreclr/jit/gentree.h | 1 + src/coreclr/jit/importer.cpp | 6 -- src/coreclr/jit/lower.cpp | 96 ++++++++++++++++++- src/coreclr/jit/morph.cpp | 4 +- src/coreclr/jit/targetamd64.h | 2 + .../Runtime/amd64/DispatchResolve.asm | 14 +-- .../JitInterface/CorInfoImpl.RyuJit.cs | 6 +- 9 files changed, 153 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 2318d4223ad812..5e49976d6503e0 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -735,6 +735,11 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) case CORINFO_HELP_VALIDATE_INDIRECT_CALL: return RBM_VALIDATE_INDIRECT_CALL_TRASH; +#ifdef RBM_INTERFACELOOKUP_FOR_SLOT_TRASH + case CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT: + return RBM_INTERFACELOOKUP_FOR_SLOT_TRASH; +#endif + default: return RBM_CALLEE_TRASH; } diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ea02025450b39a..76dbf29e272e51 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -2018,6 +2018,47 @@ void CallArgs::Remove(CallArg* arg) assert(!"Did not find arg to remove in CallArgs::Remove"); } +//--------------------------------------------------------------- +// RemoveLate: Remove an argument from the argument list and late argument list. +// +// Parameters: +// arg - The arg to remove. +// +// Remarks: +// This can be used to use arguments after ABI determination and after morph. +// It removes the argument from both the early and late list. However, no ABI +// information is updated. Caller needs to know what they are doing. 
+// +void CallArgs::RemoveLate(CallArg* arg) +{ + CallArg** slot = &m_lateHead; + while (*slot != nullptr) + { + if (*slot == arg) + { + *slot = arg->GetLateNext(); + break; + } + + slot = &(*slot)->LateNextRef(); + } + + slot = &m_head; + while (*slot != nullptr) + { + if (*slot == arg) + { + *slot = arg->GetNext(); + RemovedWellKnownArg(arg->GetWellKnownArg()); + return; + } + + slot = &(*slot)->NextRef(); + } + + assert(!"Did not find arg to remove in CallArgs::RemoveLate"); +} + #ifdef TARGET_XARCH //--------------------------------------------------------------- // NeedsVzeroupper: Determines if the call needs a vzeroupper emitted before it is invoked diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 641f0b05e1f61a..360af08c8c1c37 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4938,6 +4938,7 @@ class CallArgs CallArg* InsertAfterThisOrFirst(Compiler* comp, const NewCallArg& arg); void PushLateBack(CallArg* arg); void Remove(CallArg* arg); + void RemoveLate(CallArg* arg); template void InternalCopyFrom(Compiler* comp, CallArgs* other, CopyNodeFunc copyFunc); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 491a4949517e93..d3de5273c0b613 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -2721,12 +2721,6 @@ GenTree* Compiler::impImportLdvirtftn(GenTree* thisPtr, impLookupToTree(pResolvedToken, &pCallInfo->codePointerLookup, GTF_ICON_METHOD_HDL, pCallInfo->hMethod); call = gtNewHelperCallNode(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, TYP_I_IMPL, thisPtr, runtimeMethodHandle); } - else if (isInterface && IsTargetAbi(CORINFO_NATIVEAOT_ABI)) - { - GenTree* dispatchCell = - impLookupToTree(pResolvedToken, &pCallInfo->codePointerLookup, GTF_ICON_GLOBAL_PTR, pCallInfo->hMethod); - call = gtNewHelperCallNode(CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, TYP_I_IMPL, thisPtr, dispatchCell); - } #ifdef FEATURE_READYTORUN else if (opts.IsReadyToRun()) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index eee9a5859ecb43..83e27b7f4de204 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3620,8 +3620,98 @@ void Lowering::LowerCFGCall(GenTreeCall* call) { return; } + auto cloneUse = [=](LIR::Use& use, bool cloneConsts) -> GenTree* { + bool canClone = cloneConsts && use.Def()->IsCnsIntOrI(); + if (!canClone && use.Def()->OperIs(GT_LCL_VAR)) + { + canClone = !comp->lvaGetDesc(use.Def()->AsLclVarCommon())->IsAddressExposed(); + } + + if (canClone) + { + return comp->gtCloneExpr(use.Def()); + } + else + { + unsigned newLcl = use.ReplaceWithLclVar(comp); + return comp->gtNewLclvNode(newLcl, TYP_I_IMPL); + } + }; GenTree* callTarget = call->gtCallType == CT_INDIRECT ? call->gtCallAddr : call->gtControlExpr; + + if (call->IsVirtualStub()) + { + // VSDs go through a resolver instead which skips double validation and + // indirection. + CallArg* vsdCellArg = call->gtArgs.FindWellKnownArg(WellKnownArg::VirtualStubCell); + CallArg* thisArg = call->gtArgs.GetThisArg(); + + assert((vsdCellArg != nullptr) && (thisArg != nullptr)); + assert(thisArg->GetNode()->OperIs(GT_PUTARG_REG)); + LIR::Use thisArgUse(BlockRange(), &thisArg->GetNode()->AsOp()->gtOp1, thisArg->GetNode()); + GenTree* thisArgClone = cloneUse(thisArgUse, true); + + // The VSD cell is not needed for the original call when going through the resolver. + // It can be removed without further fixups because it has fixed ABI assignment. 
+ call->gtArgs.RemoveLate(vsdCellArg); + assert(vsdCellArg->GetNode()->OperIs(GT_PUTARG_REG)); + // Also PUTARG_REG can be removed. + BlockRange().Remove(vsdCellArg->GetNode()); + // The actual cell we need for the resolver. + GenTree* vsdCellArgNode = vsdCellArg->GetNode()->gtGetOp1(); + + GenTreeCall* resolve = comp->gtNewHelperCallNode(CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, TYP_I_IMPL); + + // Use a placeholder for the cell since the cell is already inserted in + // LIR. + GenTree* vsdCellPlaceholder = comp->gtNewZeroConNode(TYP_I_IMPL); + resolve->gtArgs.PushFront(comp, + NewCallArg::Primitive(vsdCellPlaceholder).WellKnown(WellKnownArg::VirtualStubCell)); + + // 'this' arg clone is not inserted, so no need to use a placeholder for that. + resolve->gtArgs.PushFront(comp, NewCallArg::Primitive(thisArgClone)); + + comp->fgMorphTree(resolve); + + LIR::Range resolveRange = LIR::SeqTree(comp, resolve); + GenTree* resolveFirst = resolveRange.FirstNode(); + GenTree* resolveLast = resolveRange.LastNode(); + // Resolution comes with a null check, so it must happen after all + // arguments are evaluated, hence we insert it right before the call. + BlockRange().InsertBefore(call, std::move(resolveRange)); + + // Swap out the VSD cell argument. + LIR::Use vsdCellUse; + bool gotUse = BlockRange().TryGetUse(vsdCellPlaceholder, &vsdCellUse); + assert(gotUse); + vsdCellUse.ReplaceWith(vsdCellArgNode); + vsdCellPlaceholder->SetUnusedValue(); + + // Now we can lower the resolver. + LowerRange(resolveFirst, resolveLast); + + // That inserted new PUTARG nodes right before the call, so we need to + // legalize the existing call's PUTARG_REG nodes. + MoveCFGCallArgs(call); + + // Finally update the call target + call->gtCallType = CT_INDIRECT; + call->gtFlags &= ~GTF_CALL_VIRT_STUB; + call->gtCallAddr = resolve; +#ifdef FEATURE_READYTORUN + call->gtEntryPoint.addr = nullptr; + call->gtEntryPoint.accessType = IAT_VALUE; +#endif + + if (callTarget != nullptr) + { + callTarget->SetUnusedValue(); + } + + callTarget = resolve; + } + if (callTarget == nullptr) { assert((call->gtCallType != CT_INDIRECT) && (!call->IsVirtual() || call->IsVirtualStubRelativeIndir())); @@ -3648,7 +3738,7 @@ void Lowering::LowerCFGCall(GenTreeCall* call) cloneConsts = true; #endif - GenTree* indirCellClone; + GenTree* indirCellClone = cloneUse(indirCellArgUse, cloneConsts); if (indirCellArgUse.Def()->OperIs(GT_LCL_VAR) || (cloneConsts && indirCellArgUse.Def()->IsCnsIntOrI())) { @@ -6656,7 +6746,7 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) // fgMorphArgs will have created trees to pass the address in VirtualStubParam.reg. // All we have to do here is add an indirection to generate the actual call target. 
- GenTree* ind = Ind(call->gtCallAddr); + GenTree* ind = comp->gtNewIndir(TYP_I_IMPL, call->gtCallAddr, GTF_IND_NONFAULTING); BlockRange().InsertAfter(call->gtCallAddr, ind); call->gtCallAddr = ind; @@ -6698,7 +6788,7 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) if (!shouldOptimizeVirtualStubCall) { - result = Ind(addr); + result = comp->gtNewIndir(TYP_I_IMPL, addr, GTF_IND_NONFAULTING); } } } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 79cfd794c9d0d6..8bf9b8ab972d48 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1155,12 +1155,12 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) } } - // When CFG is enabled and this is a delegate call or vtable call we must + // When CFG is enabled and this is a delegate call or virtual call we must // compute the call target before all late args. However this will // effectively null-check 'this', which should happen only after all // arguments are evaluated. Thus we must evaluate all args with side // effects to a temp. - if (comp->opts.IsCFGEnabled() && (call->IsVirtualVtable() || call->IsDelegateInvoke())) + if (comp->opts.IsCFGEnabled() && (call->IsVirtual() || call->IsDelegateInvoke())) { // Always evaluate 'this' to temp. assert(HasThisPointer()); diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 869bf1944ce5f0..5208561cff9f06 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -543,6 +543,8 @@ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH + #define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_RAX | RBM_R10 | RBM_R11) + #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R10 | RBM_RCX)) #define RBM_VALIDATE_INDIRECT_CALL_TRASH_ALL (RBM_INT_CALLEE_TRASH_ALL & ~(RBM_R10 | RBM_RCX)) #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_RCX diff --git a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm index 9610d3c4c49c38..459d7307ff563f 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm @@ -19,24 +19,24 @@ LEAF_ENTRY RhpResolveInterfaceMethodFast, _TEXT ;; to a NullReferenceException at the callsite. mov r9, [rcx] - ;; rdx currently contains the indirection cell address. - ;; load r8 to point to the cache block. - mov r8, [rdx + OFFSETOF__InterfaceDispatchCell__m_pCache] - test r8b, IDC_CACHE_POINTER_MASK + ;; r10 currently contains the indirection cell address. + ;; load r10 to point to the cache block. 
+ mov r10, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + test r10b, IDC_CACHE_POINTER_MASK jne RhpResolveInterfaceMethodFast_SlowPath - lea rax, [r8 + OFFSETOF__InterfaceDispatchCache__m_rgEntries] + lea rax, [r10 + OFFSETOF__InterfaceDispatchCache__m_rgEntries] cmp qword ptr [rax], r9 jne RhpResolveInterfaceMethodFast_Polymorphic mov rax, qword ptr [rax + 8] ret RhpResolveInterfaceMethodFast_Polymorphic: - mov r8d, dword ptr [r8 + OFFSETOF__InterfaceDispatchCache__m_cEntries] + mov r10d, dword ptr [r10 + OFFSETOF__InterfaceDispatchCache__m_cEntries] RhpResolveInterfaceMethodFast_NextEntry: add rax, SIZEOF__InterfaceDispatchCacheEntry - dec r8d + dec r10d jz RhpResolveInterfaceMethodFast_SlowPath cmp qword ptr [rax], r9 diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index aa0eace27ab225..29ba0a13aef6f3 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -1590,8 +1590,7 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO else if ((flags & CORINFO_CALLINFO_FLAGS.CORINFO_CALLINFO_LDFTN) == 0 && targetMethod.OwningType.IsInterface) { - pResult->kind = CORINFO_CALL_KIND.CORINFO_VIRTUALCALL_LDVIRTFTN; - //pResult->kind = CORINFO_CALL_KIND.CORINFO_VIRTUALCALL_STUB; + pResult->kind = CORINFO_CALL_KIND.CORINFO_VIRTUALCALL_STUB; if (pResult->exactContextNeedsRuntimeLookup) { @@ -1605,8 +1604,7 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO else { pResult->codePointerOrStubLookup.lookupKind.needsRuntimeLookup = false; - pResult->codePointerOrStubLookup.constLookup.accessType = InfoAccessType.IAT_VALUE; - //pResult->codePointerOrStubLookup.constLookup.accessType = InfoAccessType.IAT_PVALUE; + pResult->codePointerOrStubLookup.constLookup.accessType = InfoAccessType.IAT_PVALUE; #pragma warning disable SA1001, SA1113, SA1115 // Commas should be spaced correctly pResult->codePointerOrStubLookup.constLookup.addr = (void*)ObjectToHandle( _compilation.NodeFactory.InterfaceDispatchCell(targetMethod From 7f879d6accfbc2ec5b486b811b16e4e65e3164ee Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 12 Feb 2025 13:54:08 +0100 Subject: [PATCH 03/23] Revert register constraint changes --- src/coreclr/jit/codegencommon.cpp | 5 ----- src/coreclr/jit/lower.cpp | 3 +-- src/coreclr/jit/targetamd64.h | 2 -- .../nativeaot/Runtime/amd64/DispatchResolve.asm | 14 +++++++------- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 5e49976d6503e0..2318d4223ad812 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -735,11 +735,6 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) case CORINFO_HELP_VALIDATE_INDIRECT_CALL: return RBM_VALIDATE_INDIRECT_CALL_TRASH; -#ifdef RBM_INTERFACELOOKUP_FOR_SLOT_TRASH - case CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT: - return RBM_INTERFACELOOKUP_FOR_SLOT_TRASH; -#endif - default: return RBM_CALLEE_TRASH; } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 83e27b7f4de204..c6e14bc4fa0a48 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3666,8 +3666,7 @@ void Lowering::LowerCFGCall(GenTreeCall* call) // Use a placeholder for the cell since the cell is already inserted in // LIR. 
GenTree* vsdCellPlaceholder = comp->gtNewZeroConNode(TYP_I_IMPL); - resolve->gtArgs.PushFront(comp, - NewCallArg::Primitive(vsdCellPlaceholder).WellKnown(WellKnownArg::VirtualStubCell)); + resolve->gtArgs.PushFront(comp, NewCallArg::Primitive(vsdCellPlaceholder)); // 'this' arg clone is not inserted, so no need to use a placeholder for that. resolve->gtArgs.PushFront(comp, NewCallArg::Primitive(thisArgClone)); diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 5208561cff9f06..869bf1944ce5f0 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -543,8 +543,6 @@ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH - #define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_RAX | RBM_R10 | RBM_R11) - #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R10 | RBM_RCX)) #define RBM_VALIDATE_INDIRECT_CALL_TRASH_ALL (RBM_INT_CALLEE_TRASH_ALL & ~(RBM_R10 | RBM_RCX)) #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_RCX diff --git a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm index 459d7307ff563f..9610d3c4c49c38 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm @@ -19,24 +19,24 @@ LEAF_ENTRY RhpResolveInterfaceMethodFast, _TEXT ;; to a NullReferenceException at the callsite. mov r9, [rcx] - ;; r10 currently contains the indirection cell address. - ;; load r10 to point to the cache block. - mov r10, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] - test r10b, IDC_CACHE_POINTER_MASK + ;; rdx currently contains the indirection cell address. + ;; load r8 to point to the cache block. + mov r8, [rdx + OFFSETOF__InterfaceDispatchCell__m_pCache] + test r8b, IDC_CACHE_POINTER_MASK jne RhpResolveInterfaceMethodFast_SlowPath - lea rax, [r10 + OFFSETOF__InterfaceDispatchCache__m_rgEntries] + lea rax, [r8 + OFFSETOF__InterfaceDispatchCache__m_rgEntries] cmp qword ptr [rax], r9 jne RhpResolveInterfaceMethodFast_Polymorphic mov rax, qword ptr [rax + 8] ret RhpResolveInterfaceMethodFast_Polymorphic: - mov r10d, dword ptr [r10 + OFFSETOF__InterfaceDispatchCache__m_cEntries] + mov r8d, dword ptr [r8 + OFFSETOF__InterfaceDispatchCache__m_cEntries] RhpResolveInterfaceMethodFast_NextEntry: add rax, SIZEOF__InterfaceDispatchCacheEntry - dec r10d + dec r8d jz RhpResolveInterfaceMethodFast_SlowPath cmp qword ptr [rax], r9 From 6d45447f352c6b2207e14eb102b9ab85834df1b3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 12 Feb 2025 13:58:45 +0100 Subject: [PATCH 04/23] Nit --- src/coreclr/jit/gentree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 76dbf29e272e51..95cede45de2f2b 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -2025,7 +2025,7 @@ void CallArgs::Remove(CallArg* arg) // arg - The arg to remove. // // Remarks: -// This can be used to use arguments after ABI determination and after morph. +// This can be used to remove arguments after ABI determination and after morph. // It removes the argument from both the early and late list. However, no ABI // information is updated. Caller needs to know what they are doing. 
// From 9d0a3bda33d660eb6eee9310bbe2441f932c0ab4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Thu, 13 Feb 2025 09:35:10 +0100 Subject: [PATCH 05/23] Revert "Revert register constraint changes" This reverts commit 7f879d6accfbc2ec5b486b811b16e4e65e3164ee. --- src/coreclr/jit/codegencommon.cpp | 5 +++++ src/coreclr/jit/lower.cpp | 3 ++- src/coreclr/jit/targetamd64.h | 2 ++ .../nativeaot/Runtime/amd64/DispatchResolve.asm | 14 +++++++------- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 2318d4223ad812..5e49976d6503e0 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -735,6 +735,11 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) case CORINFO_HELP_VALIDATE_INDIRECT_CALL: return RBM_VALIDATE_INDIRECT_CALL_TRASH; +#ifdef RBM_INTERFACELOOKUP_FOR_SLOT_TRASH + case CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT: + return RBM_INTERFACELOOKUP_FOR_SLOT_TRASH; +#endif + default: return RBM_CALLEE_TRASH; } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index c6e14bc4fa0a48..83e27b7f4de204 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3666,7 +3666,8 @@ void Lowering::LowerCFGCall(GenTreeCall* call) // Use a placeholder for the cell since the cell is already inserted in // LIR. GenTree* vsdCellPlaceholder = comp->gtNewZeroConNode(TYP_I_IMPL); - resolve->gtArgs.PushFront(comp, NewCallArg::Primitive(vsdCellPlaceholder)); + resolve->gtArgs.PushFront(comp, + NewCallArg::Primitive(vsdCellPlaceholder).WellKnown(WellKnownArg::VirtualStubCell)); // 'this' arg clone is not inserted, so no need to use a placeholder for that. resolve->gtArgs.PushFront(comp, NewCallArg::Primitive(thisArgClone)); diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 869bf1944ce5f0..5208561cff9f06 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -543,6 +543,8 @@ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH + #define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_RAX | RBM_R10 | RBM_R11) + #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R10 | RBM_RCX)) #define RBM_VALIDATE_INDIRECT_CALL_TRASH_ALL (RBM_INT_CALLEE_TRASH_ALL & ~(RBM_R10 | RBM_RCX)) #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_RCX diff --git a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm index 9610d3c4c49c38..459d7307ff563f 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm @@ -19,24 +19,24 @@ LEAF_ENTRY RhpResolveInterfaceMethodFast, _TEXT ;; to a NullReferenceException at the callsite. mov r9, [rcx] - ;; rdx currently contains the indirection cell address. - ;; load r8 to point to the cache block. - mov r8, [rdx + OFFSETOF__InterfaceDispatchCell__m_pCache] - test r8b, IDC_CACHE_POINTER_MASK + ;; r10 currently contains the indirection cell address. + ;; load r10 to point to the cache block. 
+ mov r10, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + test r10b, IDC_CACHE_POINTER_MASK jne RhpResolveInterfaceMethodFast_SlowPath - lea rax, [r8 + OFFSETOF__InterfaceDispatchCache__m_rgEntries] + lea rax, [r10 + OFFSETOF__InterfaceDispatchCache__m_rgEntries] cmp qword ptr [rax], r9 jne RhpResolveInterfaceMethodFast_Polymorphic mov rax, qword ptr [rax + 8] ret RhpResolveInterfaceMethodFast_Polymorphic: - mov r8d, dword ptr [r8 + OFFSETOF__InterfaceDispatchCache__m_cEntries] + mov r10d, dword ptr [r10 + OFFSETOF__InterfaceDispatchCache__m_cEntries] RhpResolveInterfaceMethodFast_NextEntry: add rax, SIZEOF__InterfaceDispatchCacheEntry - dec r8d + dec r10d jz RhpResolveInterfaceMethodFast_SlowPath cmp qword ptr [rax], r9 From 2f0a2854f609acdd91ec30b809b7386e306c4f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Thu, 13 Feb 2025 18:17:42 +0100 Subject: [PATCH 06/23] Scary hack --- .../Runtime/amd64/DispatchResolve.asm | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm index 459d7307ff563f..2221607197c62d 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm @@ -17,36 +17,50 @@ LEAF_ENTRY RhpResolveInterfaceMethodFast, _TEXT ;; The exception handling infrastructure is aware of the fact that this is the first ;; instruction of RhpResolveInterfaceMethodFast and uses it to translate an AV here ;; to a NullReferenceException at the callsite. - mov r9, [rcx] + mov rax, [rcx] ;; r10 currently contains the indirection cell address. - ;; load r10 to point to the cache block. - mov r10, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] - test r10b, IDC_CACHE_POINTER_MASK - jne RhpResolveInterfaceMethodFast_SlowPath + ;; load r11 to point to the cache block. 
+ mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + test r11b, IDC_CACHE_POINTER_MASK + jne RhpResolveInterfaceMethodFast_SlowPath_Push - lea rax, [r10 + OFFSETOF__InterfaceDispatchCache__m_rgEntries] - cmp qword ptr [rax], r9 + lea r11, [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries] + cmp qword ptr [r11], rax jne RhpResolveInterfaceMethodFast_Polymorphic - mov rax, qword ptr [rax + 8] + mov rax, qword ptr [r11 + 8] ret RhpResolveInterfaceMethodFast_Polymorphic: - mov r10d, dword ptr [r10 + OFFSETOF__InterfaceDispatchCache__m_cEntries] + push rdx + mov rdx, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + mov r11d, dword ptr [rdx + OFFSETOF__InterfaceDispatchCache__m_cEntries] RhpResolveInterfaceMethodFast_NextEntry: - add rax, SIZEOF__InterfaceDispatchCacheEntry - dec r10d + add rdx, SIZEOF__InterfaceDispatchCacheEntry + dec r11d jz RhpResolveInterfaceMethodFast_SlowPath - cmp qword ptr [rax], r9 + cmp qword ptr [rdx], rax jne RhpResolveInterfaceMethodFast_NextEntry - mov rax, qword ptr [rax + 8] + mov rax, qword ptr [rdx + 8] + pop rdx ret + RhpResolveInterfaceMethodFast_SlowPath_Push: + push rdx RhpResolveInterfaceMethodFast_SlowPath: - jmp RhpResolveInterfaceMethod + push rcx + push r8 + push r9 + mov rdx, r10 + call RhpResolveInterfaceMethod + pop r9 + pop r8 + pop rcx + pop rdx + ret LEAF_END RhpResolveInterfaceMethodFast, _TEXT From 6cd6567adecb814f42d88a996e5f22a6fe9ffc4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Fri, 14 Feb 2025 11:09:11 +0100 Subject: [PATCH 07/23] Universal transition --- src/coreclr/jit/importercalls.cpp | 4 +-- .../System/Runtime/CachedInterfaceDispatch.cs | 7 ++++++ src/coreclr/nativeaot/Runtime/CMakeLists.txt | 2 +- src/coreclr/nativeaot/Runtime/EHHelpers.cpp | 12 ++++----- .../nativeaot/Runtime/StackFrameIterator.cpp | 12 ++++++++- .../Runtime/amd64/DispatchResolve.asm | 25 +++++++------------ .../Runtime/amd64/UniversalTransition.asm | 10 +++++--- .../Runtime/arm64/DispatchResolve.asm | 23 +++++++++++++++++ .../Runtime/arm64/UniversalTransition.asm | 15 ++++++++--- .../JitInterface/CorInfoImpl.RyuJit.cs | 2 +- 10 files changed, 78 insertions(+), 34 deletions(-) create mode 100644 src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.asm diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 5c95dbd449026a..319466fd94071d 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -386,7 +386,7 @@ var_types Compiler::impImportCall(OPCODE opcode, case CORINFO_VIRTUALCALL_LDVIRTFTN: { - if (compIsForInlining() && !IsTargetAbi(CORINFO_NATIVEAOT_ABI)) + if (compIsForInlining()) { compInlineResult->NoteFatal(InlineObservation::CALLSITE_HAS_CALL_VIA_LDVIRTFTN); return TYP_UNDEF; @@ -430,7 +430,7 @@ var_types Compiler::impImportCall(OPCODE opcode, addFatPointerCandidate(call->AsCall()); } #ifdef FEATURE_READYTORUN - if (opts.IsReadyToRun() && !IsTargetAbi(CORINFO_NATIVEAOT_ABI)) + if (opts.IsReadyToRun()) { // Null check is needed for ready to run to handle // non-virtual <-> virtual changes between versions diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CachedInterfaceDispatch.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CachedInterfaceDispatch.cs index 3c217ab8d9e5f5..9a1b1ad7ef985a 100644 --- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CachedInterfaceDispatch.cs +++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/CachedInterfaceDispatch.cs @@ -49,6 +49,13 @@ private static IntPtr 
RhpCidResolve_Worker(object pObject, IntPtr pCell) [RuntimeExport("RhpResolveInterfaceMethod")] private static IntPtr RhpResolveInterfaceMethod(object pObject, IntPtr pCell) { + if (pObject == null) + { + // Optimizer may perform code motion on dispatch such that it occurs independant of + // null check on "this" pointer. Allow for this case by returning back an invalid pointer. + return IntPtr.Zero; + } + MethodTable* pInstanceType = pObject.GetMethodTable(); // This method is used for the implementation of LOAD_VIRT_FUNCTION and in that case the mapping we want diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 034b810432576a..caab6bf43f8754 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -214,7 +214,7 @@ list(APPEND RUNTIME_SOURCES_ARCH_ASM ) if(CLR_CMAKE_TARGET_WIN32) - if (CLR_CMAKE_TARGET_ARCH_AMD64) + if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) list(APPEND RUNTIME_SOURCES_ARCH_ASM ${ARCH_SOURCES_DIR}/DispatchResolve.${ASM_SUFFIX} ) diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index bb52fa4554227d..bd7435775479b1 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -258,6 +258,9 @@ static bool InWriteBarrierHelper(uintptr_t faultingIP) return false; } +#if (defined(HOST_AMD64) || defined(HOST_ARM64)) && defined(HOST_WINDOWS) +EXTERN_C CODE_LOCATION RhpResolveInterfaceMethodFast; +#endif EXTERN_C CODE_LOCATION RhpInitialInterfaceDispatch; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation1; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation2; @@ -266,15 +269,15 @@ EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation8; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation16; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation32; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation64; -#if defined(HOST_AMD64) && defined(HOST_WINDOWS) -EXTERN_C CODE_LOCATION RhpResolveInterfaceMethodFast; -#endif static bool InInterfaceDispatchHelper(uintptr_t faultingIP) { #ifndef USE_PORTABLE_HELPERS static uintptr_t interfaceDispatchAVLocations[] = { +#if (defined(HOST_AMD64) || defined(HOST_ARM64)) && defined(HOST_WINDOWS) + (uintptr_t)&RhpResolveInterfaceMethodFast, +#endif (uintptr_t)&RhpInitialInterfaceDispatch, (uintptr_t)&RhpInterfaceDispatchAVLocation1, (uintptr_t)&RhpInterfaceDispatchAVLocation2, @@ -283,9 +286,6 @@ static bool InInterfaceDispatchHelper(uintptr_t faultingIP) (uintptr_t)&RhpInterfaceDispatchAVLocation16, (uintptr_t)&RhpInterfaceDispatchAVLocation32, (uintptr_t)&RhpInterfaceDispatchAVLocation64, -#if defined(HOST_AMD64) && defined(HOST_WINDOWS) - (uintptr_t)&RhpResolveInterfaceMethodFast, -#endif }; // compare the IP against the list of known possible AV locations in the interface dispatch helpers diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 302bd05861a509..703643beea6e5c 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -41,6 +41,10 @@ #if defined(FEATURE_DYNAMIC_CODE) EXTERN_C CODE_LOCATION ReturnFromUniversalTransition; EXTERN_C CODE_LOCATION ReturnFromUniversalTransition_DebugStepTailCall; +#if (defined(HOST_AMD64) || defined(HOST_ARM64)) && defined(HOST_WINDOWS) +EXTERN_C CODE_LOCATION ReturnFromUniversalTransitionReturnResult; +EXTERN_C CODE_LOCATION 
ReturnFromUniversalTransitionReturnResult_DebugStepTailCall; +#endif #endif EXTERN_C CODE_LOCATION RhpCallCatchFunclet2; @@ -2241,7 +2245,13 @@ StackFrameIterator::ReturnAddressCategory StackFrameIterator::CategorizeUnadjust #if defined(FEATURE_DYNAMIC_CODE) if (EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransition) || - EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransition_DebugStepTailCall)) + EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransition_DebugStepTailCall) +#if (defined(HOST_AMD64) || defined(HOST_ARM64)) && defined(HOST_WINDOWS) + || + EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransitionReturnResult) || + EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransitionReturnResult_DebugStepTailCall) +#endif + ) { return InUniversalTransitionThunk; } diff --git a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm index 2221607197c62d..60f31dbcd2a7eb 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm @@ -6,8 +6,8 @@ include AsmMacros.inc ifdef FEATURE_CACHED_INTERFACE_DISPATCH - -EXTERN RhpResolveInterfaceMethod : PROC +EXTERN RhpCidResolve : PROC +EXTERN RhpUniversalTransitionReturnResult_DebugStepTailCall : PROC ;; Fast version of RhpResolveInterfaceMethod LEAF_ENTRY RhpResolveInterfaceMethodFast, _TEXT @@ -23,7 +23,7 @@ LEAF_ENTRY RhpResolveInterfaceMethodFast, _TEXT ;; load r11 to point to the cache block. mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] test r11b, IDC_CACHE_POINTER_MASK - jne RhpResolveInterfaceMethodFast_SlowPath_Push + jne RhpResolveInterfaceMethodFast_SlowPath lea r11, [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries] cmp qword ptr [r11], rax @@ -39,7 +39,7 @@ LEAF_ENTRY RhpResolveInterfaceMethodFast, _TEXT RhpResolveInterfaceMethodFast_NextEntry: add rdx, SIZEOF__InterfaceDispatchCacheEntry dec r11d - jz RhpResolveInterfaceMethodFast_SlowPath + jz RhpResolveInterfaceMethodFast_SlowPath_Pop cmp qword ptr [rdx], rax jne RhpResolveInterfaceMethodFast_NextEntry @@ -48,19 +48,12 @@ LEAF_ENTRY RhpResolveInterfaceMethodFast, _TEXT pop rdx ret - RhpResolveInterfaceMethodFast_SlowPath_Push: - push rdx - RhpResolveInterfaceMethodFast_SlowPath: - push rcx - push r8 - push r9 - mov rdx, r10 - call RhpResolveInterfaceMethod - pop r9 - pop r8 - pop rcx + RhpResolveInterfaceMethodFast_SlowPath_Pop: pop rdx - ret + RhpResolveInterfaceMethodFast_SlowPath: + mov r11, r10 + lea r10, RhpCidResolve + jmp RhpUniversalTransitionReturnResult_DebugStepTailCall LEAF_END RhpResolveInterfaceMethodFast, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/amd64/UniversalTransition.asm b/src/coreclr/nativeaot/Runtime/amd64/UniversalTransition.asm index 777ef9d2de620c..91eaf95a48eda0 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/UniversalTransition.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/UniversalTransition.asm @@ -84,7 +84,7 @@ DISTANCE_FROM_CHILDSP_TO_CALLERSP equ DISTANCE_FROM_CHILDSP_TO_RET ; everything between the base of the ReturnBlock and the top of the StackPassedArgs. ; -UNIVERSAL_TRANSITION macro FunctionName +UNIVERSAL_TRANSITION macro FunctionName, ExitSequence NESTED_ENTRY Rhp&FunctionName, _TEXT @@ -146,7 +146,7 @@ ALTERNATE_ENTRY ReturnFrom&FunctionName ; Pop the space that was allocated between the ChildSP and the caller return address. 
add rsp, DISTANCE_FROM_CHILDSP_TO_RETADDR - TAILJMP_RAX + ExitSequence NESTED_END Rhp&FunctionName, _TEXT @@ -155,8 +155,10 @@ NESTED_END Rhp&FunctionName, _TEXT ; To enable proper step-in behavior in the debugger, we need to have two instances ; of the thunk. For the first one, the debugger steps into the call in the function, ; for the other, it steps over it. - UNIVERSAL_TRANSITION UniversalTransition - UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + UNIVERSAL_TRANSITION UniversalTransition, TAILJMP_RAX + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall, TAILJMP_RAX + UNIVERSAL_TRANSITION UniversalTransitionReturnResult, ret + UNIVERSAL_TRANSITION UniversalTransitionReturnResult_DebugStepTailCall, ret endif diff --git a/src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.asm b/src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.asm new file mode 100644 index 00000000000000..61ea19599927b0 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.asm @@ -0,0 +1,23 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + EXTERN RhpCidResolve + EXTERN RhpUniversalTransitionReturnResult_DebugStepTailCall + + NESTED_ENTRY RhpResolveInterfaceMethodFast, _TEXT + + ldr xip0, =RhpCidResolve + mov xip1, x11 + b RhpUniversalTransitionReturnResult_DebugStepTailCall + + NESTED_END RhpResolveInterfaceMethodFast + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + + END \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/arm64/UniversalTransition.asm b/src/coreclr/nativeaot/Runtime/arm64/UniversalTransition.asm index 7d3607f27a01eb..6a843f918f8033 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/UniversalTransition.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/UniversalTransition.asm @@ -89,7 +89,7 @@ TEXTAREA MACRO - UNIVERSAL_TRANSITION $FunctionName + UNIVERSAL_TRANSITION $FunctionName, $ReturnResult NESTED_ENTRY Rhp$FunctionName @@ -142,8 +142,14 @@ ;; Restore FP and LR registers, and free the allocated stack block EPILOG_RESTORE_REG_PAIR fp, lr, #STACK_SIZE! + IF $ReturnResult == 0 ;; Tailcall to the target address. EPILOG_NOP br x12 + ELSE + ;; Return target address + EPILOG_NOP mov x0, x12 + ret + ENDIF NESTED_END Rhp$FunctionName @@ -152,7 +158,10 @@ ; To enable proper step-in behavior in the debugger, we need to have two instances ; of the thunk. For the first one, the debugger steps into the call in the function, ; for the other, it steps over it. 
- UNIVERSAL_TRANSITION UniversalTransition - UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + UNIVERSAL_TRANSITION UniversalTransition, 0 + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall, 0 + UNIVERSAL_TRANSITION UniversalTransitionReturnResult, 1 + UNIVERSAL_TRANSITION UniversalTransitionReturnResult_DebugStepTailCall, 1 + END diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index 29ba0a13aef6f3..8cd04b77136b35 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -762,7 +762,7 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) id = ReadyToRunHelper.GVMLookupForSlot; break; case CorInfoHelpFunc.CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT: - return _compilation.NodeFactory.ExternSymbol(_compilation.TypeSystemContext.Target.IsWindows && _compilation.TypeSystemContext.Target.Architecture == TargetArchitecture.X64 ? "RhpResolveInterfaceMethodFast" : "RhpResolveInterfaceMethod"); + return _compilation.NodeFactory.ExternSymbol("RhpResolveInterfaceMethodFast"); case CorInfoHelpFunc.CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL: id = ReadyToRunHelper.TypeHandleToRuntimeType; From 36fa4320e5bff84a01522b9ea3ecd4df73a48854 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Mon, 17 Feb 2025 13:49:40 +0100 Subject: [PATCH 08/23] Arm64 and fixes --- .../Runtime/amd64/DispatchResolve.asm | 13 ++++--- .../Runtime/arm64/DispatchResolve.asm | 37 +++++++++++++++++++ 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm index 60f31dbcd2a7eb..1dde32405ddc83 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm @@ -32,19 +32,20 @@ LEAF_ENTRY RhpResolveInterfaceMethodFast, _TEXT ret RhpResolveInterfaceMethodFast_Polymorphic: + ;; load the count of cache entries into edx + ;; r11 points to the first cache entry so to get to m_cEntries, we need to subtract m_rgEntries first push rdx - mov rdx, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] - mov r11d, dword ptr [rdx + OFFSETOF__InterfaceDispatchCache__m_cEntries] + mov edx, dword ptr [r11 - OFFSETOF__InterfaceDispatchCache__m_rgEntries + OFFSETOF__InterfaceDispatchCache__m_cEntries] RhpResolveInterfaceMethodFast_NextEntry: - add rdx, SIZEOF__InterfaceDispatchCacheEntry - dec r11d + add r11, SIZEOF__InterfaceDispatchCacheEntry + dec edx jz RhpResolveInterfaceMethodFast_SlowPath_Pop - cmp qword ptr [rdx], rax + cmp qword ptr [r11], rax jne RhpResolveInterfaceMethodFast_NextEntry - mov rax, qword ptr [rdx + 8] + mov rax, qword ptr [r11 + 8] pop rdx ret diff --git a/src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.asm b/src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.asm index 61ea19599927b0..92f2f8849e7b11 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.asm @@ -12,6 +12,43 @@ NESTED_ENTRY RhpResolveInterfaceMethodFast, _TEXT + ;; Load the MethodTable from the object instance in x0. + ;; Trigger an AV if we're dispatching on a null this. 
+ ;; The exception handling infrastructure is aware of the fact that this is the first + ;; instruction of RhpResolveInterfaceMethodFast and uses it to translate an AV here + ;; to a NullReferenceException at the callsite. + ldr x12, [x0] + + ;; x11 currently contains the indirection cell address. + ;; load x13 to point to the cache block. + ldr x13, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + and x14, x13, #IDC_CACHE_POINTER_MASK + cbnz x14, RhpResolveInterfaceMethodFast_SlowPath + + add x14, x13, #OFFSETOF__InterfaceDispatchCache__m_rgEntries + ldr x15, [x14] + cmp x15, x12 + bne RhpResolveInterfaceMethodFast_Polymorphic + ldur x0, [x14, #8] + ret + +RhpResolveInterfaceMethodFast_Polymorphic + ldr w13, [x13, #OFFSETOF__InterfaceDispatchCache__m_cEntries] + +RhpResolveInterfaceMethodFast_NextEntry + add x14, x14, #SIZEOF__InterfaceDispatchCacheEntry + sub w13, w13, #1 + cmp w13, #0 + beq RhpResolveInterfaceMethodFast_SlowPath + + ldr x15, [x14] + cmp x15, x12 + bne RhpResolveInterfaceMethodFast_NextEntry + + ldur x0, [x14, #8] + ret + +RhpResolveInterfaceMethodFast_SlowPath ldr xip0, =RhpCidResolve mov xip1, x11 b RhpUniversalTransitionReturnResult_DebugStepTailCall From 312d75d9159237b0e159b15b617e7efde09fcce1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Mon, 17 Feb 2025 23:51:10 +0100 Subject: [PATCH 09/23] Testing --- src/coreclr/jit/targetarm64.h | 2 + .../ControlFlowGuard/ControlFlowGuard.cs | 117 ++++++++++++++++++ 2 files changed, 119 insertions(+) diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index f5b96a2bc6104a..17c9249ab1b005 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -263,6 +263,8 @@ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH + #define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (REG_R0 | REG_R12 | REG_R13 | REG_R14 | REG_R15) + #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R15)) #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R15 #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_R9 diff --git a/src/tests/nativeaot/SmokeTests/ControlFlowGuard/ControlFlowGuard.cs b/src/tests/nativeaot/SmokeTests/ControlFlowGuard/ControlFlowGuard.cs index 5e9189ac90a572..28c4a72e73a23f 100644 --- a/src/tests/nativeaot/SmokeTests/ControlFlowGuard/ControlFlowGuard.cs +++ b/src/tests/nativeaot/SmokeTests/ControlFlowGuard/ControlFlowGuard.cs @@ -35,6 +35,9 @@ static int Main(string[] args) // Are we running the control program? if (args.Length == 0) { + TestExceptionFromDispatch.Run(); + TestInterfaceDispatch.Run(); + // Dry run - execute all scenarios while s_armed is false. 
// // The replaced call target will not be considered invalid by CFG and none of this @@ -84,6 +87,120 @@ static int Main(string[] args) return 10; } + class TestExceptionFromDispatch + { + class CastableObject : IDynamicInterfaceCastable + { + public RuntimeTypeHandle GetInterfaceImplementation(RuntimeTypeHandle interfaceType) => throw new Exception(); + public bool IsInterfaceImplemented(RuntimeTypeHandle interfaceType, bool throwIfNotImplemented) => true; + } + + public static void Run() + { + bool caughtException = false; + + IDisposable obj = (IDisposable)new CastableObject(); + try + { + obj.Dispose(); + } + catch (Exception) + { + caughtException = true; + } + + if (!caughtException) + throw new Exception(); + } + } + + internal class TestInterfaceDispatch + { + interface IFoo + { + int Call(int x, int y); + } + + interface IFoo + { + int Call(int x, int y); + } + + class C1 : IFoo, IFoo + { + public int Call(int x, int y) => x + y; + } + + class C2 : IFoo, IFoo + { + public int Call(int x, int y) => x - y; + } + class C3 : IFoo, IFoo + { + public int Call(int x, int y) => x * y; + } + + class C4 : IFoo, IFoo + { + public int Call(int x, int y) => x / y; + } + + class C5 : IFoo, IFoo + { + public int Call(int x, int y) => x % y; + } + + public static void Run() + { + if (Call(new C1(), 10, 20) != (10 + 20)) + throw new Exception(); + if (Call(new C1(), 11, 22) != (11 + 22)) + throw new Exception(); + if (Call(new C2(), 10, 20) != (10 - 20)) + throw new Exception(); + if (Call(new C2(), 11, 22) != (11 - 22)) + throw new Exception(); + if (Call(new C3(), 10, 20) != (10 * 20)) + throw new Exception(); + if (Call(new C3(), 11, 22) != (11 * 22)) + throw new Exception(); + if (Call(new C4(), 10, 20) != (10 / 20)) + throw new Exception(); + if (Call(new C5(), 10, 20) != (10 % 20)) + throw new Exception(); + + if (CallGen(new C1(), 10, 20) != (10 + 20)) + throw new Exception(); + if (CallGen(new C2(), 11, 22) != (11 - 22)) + throw new Exception(); + if (CallGen(new C3(), 11, 22) != (11 * 22)) + throw new Exception(); + if (CallGen(new C4(), 10, 20) != (10 / 20)) + throw new Exception(); + if (CallGen(new C5(), 10, 20) != (10 % 20)) + throw new Exception(); + + bool caught = false; + try + { + Call(null, 10, 20); + } + catch (NullReferenceException) + { + caught = true; + } + + if (!caught) + throw new Exception(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int Call(IFoo f, int x, int y) => f.Call(x, y); + + [MethodImpl(MethodImplOptions.NoInlining)] + static int CallGen(IFoo f, int x, int y) => f.Call(x, y); + } + class TestFunctionPointer { public static int Run() From b370b1731fca6de240bcfc2af1815b17f495acf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Tue, 18 Feb 2025 08:14:56 +0100 Subject: [PATCH 10/23] Update jiteeversionguid.h --- src/coreclr/inc/jiteeversionguid.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 9383fceebc29b2..0aff3732e519af 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,12 +43,12 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* a116647a-3f80-4fd6-9c80-95156c7e9923 */ - 0xa116647a, - 0x3f80, - 0x4fd6, - {0x9c, 0x80, 0x95, 0x15, 0x6c, 0x7e, 0x99, 0x23} -}; +constexpr GUID JITEEVersionIdentifier = { /* 87673fd2-b9d5-44cb-beee-7ed05db36c52 */ + 0x87673fd2, + 0xb9d5, + 0x44cb, + {0xbe, 0xee, 0x7e, 
0xd0, 0x5d, 0xb3, 0x6c, 0x52} + }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// // From 88eeeee33b6e5820060bf7036a185b1f11e00d2b Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Wed, 19 Feb 2025 20:00:39 -0800 Subject: [PATCH 11/23] Add JIT_InterfaceLookupForSlot helper for Win x64 --- src/coreclr/inc/jithelpers.h | 4 ++++ src/coreclr/vm/amd64/VirtualCallStubAMD64.asm | 18 ++++++++++++++++++ src/coreclr/vm/contractimpl.h | 1 - src/coreclr/vm/jithelpers.cpp | 4 +++- src/coreclr/vm/virtualcallstub.cpp | 6 ++++++ 5 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index e5ce70be0221f3..c6268c0f6821ed 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -309,7 +309,11 @@ JITHELPER(CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT_TRACK_TRANSITIONS, JIT_ReversePInvokeExitTrackTransitions, METHOD__NIL) JITHELPER(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, NULL, METHOD__NIL) +#if defined(TARGET_AMD64) + JITHELPER(CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, JIT_InterfaceLookupForSlot, METHOD__NIL) +#else JITHELPER(CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, NULL, METHOD__NIL) +#endif #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) JITHELPER(CORINFO_HELP_STACK_PROBE, JIT_StackProbe, METHOD__NIL) diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index b533789980c510..bff9a601cece75 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ -83,4 +83,22 @@ Fail: LEAF_END ResolveWorkerChainLookupAsmStub, _TEXT +;; On Input: +;; r11 contains the address of the indirection cell (with the flags in the low bits) +NESTED_ENTRY JIT_InterfaceLookupForSlot, _TEXT + + PROLOG_WITH_TRANSITION_BLOCK + + lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock + mov rdx, r11 ; indirection cell + mov r8, 7FFFFFFFFFFFFFFFh ; INVALID_TOKEN + xor r9, r9 ; flags + + call VSD_ResolveWorker + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + TAILJMP_RAX + +NESTED_END JIT_InterfaceLookupForSlot, _TEXT + end diff --git a/src/coreclr/vm/contractimpl.h b/src/coreclr/vm/contractimpl.h index f8d7d81856f467..1d005d8e6ddddc 100644 --- a/src/coreclr/vm/contractimpl.h +++ b/src/coreclr/vm/contractimpl.h @@ -258,7 +258,6 @@ struct DispatchToken explicit DispatchToken(UINT_PTR token) { - CONSISTENCY_CHECK(token != INVALID_TOKEN); m_token = token; } diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index fe5564e299060d..2edd90acb8c7ae 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -3495,10 +3495,12 @@ HCIMPL1_RAW(void, JIT_ReversePInvokeExit, ReversePInvokeFrame* frame) } HCIMPLEND_RAW -// These two do take args but have a custom calling convention. +// These do take args but have a custom calling convention. 
EXTERN_C void JIT_ValidateIndirectCall(); EXTERN_C void JIT_DispatchIndirectCall(); +EXTERN_C void JIT_InterfaceLookupForSlot(); + //======================================================================== // // JIT HELPERS INITIALIZATION diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index c22fbaedd7e53d..658bcc3f7479fa 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -1342,6 +1342,12 @@ PCODE VSD_ResolveWorker(TransitionBlock * pTransitionBlock, pSDFrame->SetCallSite(NULL, (TADDR)callSite.GetIndirectCell()); DispatchToken representativeToken(token); + if (!representativeToken.IsValid()) + { + // Used by JIT_InterfaceLookupForSlot + representativeToken = DispatchToken(VirtualCallStubManager::GetTokenFromStub(callSite.GetSiteTarget())); + } + MethodTable * pRepresentativeMT = pObj->GetMethodTable(); if (representativeToken.IsTypedToken()) { From 1d710c4be07edbcb70cb5068e3decaa9135867f5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 1 Mar 2025 14:20:16 +0100 Subject: [PATCH 12/23] Clear out call cookie when changing VSD call to indirect --- src/coreclr/jit/lower.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 2ff36697198abc..3fde1c7ef5b057 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3648,11 +3648,13 @@ void Lowering::LowerCFGCall(GenTreeCall* call) call->gtCallType = CT_INDIRECT; call->gtFlags &= ~GTF_CALL_VIRT_STUB; call->gtCallAddr = resolve; + call->gtCallCookie = nullptr; #ifdef FEATURE_READYTORUN call->gtEntryPoint.addr = nullptr; call->gtEntryPoint.accessType = IAT_VALUE; #endif + if (callTarget != nullptr) { callTarget->SetUnusedValue(); From c3d49bbe8681a1bb14b6302c93827d39870a95ce Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 1 Mar 2025 14:21:07 +0100 Subject: [PATCH 13/23] Nit --- src/coreclr/jit/lower.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 3fde1c7ef5b057..46794b335550f2 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3654,7 +3654,6 @@ void Lowering::LowerCFGCall(GenTreeCall* call) call->gtEntryPoint.accessType = IAT_VALUE; #endif - if (callTarget != nullptr) { callTarget->SetUnusedValue(); From 66f62704df22e7f6ebae5b33861f537c65fde015 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 1 Mar 2025 14:34:59 +0100 Subject: [PATCH 14/23] Probable fix for GC issues --- src/coreclr/vm/amd64/VirtualCallStubAMD64.asm | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index bff9a601cece75..1a16280f1d7684 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ -33,6 +33,8 @@ NESTED_ENTRY ResolveWorkerAsmStub, _TEXT call VSD_ResolveWorker + mov rcx, [rsp + __PWTB_TransitionBlock] + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL TAILJMP_RAX @@ -84,7 +86,10 @@ Fail: LEAF_END ResolveWorkerChainLookupAsmStub, _TEXT ;; On Input: +;; rcx contains object 'this' pointer ;; r11 contains the address of the indirection cell (with the flags in the low bits) +;; +;; Preserves all argument registers NESTED_ENTRY JIT_InterfaceLookupForSlot, _TEXT PROLOG_WITH_TRANSITION_BLOCK @@ -96,6 +101,8 @@ NESTED_ENTRY JIT_InterfaceLookupForSlot, _TEXT call VSD_ResolveWorker + RESTORE_FLOAT_ARGUMENT_REGISTERS __PWTB_FloatArgumentRegisters + 
RESTORE_ARGUMENT_REGISTERS __PWTB_ArgumentRegisters EPILOG_WITH_TRANSITION_BLOCK_RETURN TAILJMP_RAX From e6b18199b40f6f5f22a1f280d5dde7d10d8d45fd Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 1 Mar 2025 14:35:42 +0100 Subject: [PATCH 15/23] Undo accidental change --- src/coreclr/vm/amd64/VirtualCallStubAMD64.asm | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index 1a16280f1d7684..85cdfdf6a24b3f 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ -33,8 +33,6 @@ NESTED_ENTRY ResolveWorkerAsmStub, _TEXT call VSD_ResolveWorker - mov rcx, [rsp + __PWTB_TransitionBlock] - EPILOG_WITH_TRANSITION_BLOCK_TAILCALL TAILJMP_RAX From bb89b87023d149c9872a71d536ca3c7a38053ed8 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 1 Mar 2025 14:39:22 +0100 Subject: [PATCH 16/23] Update JIT-EE GUID after merge --- src/coreclr/inc/jiteeversionguid.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index ca025f62a7ec4d..5feda781faee94 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* 4463d6ac-dfcb-4ab0-a941-c53b56089b7c */ - 0x4463d6ac, - 0xdfcb, - 0x4ab0, - {0xa9, 0x41, 0xc5, 0x3b, 0x56, 0x08, 0x9b, 0x7c} +constexpr GUID JITEEVersionIdentifier = { /* 254e838d-821b-4600-9c4e-b5ea6ef20d38 */ + 0x254e838d, + 0x821b, + 0x4600, + {0x9c, 0x4e, 0xb5, 0xea, 0x6e, 0xf2, 0x0d, 0x38} }; #endif // JIT_EE_VERSIONING_GUID_H From 5ca6411ebf3f579d8403b9149c194d38fd70a4f8 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 1 Mar 2025 14:40:49 +0100 Subject: [PATCH 17/23] Run jit-format --- src/coreclr/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 6eb277a37d219a..0203aee46c56ad 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3441,7 +3441,7 @@ void Lowering::LowerCFGCall(GenTreeCall* call) // Finally update the call target call->gtCallType = CT_INDIRECT; call->gtFlags &= ~GTF_CALL_VIRT_STUB; - call->gtCallAddr = resolve; + call->gtCallAddr = resolve; call->gtCallCookie = nullptr; #ifdef FEATURE_READYTORUN call->gtEntryPoint.addr = nullptr; From 5d66c771758b9e003fd892d1814802dd393d0662 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 3 Mar 2025 10:52:13 +0100 Subject: [PATCH 18/23] Fix Jan's bug, fix tailcall test --- src/coreclr/jit/codegencommon.cpp | 1 + src/coreclr/jit/emit.cpp | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index b41502400ab988..291e9639ee9547 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -6074,6 +6074,7 @@ void CodeGen::genDefinePendingCallLabel(GenTreeCall* call) { case CORINFO_HELP_VALIDATE_INDIRECT_CALL: case CORINFO_HELP_VIRTUAL_FUNC_PTR: + case CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT: case CORINFO_HELP_MEMSET: case CORINFO_HELP_MEMCPY: return; diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index bbf30da0c728fa..114b81cd72fdc1 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -10354,12 +10354,12 @@ regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd) { // Is it 
a helper with a special saved set? bool isNoGCHelper = emitNoGChelper(methHnd); + CorInfoHelpFunc helper = Compiler::eeGetHelperNum(methHnd); + if (isNoGCHelper) { - CorInfoHelpFunc helpFunc = Compiler::eeGetHelperNum(methHnd); - // Get the set of registers that this call kills and remove it from the saved set. - regMaskTP savedSet = RBM_ALLINT & ~emitGetGCRegsKilledByNoGCCall(helpFunc); + regMaskTP savedSet = RBM_ALLINT & ~emitGetGCRegsKilledByNoGCCall(helper); #ifdef DEBUG if (emitComp->verbose) @@ -10372,6 +10372,11 @@ regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd) #endif return savedSet; } + else if (helper == CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT) + { + // This one is not no-gc, but it preserves arg registers. + return RBM_ALLINT & ~RBM_INTERFACELOOKUP_FOR_SLOT_TRASH; + } else { // This is the saved set of registers after a normal call. From f28053da721e61714d4ecf3b7fabcd066318b633 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 3 Mar 2025 10:55:06 +0100 Subject: [PATCH 19/23] Fix build on targets without custom calling convention for helper --- src/coreclr/jit/emit.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 114b81cd72fdc1..567f07f66f1e0f 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -10353,8 +10353,8 @@ const char* emitter::emitOffsetToLabel(unsigned offs) regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd) { // Is it a helper with a special saved set? - bool isNoGCHelper = emitNoGChelper(methHnd); - CorInfoHelpFunc helper = Compiler::eeGetHelperNum(methHnd); + bool isNoGCHelper = emitNoGChelper(methHnd); + CorInfoHelpFunc helper = Compiler::eeGetHelperNum(methHnd); if (isNoGCHelper) { @@ -10372,11 +10372,13 @@ regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd) #endif return savedSet; } +#ifdef RBM_INTERFACELOOKUP_FOR_SLOT_TRASH else if (helper == CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT) { // This one is not no-gc, but it preserves arg registers. return RBM_ALLINT & ~RBM_INTERFACELOOKUP_FOR_SLOT_TRASH; } +#endif else { // This is the saved set of registers after a normal call. 
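
The special case that patches 18 and 19 add to emitGetGCRegsSavedOrModified is plain mask arithmetic: a helper known to preserve the argument registers leaves a larger set of registers in which the emitter may keep reporting GC references across the call, and the whole case is compiled in only when the target headers define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH. The following standalone C# sketch is a toy model of that computation only; the mask values and names are invented for illustration and are not the real RBM_* definitions from the target headers.

// Toy model of the saved-set computation, not JIT code: register sets are
// plain bit masks, and the values below are made-up stand-ins for the RBM_*
// constants defined in the target headers.
using System;

class SavedSetDemo
{
    const uint AllInt            = 0xFFFF; // every integer register
    const uint NormalCallTrash   = 0x0FCF; // registers an ordinary call may clobber
    const uint LookupHelperTrash = 0x0C00; // the lookup helper is assumed to preserve arg registers

    static void Main()
    {
        // Registers whose GC/byref state the emitter may keep reporting across the call.
        uint savedAcrossNormalCall   = AllInt & ~NormalCallTrash;
        uint savedAcrossLookupHelper = AllInt & ~LookupHelperTrash;

        Console.WriteLine($"saved across normal call:   0x{savedAcrossNormalCall:X4}");   // F030
        Console.WriteLine($"saved across lookup helper: 0x{savedAcrossLookupHelper:X4}"); // F3FF
    }
}
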
From d866b1ebb51b440d2499daaa93685e7787d99b00 Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Mon, 3 Mar 2025 08:53:29 -0800 Subject: [PATCH 20/23] Delete unnecessary TAILJMP_RAX --- src/coreclr/vm/amd64/AsmHelpers.asm | 3 +-- src/coreclr/vm/amd64/VirtualCallStubAMD64.asm | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 55251c3ec70f99..9c87afc766e2e9 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -459,7 +459,6 @@ NESTED_ENTRY JIT_Patchpoint, _TEXT call JIT_PatchpointWorkerWorkerWithPolicy EPILOG_WITH_TRANSITION_BLOCK_RETURN - TAILJMP_RAX NESTED_END JIT_Patchpoint, _TEXT ; first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL @@ -497,4 +496,4 @@ NESTED_ENTRY InterpreterStub, _TEXT NESTED_END InterpreterStub, _TEXT endif ; FEATURE_INTERPRETER - end \ No newline at end of file + end diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index 85cdfdf6a24b3f..ff35de2cbee538 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ -102,7 +102,6 @@ NESTED_ENTRY JIT_InterfaceLookupForSlot, _TEXT RESTORE_FLOAT_ARGUMENT_REGISTERS __PWTB_FloatArgumentRegisters RESTORE_ARGUMENT_REGISTERS __PWTB_ArgumentRegisters EPILOG_WITH_TRANSITION_BLOCK_RETURN - TAILJMP_RAX NESTED_END JIT_InterfaceLookupForSlot, _TEXT From a70c6507d65d89dd7e5f8fdf37032eb9381a52e5 Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Mon, 3 Mar 2025 12:31:05 -0800 Subject: [PATCH 21/23] Depend on the GCInfo produced by the JIT for reporting of arg registers --- src/coreclr/vm/amd64/VirtualCallStubAMD64.asm | 5 +- src/coreclr/vm/amd64/cgenamd64.cpp | 26 +++++ src/coreclr/vm/virtualcallstub.cpp | 99 +++++++++++++++++-- src/coreclr/vm/virtualcallstub.h | 1 + 4 files changed, 122 insertions(+), 9 deletions(-) diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index ff35de2cbee538..8560570fcc1fcc 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ -7,6 +7,7 @@ include AsmConstants.inc CHAIN_SUCCESS_COUNTER equ ?g_dispatch_cache_chain_success_counter@@3_KA extern VSD_ResolveWorker:proc + extern VSD_ResolveWorkerForInterfaceLookupSlot:proc extern CHAIN_SUCCESS_COUNTER:dword BACKPATCH_FLAG equ 1 ;; Also known as SDF_ResolveBackPatch in the EE @@ -94,10 +95,8 @@ NESTED_ENTRY JIT_InterfaceLookupForSlot, _TEXT lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock mov rdx, r11 ; indirection cell - mov r8, 7FFFFFFFFFFFFFFFh ; INVALID_TOKEN - xor r9, r9 ; flags - call VSD_ResolveWorker + call VSD_ResolveWorkerForInterfaceLookupSlot RESTORE_FLOAT_ARGUMENT_REGISTERS __PWTB_FloatArgumentRegisters RESTORE_ARGUMENT_REGISTERS __PWTB_ArgumentRegisters diff --git a/src/coreclr/vm/amd64/cgenamd64.cpp b/src/coreclr/vm/amd64/cgenamd64.cpp index ec952dd0f6f873..cb754c192d3a29 100644 --- a/src/coreclr/vm/amd64/cgenamd64.cpp +++ b/src/coreclr/vm/amd64/cgenamd64.cpp @@ -40,6 +40,27 @@ void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegis #undef CALLEE_SAVED_REGISTER } +#ifdef TARGET_WINDOWS +void UpdateRegDisplayFromArgumentRegisters(REGDISPLAY * pRD, ArgumentRegisters* pRegs) +{ + LIMITED_METHOD_CONTRACT; + + // TODO: Fix ENUM_ARGUMENT_REGISTERS to have consistent casing for rcx and rdx + + T_CONTEXT * 
pContext = pRD->pCurrentContext; + pContext->Rcx = pRegs->RCX; + pContext->Rdx = pRegs->RDX; + pContext->R8 = pRegs->R8; + pContext->R9 = pRegs->R9; + + KNONVOLATILE_CONTEXT_POINTERS * pContextPointers = pRD->pCurrentContextPointers; + pContextPointers->Rcx = (PULONG64)&pRegs->RCX; + pContextPointers->Rdx = (PULONG64)&pRegs->RDX; + pContextPointers->R8 = (PULONG64)&pRegs->R8; + pContextPointers->R9 = (PULONG64)&pRegs->R9; +} +#endif + void ClearRegDisplayArgumentAndScratchRegisters(REGDISPLAY * pRD) { LIMITED_METHOD_CONTRACT; @@ -79,6 +100,11 @@ void TransitionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFl UpdateRegDisplayFromCalleeSavedRegisters(pRD, GetCalleeSavedRegisters()); ClearRegDisplayArgumentAndScratchRegisters(pRD); +#ifdef TARGET_WINDOWS + // TODO: Quick hack + UpdateRegDisplayFromArgumentRegisters(pRD, GetArgumentRegisters()); +#endif + SyncRegDisplayToCurrentContext(pRD); LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK TransitionFrame::UpdateRegDisplay_Impl(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP)); diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index f54f52ad496bdc..88da6adbee3688 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -1346,12 +1346,6 @@ PCODE VSD_ResolveWorker(TransitionBlock * pTransitionBlock, pSDFrame->SetCallSite(NULL, (TADDR)callSite.GetIndirectCell()); DispatchToken representativeToken(token); - if (!representativeToken.IsValid()) - { - // Used by JIT_InterfaceLookupForSlot - representativeToken = DispatchToken(VirtualCallStubManager::GetTokenFromStub(callSite.GetSiteTarget())); - } - MethodTable * pRepresentativeMT = pObj->GetMethodTable(); if (representativeToken.IsTypedToken()) { @@ -1406,6 +1400,99 @@ PCODE VSD_ResolveWorker(TransitionBlock * pTransitionBlock, return target; } +PCODE VSD_ResolveWorkerForInterfaceLookupSlot(TransitionBlock * pTransitionBlock, TADDR siteAddrForRegisterIndirect) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + INJECT_FAULT(COMPlusThrowOM();); + PRECONDITION(CheckPointer(pTransitionBlock)); + MODE_COOPERATIVE; + } CONTRACTL_END; + + MAKE_CURRENT_THREAD_AVAILABLE(); + +#ifdef _DEBUG + Thread::ObjectRefFlush(CURRENT_THREAD); +#endif + + StubDispatchFrame frame(pTransitionBlock); + StubDispatchFrame * pSDFrame = &frame; + + PCODE returnAddress = pSDFrame->GetUnadjustedReturnAddress(); + + StubCallSite callSite(siteAddrForRegisterIndirect, returnAddress); + + OBJECTREF *protectedObj = pSDFrame->GetThisPtr(); + _ASSERTE(protectedObj != NULL); + OBJECTREF pObj = *protectedObj; + + PCODE target = (PCODE)NULL; + + bool propagateExceptionToNativeCode = IsCallDescrWorkerInternalReturnAddress(pTransitionBlock->m_ReturnAddress); + + if (pObj == NULL) { + pSDFrame->Push(CURRENT_THREAD); + INSTALL_MANAGED_EXCEPTION_DISPATCHER_EX; + INSTALL_UNWIND_AND_CONTINUE_HANDLER_EX; + COMPlusThrow(kNullReferenceException); + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER_EX(propagateExceptionToNativeCode); + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER_EX(propagateExceptionToNativeCode); + _ASSERTE(!"Throw returned"); + } + + pSDFrame->SetCallSite(NULL, (TADDR)callSite.GetIndirectCell()); + + DispatchToken representativeToken = DispatchToken(VirtualCallStubManager::GetTokenFromStub(callSite.GetSiteTarget())); + + MethodTable * pRepresentativeMT = pObj->GetMethodTable(); + if (representativeToken.IsTypedToken()) + { + pRepresentativeMT = AppDomain::GetCurrentDomain()->LookupType(representativeToken.GetTypeID()); + CONSISTENCY_CHECK(CheckPointer(pRepresentativeMT)); + } + + 
pSDFrame->Push(CURRENT_THREAD); + + INSTALL_MANAGED_EXCEPTION_DISPATCHER_EX; + INSTALL_UNWIND_AND_CONTINUE_HANDLER_EX; + + GCPROTECT_BEGIN(*protectedObj); + + // For Virtual Delegates the m_siteAddr is a field of a managed object + // Thus we have to report it as an interior pointer, + // so that it is updated during a gc + GCPROTECT_BEGININTERIOR( *(callSite.GetIndirectCellAddress()) ); + + GCStress::MaybeTriggerAndProtect(pObj); + + PCODE callSiteTarget = callSite.GetSiteTarget(); + CONSISTENCY_CHECK(callSiteTarget != (PCODE)NULL); + + StubCodeBlockKind stubKind = STUB_CODE_BLOCK_UNKNOWN; + VirtualCallStubManager *pMgr = VirtualCallStubManager::FindStubManager(callSiteTarget, &stubKind); + PREFIX_ASSUME(pMgr != NULL); + + target = pMgr->ResolveWorker(&callSite, protectedObj, representativeToken, stubKind); + +#if _DEBUG + if (pSDFrame->GetGCRefMap() != NULL) + { + GCX_PREEMP(); + _ASSERTE(CheckGCRefMapEqual(pSDFrame->GetGCRefMap(), pSDFrame->GetFunction(), true)); + } +#endif // _DEBUG + + GCPROTECT_END(); + GCPROTECT_END(); + + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER_EX(propagateExceptionToNativeCode); + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER_EX(propagateExceptionToNativeCode); + pSDFrame->Pop(CURRENT_THREAD); + + return target; +} + void VirtualCallStubManager::BackPatchWorkerStatic(PCODE returnAddress, TADDR siteAddrForRegisterIndirect) { CONTRACTL { diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index 7638f2aec1eb29..054f146673ea2b 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -46,6 +46,7 @@ extern "C" PCODE STDCALL VSD_ResolveWorker(TransitionBlock * pTransitionBlock, #endif ); +extern "C" PCODE STDCALL VSD_ResolveWorkerForInterfaceLookupSlot(TransitionBlock * pTransitionBlock, TADDR siteAddrForRegisterIndirect); ///////////////////////////////////////////////////////////////////////////////////// #if defined(TARGET_X86) || defined(TARGET_AMD64) From bb2d70f4a3916ebc3ad870a115db31e4b59050f6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 3 Mar 2025 12:54:14 +0100 Subject: [PATCH 22/23] JIT: Simplify internal GC tracking structures Track all integer registers for calls in `regPtrDsc`. This does not cost any extra memory and it saves us from going back and forth between an intermediate format. It also unblocks proper GC reporting for helper calls that are GC reported with non-standard calling convention. 
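
As a rough picture of the intermediate format this removes (a toy model with simplified names, not the JIT's actual types): call-site GC register masks used to be stored indexed by position in a fixed callee-saved ordering, so the encoder had to translate between that compact form and real register numbers. The sketch below shows the kind of translation that, after this change, happens in one place at GC-info encoding time, using the x86 order (EDI, ESI, EBX, EBP) noted in the diff that follows.

// Toy model, not the JIT's types: convert a full integer-register mask
// (bit i set == register i holds a GC reference) into the compact mask the
// legacy x86 GC info format expects, ordered EDI, ESI, EBX, EBP.
using System;

class GcMaskDemo
{
    enum Reg { Eax, Ecx, Edx, Ebx, Esp, Ebp, Esi, Edi }

    // Fixed order assumed by the legacy decoder.
    static readonly Reg[] CalleeSaveOrder = { Reg.Edi, Reg.Esi, Reg.Ebx, Reg.Ebp };

    static uint ToCalleeSavedOrder(uint fullMask)
    {
        uint compact = 0;
        for (int i = 0; i < CalleeSaveOrder.Length; i++)
        {
            if ((fullMask & (1u << (int)CalleeSaveOrder[i])) != 0)
                compact |= 1u << i;
        }
        return compact;
    }

    static void Main()
    {
        uint live = (1u << (int)Reg.Esi) | (1u << (int)Reg.Ebx); // GC refs live in ESI and EBX
        Console.WriteLine($"compact mask: 0x{ToCalleeSavedOrder(live):X}"); // prints 0x6
    }
}
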
--- src/coreclr/jit/emit.cpp | 26 +++++++++----------------- src/coreclr/jit/gcencode.cpp | 30 +++++++++++++++++++++--------- src/coreclr/jit/jitgcinfo.h | 15 +++++++++------ src/coreclr/jit/regset.cpp | 21 --------------------- src/coreclr/jit/target.h | 15 +++++---------- src/coreclr/jit/targetamd64.h | 8 -------- src/coreclr/jit/targetarm.h | 4 ---- src/coreclr/jit/targetarm64.h | 4 ---- src/coreclr/jit/targetx86.h | 7 ------- 9 files changed, 44 insertions(+), 86 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 567f07f66f1e0f..ede18f7f84c810 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -9983,7 +9983,6 @@ void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callIn unsigned argStkCnt; S_UINT16 argRecCnt(0); // arg count for ESP, ptr-arg count for EBP - unsigned gcrefRegs, byrefRegs; #ifdef JIT32_GCENCODER // For the general encoder, we always need to record calls, so we make this call @@ -10025,26 +10024,19 @@ void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callIn return; #endif - // Do we have any interesting (i.e., callee-saved) registers live here? + // Do we have any interesting registers live here? - gcrefRegs = byrefRegs = 0; + unsigned gcrefRegs = emitThisGCrefRegs.GetIntRegSet() >> REG_INT_FIRST; + unsigned byrefRegs = emitThisByrefRegs.GetIntRegSet() >> REG_INT_FIRST; - // We make a bitmask whose bits correspond to callee-saved register indices (in the sequence - // of callee-saved registers only). - for (unsigned calleeSavedRegIdx = 0; calleeSavedRegIdx < CNT_CALL_GC_REGS; calleeSavedRegIdx++) - { - regMaskTP calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; - if (emitThisGCrefRegs & calleeSavedRbm) - { - gcrefRegs |= (1 << calleeSavedRegIdx); - } - if (emitThisByrefRegs & calleeSavedRbm) - { - byrefRegs |= (1 << calleeSavedRegIdx); - } - } + assert(regMaskTP::FromIntRegSet(SingleTypeRegSet(gcrefRegs << REG_INT_FIRST)) == emitThisGCrefRegs); + assert(regMaskTP::FromIntRegSet(SingleTypeRegSet(byrefRegs << REG_INT_FIRST)) == emitThisByrefRegs); #ifdef JIT32_GCENCODER + // x86 does not report GC refs/byrefs in return registers at call sites + gcrefRegs &= ~(1u << (REG_INTRET - REG_INT_FIRST)); + byrefRegs &= ~(1u << (REG_INTRET - REG_INT_FIRST)); + // For the general encoder, we always have to record calls, so we don't take this early return. /* Are there any // args to pop at this call site? diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index b32dc60292dbbf..688f61f60923ed 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -3199,9 +3199,24 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un callArgCnt = genRegPtrTemp->rpdPtrArg; - unsigned gcrefRegMask = genRegPtrTemp->rpdCallGCrefRegs; + unsigned gcrefRegMask = 0; - byrefRegMask = genRegPtrTemp->rpdCallByrefRegs; + byrefRegMask = 0; + + // The order here is fixed: it must agree with the order assumed in eetwain. + // NB: x86 GC decoder does not report return registers at call sites. 
+ static const regNumber calleeSaveOrder[] = {REG_EDI, REG_ESI, REG_EBX, REG_EBP}; + for (unsigned i = 0; i < ArrLen(calleeSaveOrder); i++) + { + if ((genRegPtrTemp->rpdCallGCrefRegs & (1 << (calleeSaveOrder[i] - REG_INT_FIRST))) != 0) + { + gcrefRegMask |= 1u << i; + } + if ((genRegPtrTemp->rpdCallByrefRegs & (1 << (calleeSaveOrder[i] - REG_INT_FIRST))) != 0) + { + byrefRegMask |= 1u << i; + } + } assert((gcrefRegMask & byrefRegMask) == 0); @@ -4465,8 +4480,8 @@ void GCInfo::gcMakeRegPtrTable( assert(call->u1.cdArgMask == 0 && call->cdArgCnt == 0); // Other than that, we just have to deal with the regmasks. - regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_CALL_GC_REGS.GetIntRegSet(); - regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_CALL_GC_REGS.GetIntRegSet(); + regMaskSmall gcrefRegMask = call->cdGCrefRegs; + regMaskSmall byrefRegMask = call->cdByrefRegs; assert((gcrefRegMask & byrefRegMask) == 0); @@ -4552,11 +4567,8 @@ void GCInfo::gcMakeRegPtrTable( { // This is a true call site. - regMaskSmall gcrefRegMask = - genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallGCrefRegs).GetIntRegSet(); - - regMaskSmall byrefRegMask = - genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallByrefRegs).GetIntRegSet(); + regMaskSmall gcrefRegMask = regMaskSmall(genRegPtrTemp->rpdCallGCrefRegs << REG_INT_FIRST); + regMaskSmall byrefRegMask = regMaskSmall(genRegPtrTemp->rpdCallByrefRegs << REG_INT_FIRST); assert((gcrefRegMask & byrefRegMask) == 0); diff --git a/src/coreclr/jit/jitgcinfo.h b/src/coreclr/jit/jitgcinfo.h index 6c62a0816113a5..6d04e3b22734b3 100644 --- a/src/coreclr/jit/jitgcinfo.h +++ b/src/coreclr/jit/jitgcinfo.h @@ -162,7 +162,13 @@ class GCInfo regMaskSmall rpdDel; // regptr bitset being removed } rpdCompiler; - unsigned short rpdPtrArg; // arg offset or popped arg count + struct + { + // Registers after call containing GC/byref (index 0 = REG_INT_FIRST) + unsigned int rpdCallGCrefRegs; + unsigned int rpdCallByrefRegs; + unsigned short rpdPtrArg; // arg offset or popped arg count + }; }; #ifndef JIT32_GCENCODER @@ -182,11 +188,8 @@ class GCInfo return (GCtype)rpdGCtype; } - unsigned short rpdIsThis : 1; // is it the 'this' pointer - unsigned short rpdCall : 1; // is this a true call site? - unsigned short : 1; // Padding bit, so next two start on a byte boundary - unsigned short rpdCallGCrefRegs : CNT_CALL_GC_REGS; // Callee-saved and return registers containing GC pointers. - unsigned short rpdCallByrefRegs : CNT_CALL_GC_REGS; // Callee-saved and return registers containing byrefs. + unsigned short rpdIsThis : 1; // is it the 'this' pointer + unsigned short rpdCall : 1; // is this a true call site? #ifndef JIT32_GCENCODER bool rpdIsCallInstr() diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 06444fb31cf956..3d9354b040f48e 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -944,27 +944,6 @@ regNumber genRegArgNext(regNumber argReg) } } -/***************************************************************************** - * - * The following table determines the order in which callee registers - * are encoded in GC information at call sites. 
- */ - -const regMaskTP raRbmCalleeSaveOrder[] = {RBM_CALL_GC_REGS_ORDER}; - -regMaskTP genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask) -{ - regMaskTP res = 0; - for (int i = 0; i < CNT_CALL_GC_REGS; i++) - { - if ((calleeSaveMask & (1 << i)) != 0) - { - res |= raRbmCalleeSaveOrder[i]; - } - } - return res; -} - /***************************************************************************** * * Initializes the spill code. Should be called once per function compiled. diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index a9728857eb717f..cb4957a6b5f107 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -379,6 +379,11 @@ struct regMaskTP #endif } + static regMaskTP FromIntRegSet(SingleTypeRegSet intRegs) + { + return regMaskTP(intRegs); + } + void operator|=(const regMaskTP& second) { low |= second.getLow(); @@ -1066,16 +1071,6 @@ inline SingleTypeRegSet getSingleTypeRegMask(regNumber reg, var_types regType) return regMask; } -/***************************************************************************** - * - * These arrays list the callee-saved register numbers (and bitmaps, respectively) for - * the current architecture. - */ -extern const regMaskTP raRbmCalleeSaveOrder[CNT_CALL_GC_REGS]; - -// This method takes a "compact" bitset of the callee-saved registers, and "expands" it to a full register mask. -regMaskTP genRegMaskFromCalleeSavedMask(unsigned short); - /***************************************************************************** * * Assumes that "reg" is of the given "type". Return the next unused reg number after "reg" diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 5208561cff9f06..d26ba3057ce3db 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -300,7 +300,6 @@ #ifdef UNIX_AMD64_ABI #define CNT_CALLEE_SAVED (5 + REG_ETW_FRAMED_EBP_COUNT) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED) - #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED + 2) #define CNT_CALLEE_TRASH_INT_INIT (9) #define CNT_CALLEE_TRASH_HIGHINT (8) @@ -308,16 +307,12 @@ #define CNT_CALLEE_SAVED_FLOAT (0) #define CNT_CALLEE_TRASH_FLOAT_INIT (16) #define CNT_CALLEE_TRASH_HIGHFLOAT (16) - /* NOTE: Sync with variable name defined in compiler.h */ - #define RBM_CALL_GC_REGS_ORDER RBM_EBX,RBM_ETW_FRAMED_EBP_LIST RBM_R12,RBM_R13,RBM_R14,RBM_R15,RBM_INTRET,RBM_INTRET_1 - #define RBM_CALL_GC_REGS (RBM_EBX|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15|RBM_INTRET|RBM_INTRET_1) // For SysV we have more volatile registers so we do not save any callee saves for EnC. #define RBM_ENC_CALLEE_SAVED 0 #else // !UNIX_AMD64_ABI #define CNT_CALLEE_SAVED (7 + REG_ETW_FRAMED_EBP_COUNT) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED) - #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED + 1) #define CNT_CALLEE_TRASH_INT_INIT (7) #define CNT_CALLEE_TRASH_HIGHINT (8) @@ -326,9 +321,6 @@ #define CNT_CALLEE_SAVED_FLOAT (10) #define CNT_CALLEE_TRASH_FLOAT_INIT (6) #define CNT_CALLEE_TRASH_HIGHFLOAT (16) - /* NOTE: Sync with variable name defined in compiler.h */ - #define RBM_CALL_GC_REGS_ORDER RBM_EBX,RBM_ESI,RBM_EDI,RBM_ETW_FRAMED_EBP_LIST RBM_R12,RBM_R13,RBM_R14,RBM_R15,RBM_INTRET - #define RBM_CALL_GC_REGS (RBM_EBX|RBM_ESI|RBM_EDI|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15|RBM_INTRET) // Callee-preserved registers we always save and allow use of for EnC code, since there are quite few volatile registers. 
#define RBM_ENC_CALLEE_SAVED (RBM_RSI | RBM_RDI) diff --git a/src/coreclr/jit/targetarm.h b/src/coreclr/jit/targetarm.h index 710187e70b0667..95cb19a2291a49 100644 --- a/src/coreclr/jit/targetarm.h +++ b/src/coreclr/jit/targetarm.h @@ -89,13 +89,9 @@ #define RBM_LOW_REGS (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R4|RBM_R5|RBM_R6|RBM_R7) #define RBM_HIGH_REGS (RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_R12|RBM_SP|RBM_LR|RBM_PC) - #define RBM_CALL_GC_REGS_ORDER RBM_R4,RBM_R5,RBM_R6,RBM_R7,RBM_R8,RBM_R9,RBM_R10,RBM_R11,RBM_INTRET - #define RBM_CALL_GC_REGS (RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_INTRET) - #define CNT_CALLEE_SAVED (8) #define CNT_CALLEE_TRASH (6) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) - #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED+1) #define CNT_CALLEE_SAVED_FLOAT (16) #define CNT_CALLEE_TRASH_FLOAT (16) diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 17c9249ab1b005..a311b2276cf882 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -108,13 +108,9 @@ REG_V12, REG_V13, REG_V14, REG_V15, \ REG_V3, REG_V2, REG_V1, REG_V0 - #define RBM_CALL_GC_REGS_ORDER RBM_R19,RBM_R20,RBM_R21,RBM_R22,RBM_R23,RBM_R24,RBM_R25,RBM_R26,RBM_R27,RBM_R28,RBM_INTRET,RBM_INTRET_1 - #define RBM_CALL_GC_REGS (RBM_R19|RBM_R20|RBM_R21|RBM_R22|RBM_R23|RBM_R24|RBM_R25|RBM_R26|RBM_R27|RBM_R28|RBM_INTRET|RBM_INTRET_1) - #define CNT_CALLEE_SAVED (11) #define CNT_CALLEE_TRASH (17) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) - #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED+2) #define CNT_CALLEE_SAVED_FLOAT (8) #define CNT_CALLEE_TRASH_FLOAT (24) diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h index 2e46478690e5cc..dd63766d631ac7 100644 --- a/src/coreclr/jit/targetx86.h +++ b/src/coreclr/jit/targetx86.h @@ -143,16 +143,9 @@ #define REG_VAR_ORDER REG_EAX,REG_EDX,REG_ECX,REG_ESI,REG_EDI,REG_EBX #define MAX_VAR_ORDER_SIZE 6 - // The order here is fixed: it must agree with an order assumed in eetwain... - // NB: x86 GC decoder does not report return registers at call sites. - #define RBM_CALL_GC_REGS_ORDER RBM_EDI,RBM_ESI,RBM_EBX,RBM_EBP - #define RBM_CALL_GC_REGS (RBM_EDI|RBM_ESI|RBM_EBX|RBM_EBP) - #define CNT_CALLEE_SAVED (4) #define CNT_CALLEE_TRASH (3) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) - // NB: x86 GC decoder does not report return registers at call sites. 
- #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED) #define CNT_CALLEE_SAVED_FLOAT (0) #define CNT_CALLEE_TRASH_FLOAT (6) From 42343da2b60dee7fe1cb10fc0fcd9ee4ec9fcfb1 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 3 Mar 2025 22:32:27 +0100 Subject: [PATCH 23/23] Remove check --- src/coreclr/gcinfo/gcinfoencoder.cpp | 102 ++------------------------- src/coreclr/inc/gcinfoencoder.h | 4 -- 2 files changed, 4 insertions(+), 102 deletions(-) diff --git a/src/coreclr/gcinfo/gcinfoencoder.cpp b/src/coreclr/gcinfo/gcinfoencoder.cpp index d1988ba34a5394..b512e92a6e3f28 100644 --- a/src/coreclr/gcinfo/gcinfoencoder.cpp +++ b/src/coreclr/gcinfo/gcinfoencoder.cpp @@ -909,97 +909,6 @@ void GcInfoEncoder::FinalizeSlotIds() #endif } -#ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - -// tells whether a slot cannot contain an object reference -// at call instruction or right after returning -bool GcInfoEncoder::DoNotTrackInPartiallyInterruptible(GcSlotDesc &slotDesc) -{ -#if defined(TARGET_ARM) - - _ASSERTE( m_SizeOfStackOutgoingAndScratchArea != (UINT32)-1 ); - if(slotDesc.IsRegister()) - { - int regNum = (int) slotDesc.Slot.RegisterNumber; - _ASSERTE(regNum >= 0 && regNum <= 14); - _ASSERTE(regNum != 13); // sp - - return ((regNum <= 3) || (regNum >= 12)) // R12 is volatile and SP/LR can't contain objects around calls - && regNum != 0 // R0 can contain return value - ; - } - else if (!slotDesc.IsUntracked() && (slotDesc.Slot.Stack.Base == GC_SP_REL) && - ((UINT32)slotDesc.Slot.Stack.SpOffset < m_SizeOfStackOutgoingAndScratchArea)) - { - return TRUE; - } - else - return FALSE; - -#elif defined(TARGET_ARM64) - - _ASSERTE(m_SizeOfStackOutgoingAndScratchArea != (UINT32)-1); - if (slotDesc.IsRegister()) - { - int regNum = (int)slotDesc.Slot.RegisterNumber; - _ASSERTE(regNum >= 0 && regNum <= 30); - _ASSERTE(regNum != 18); - - return (regNum <= 17 || regNum >= 29) // X0 through X17 are scratch, FP/LR can't be used for objects around calls - && regNum != 0 // X0 can contain return value - && regNum != 1 // X1 can contain return value - ; - } - else if (!slotDesc.IsUntracked() && (slotDesc.Slot.Stack.Base == GC_SP_REL) && - ((UINT32)slotDesc.Slot.Stack.SpOffset < m_SizeOfStackOutgoingAndScratchArea)) - { - return TRUE; - } - else - return FALSE; - -#elif defined(TARGET_AMD64) - - _ASSERTE( m_SizeOfStackOutgoingAndScratchArea != (UINT32)-1 ); - if(slotDesc.IsRegister()) - { - int regNum = (int) slotDesc.Slot.RegisterNumber; - _ASSERTE(regNum >= 0 && regNum <= 16); - _ASSERTE(regNum != 4); // rsp - - UINT16 PreservedRegMask = - (1 << 3) // rbx - | (1 << 5) // rbp -#ifndef UNIX_AMD64_ABI - | (1 << 6) // rsi - | (1 << 7) // rdi -#endif // UNIX_AMD64_ABI - | (1 << 12) // r12 - | (1 << 13) // r13 - | (1 << 14) // r14 - | (1 << 15) // r15 - | (1 << 0) // rax - may contain return value -#ifdef UNIX_AMD64_ABI - | (1 << 2) // rdx - may contain return value -#endif - ; - - return !(PreservedRegMask & (1 << regNum)); - } - else if (!slotDesc.IsUntracked() && (slotDesc.Slot.Stack.Base == GC_SP_REL) && - ((UINT32)slotDesc.Slot.Stack.SpOffset < m_SizeOfStackOutgoingAndScratchArea)) - { - return TRUE; - } - else - return FALSE; - -#else - return FALSE; -#endif -} -#endif // PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - void GcInfoEncoder::Build() { #ifdef _DEBUG @@ -1389,14 +1298,11 @@ void GcInfoEncoder::Build() else { UINT32 slotIndex = pCurrent->SlotId; - if(!DoNotTrackInPartiallyInterruptible(m_SlotTable[slotIndex])) - { - BYTE becomesLive = pCurrent->BecomesLive; - _ASSERTE((liveState.ReadBit(slotIndex) && !becomesLive) - 
|| (!liveState.ReadBit(slotIndex) && becomesLive)); + BYTE becomesLive = pCurrent->BecomesLive; + _ASSERTE((liveState.ReadBit(slotIndex) && !becomesLive) + || (!liveState.ReadBit(slotIndex) && becomesLive)); - liveState.WriteBit(slotIndex, becomesLive); - } + liveState.WriteBit(slotIndex, becomesLive); pCurrent++; } } diff --git a/src/coreclr/inc/gcinfoencoder.h b/src/coreclr/inc/gcinfoencoder.h index 8c5daf92c23b3f..3777e1b7064bb2 100644 --- a/src/coreclr/inc/gcinfoencoder.h +++ b/src/coreclr/inc/gcinfoencoder.h @@ -542,10 +542,6 @@ class GcInfoEncoder void SizeofSlotStateVarLengthVector(const BitArray& vector, UINT32 baseSkip, UINT32 baseRun, UINT32 * pSizeofSimple, UINT32 * pSizeofRLE, UINT32 * pSizeofRLENeg); UINT32 WriteSlotStateVarLengthVector(BitStreamWriter &writer, const BitArray& vector, UINT32 baseSkip, UINT32 baseRun); -#ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - bool DoNotTrackInPartiallyInterruptible(GcSlotDesc &slot); -#endif // PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - // Assumes that "*ppTransitions" is has size "numTransitions", is sorted by CodeOffset then by SlotId, // and that "*ppEndTransitions" points one beyond the end of the array. If "*ppTransitions" contains // any dead/live transitions pairs for the same CodeOffset and SlotID, removes those, by allocating a