From 94cb2c5a25a4750c0c8029db6c1996a23ef53caf Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Thu, 16 Jan 2025 20:46:39 -0800 Subject: [PATCH] clamp Vector<T> size to largest accelerated fixed-sized vector --- src/coreclr/inc/clrconfigvalues.h | 3 ++ src/coreclr/vm/codeman.cpp | 50 +++++++++++++++---- src/tests/Common/testenvironment.proj | 2 +- .../X86/General/IsSupported.cs | 5 ++ .../HardwareIntrinsics/X86/X86Base/CpuId.cs | 4 +- 5 files changed, 50 insertions(+), 14 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 7e23d58858dd93..f010644c5caed7 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -686,6 +686,9 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame" #endif RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_MaxVectorTBitWidth, W("MaxVectorTBitWidth"), 0, "The maximum decimal width, in bits, that Vector<T> is allowed to be. A value less than 128 is treated as the system default.", CLRConfig::LookupOptions::ParseIntegerAsBase10) +#if defined(TARGET_AMD64) || defined(TARGET_X86) +RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_PreferredVectorBitWidth, W("PreferredVectorBitWidth"), 0, "The maximum decimal width, in bits, of fixed-width vectors that may be considered hardware accelerated. 
A value less than 128 is treated as the system default.", CLRConfig::LookupOptions::ParseIntegerAsBase10) +#endif // defined(TARGET_AMD64) || defined(TARGET_X86) // // Hardware Intrinsic ISAs; keep in sync with jitconfigvalues.h diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 50648840fedf94..d82fa0f4f7f102 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1548,17 +1548,6 @@ void EEJitManager::SetCpuInfo() #if defined(TARGET_X86) || defined(TARGET_AMD64) - // Clean up mutually exclusive ISAs - if (CPUCompileFlags.IsSet(InstructionSet_VectorT512)) - { - CPUCompileFlags.Clear(InstructionSet_VectorT256); - CPUCompileFlags.Clear(InstructionSet_VectorT128); - } - else if (CPUCompileFlags.IsSet(InstructionSet_VectorT256)) - { - CPUCompileFlags.Clear(InstructionSet_VectorT128); - } - int cpuidInfo[4]; const int CPUID_EAX = 0; @@ -1625,6 +1614,45 @@ void EEJitManager::SetCpuInfo() } } } + + // JIT maps Vector<T> to Vector128, Vector256, or Vector512 for the purposes of most intrinsic resolution. + // If JIT reports that the corresponding fixed-width vector class is not hardware accelerated, that will + // mean Vector<T> is also reported as not accelerated, so we will limit Vector<T> size using the same rules. + // This logic must be kept in sync with Compiler::compSetProcessor/Compiler::getPreferredVectorByteLength. 
+ + uint32_t preferredVectorBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_PreferredVectorBitWidth) / 128) * 128; + + if ((preferredVectorBitWidth == 0) && CPUCompileFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_VECTOR512_THROTTLING)) + { + preferredVectorBitWidth = 256; + } + + if (preferredVectorBitWidth != 0) + { + if (CPUCompileFlags.IsSet(InstructionSet_VectorT512) && (preferredVectorBitWidth < 512)) + { + CPUCompileFlags.Clear(InstructionSet_VectorT512); + } + + if (CPUCompileFlags.IsSet(InstructionSet_VectorT256) && (preferredVectorBitWidth < 256)) + { + CPUCompileFlags.Clear(InstructionSet_VectorT256); + } + } + + // Only one VectorT ISA can be set, and we have validated that anything left in the flags is supported + // by both the hardware and the config. Remove everything less than the largest supported. + + if (CPUCompileFlags.IsSet(InstructionSet_VectorT512)) + { + CPUCompileFlags.Clear(InstructionSet_VectorT256); + CPUCompileFlags.Clear(InstructionSet_VectorT128); + } + else if (CPUCompileFlags.IsSet(InstructionSet_VectorT256)) + { + CPUCompileFlags.Clear(InstructionSet_VectorT128); + } + #endif // TARGET_X86 || TARGET_AMD64 m_CPUCompileFlags = CPUCompileFlags; diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj index 0ef77761674105..46d0b2cb1fa5fe 100644 --- a/src/tests/Common/testenvironment.proj +++ b/src/tests/Common/testenvironment.proj @@ -136,7 +136,7 @@ - + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs b/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs index 18241d9ecfc40d..c1769def972831 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs @@ -19,6 +19,11 @@ public static void IsSupported() if (Sse.IsSupported && int.TryParse(Environment.GetEnvironmentVariable("DOTNET_EnableIncompleteISAClass"), out var enableIncompleteIsa) && (enableIncompleteIsa != 0)) { // X86 platforms + if 
(Vector<byte>.Count == 64 && !Avx512F.IsSupported) + { + result = false; + } + + if (Vector<byte>.Count == 32 && !Avx2.IsSupported) { result = false; diff --git a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs index e88f13dc80c7fc..aeeb847495fd5a 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs @@ -386,11 +386,11 @@ public unsafe static void CpuId() if ((maxVectorTBitWidth >= 512) && !isAvx512HierarchyDisabled) { - vectorTByteLength = 64; + vectorTByteLength = int.Min(64, preferredVectorByteLength); } else if ((maxVectorTBitWidth is 0 or >= 256) && !isAvx2HierarchyDisabled) { - vectorTByteLength = 32; + vectorTByteLength = int.Min(32, preferredVectorByteLength); } if (Vector<byte>.Count != vectorTByteLength)