Improve support for 512-bit Vector<T> #111472
Open
saucecontrol wants to merge 8 commits into dotnet:main from saucecontrol:vectort512
Commits
3bc39e4 improve support for 512-bit Vector<T> (saucecontrol)
eae32e9 fix size check (saucecontrol)
699f15e add MaxVectorTBitWidth configs to jitstress-isas-x86 pipeline (saucecontrol)
8064d04 Merge remote-tracking branch 'upstream/main' into vectort512 (saucecontrol)
94cb2c5 clamp Vector<T> size to largest accelerated fixed-sized vector (saucecontrol)
c7dab5b Merge remote-tracking branch 'upstream/main' into vectort512 (saucecontrol)
2c26436 move PreferredVectorBitWidth logic to VM (saucecontrol)
70f391c Merge branch 'main' into vectort512 (saucecontrol)
Filter by extension
Conversations
Should we avoid reading the config setting in both the VM and the JIT so that there is just one source of truth? I assume that the JIT should be able to get the PreferredVectorBitWidth from the VM's Vector<T> size, now that the Vector<T> size is set based on it.
There's still a disconnect, because we'll continue to default Vector<T> to 256-bit even when PreferredVectorBitWidth is 512 (or 0 with fully-accelerated AVX-512); i.e. 512-bit Vector<T> will continue to be strictly opt-in for the foreseeable future.

The fact that the fixed-sized vectors are known only JIT-side is odd, and I've been thinking through how it could be cleaned up, but I haven't come up with anything that markedly improves what we have now. If you have ideas, I could give it a shot.
Is this change a UX improvement then? It seems to make the explanation of how the switches work and interact more complicated.
It's early enough in the cycle that I think we can allow the default size of Vector<T> to be 512 if the hardware supports it. It would go out in P2 at the latest and give us an early indicator if too many users are broken or regress in performance because of it. Such users can always set MaxVectorTBitWidth=256 as a workaround, particularly if the number of breaks is "small", and we can revert and do the more complex configuration knobs if the signal is high enough.
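For reference, a minimal sketch (not part of this PR) of how a user could observe the widths being discussed; it assumes the DOTNET_MaxVectorTBitWidth environment-variable spelling of the MaxVectorTBitWidth config mentioned above:

```csharp
using System;
using System.Numerics;
using System.Runtime.Intrinsics;

// Prints the Vector<T> width and which fixed-width vectors report acceleration.
// Run with e.g. DOTNET_MaxVectorTBitWidth=256 (or 512) to see the knob's effect.
class VectorWidthProbe
{
    static void Main()
    {
        // Vector<byte>.Count equals the vector size in bytes, so *8 gives the bit width.
        Console.WriteLine($"Vector<T> bit width:   {Vector<byte>.Count * 8}");
        Console.WriteLine($"Vector<T> accelerated: {Vector.IsHardwareAccelerated}");

        // Fixed-width acceleration is reported independently of sizeof(Vector<T>);
        // e.g. Vector512 can be accelerated while Vector<T> stays at 256 bits.
        Console.WriteLine($"Vector128 accelerated: {Vector128.IsHardwareAccelerated}");
        Console.WriteLine($"Vector256 accelerated: {Vector256.IsHardwareAccelerated}");
        Console.WriteLine($"Vector512 accelerated: {Vector512.IsHardwareAccelerated}");
    }
}
```

Running the same program with MaxVectorTBitWidth set to 256 versus 512 on AVX-512 hardware is exactly the back-compat escape hatch described above.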
I think what we will end up conceptually having in the "worst" case is the following:

Where MaxVectorBitWidth is the conceptual value the hardware supports and the "worst" case is the need to default Vector<T> to no more than 256 bits in size. This ensures that sizeof(Vector<T>) is not greater than the maximum size that we report as hardware accelerated. For example, you could not on x64 have Vector512.IsHardwareAccelerated == false and Vector<byte>.Count == 64, as that would add needless complexity to the user consideration.

Ideally, though, we can simplify it to the following:

I think in both cases we don't need the JIT to handle anything, however. We can pick the size of Vector<T> entirely on the VM side and then have the JIT simply get it via the compExactlyDependsOn(InstructionSet_VectorT512) checks that currently propagate that size information to the JIT: https://github.com/dotnet/runtime/blob/main/src/coreclr/jit/compiler.h#L9285-L9330
Right, but then how does the JIT decide which fixed-width classes to report as hardware accelerated? It can't use the VectorT ISA, because we have to continue to support Vector512.IsHardwareAccelerated == true with sizeof(Vector<T>) == 32.

I agree with this either way. The name PreferredVectorBitWidth isn't helping; perhaps it could be better named, something like MaxAcceleratedVectorBitWidth? Then the largest fixed-width vector considered accelerated is Min(MaxAcceleratedVectorBitWidth, [Max hardware SIMD size]), and the Vector<T> size is Min(MaxVectorTBitWidth, MaxAcceleratedVectorBitWidth, [Max hardware SIMD size]), where we can still default MaxVectorTBitWidth to 256 for back compat.

That is, of course, the same thing I've implemented here, but the naming is easier to follow. I wouldn't mind taking on a cleanup task to deal with the 'virtual' ISAs that today only the JIT knows about. Removing this duplication (if it stays) should be one of the goals of that effort.
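A sketch of the two Min() expressions above, using the hypothetical MaxAcceleratedVectorBitWidth rename; maxHardwareSimdBits stands in for "[Max hardware SIMD size]":

```csharp
using System;

// Sketch of the proposed sizing rules; all parameter names are illustrative.
static class VectorSizing
{
    public static (int MaxAcceleratedFixedBits, int VectorTBits) Compute(
        int maxAcceleratedVectorBitWidth, // proposed rename of PreferredVectorBitWidth
        int maxVectorTBitWidth,           // MaxVectorTBitWidth, defaulted to 256 for back compat
        int maxHardwareSimdBits)          // e.g. 512 with AVX-512, 256 with AVX2 only
    {
        // Largest fixed-width vector reported as hardware accelerated.
        int maxAcceleratedFixedBits = Math.Min(maxAcceleratedVectorBitWidth, maxHardwareSimdBits);

        // Vector<T> is additionally clamped by MaxVectorTBitWidth.
        int vectorTBits = Math.Min(maxVectorTBitWidth, maxAcceleratedFixedBits);

        return (maxAcceleratedFixedBits, vectorTBits);
    }
}
```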
In the same way the VM currently sets one of InstructionSet_VectorT### to indicate its size, it should likely be using InstructionSet_Vector### to indicate acceleration. That is, the JIT doesn't need access to PreferredVectorBitWidth nor MaxVectorTBitWidth. Rather, it knows sizeof(Vector<T>) based on InstructionSet_VectorT### and it knows the opts.preferredVectorByteLength based on the largest InstructionSet_Vector###.

It can then continue internally setting InstructionSet_Vector### itself (after it has computed the opts.preferredVectorByteLength value) based on the InstructionSet_Isa that are supported, so that we still get the expected codegen, just without reporting itself as accelerated.
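A rough, hypothetical sketch of that division of labor, with the instruction-set names reduced to a small enum; the actual JIT code is C++ and uses compExactlyDependsOn-style checks rather than these helpers:

```csharp
using System;

// Hypothetical sketch of the proposed flow: the JIT never reads the config knobs;
// it derives both sizes purely from the instruction sets the VM reported.
enum Isa { VectorT128, VectorT256, VectorT512, Vector128, Vector256, Vector512 }

static class JitSideSketch
{
    // sizeof(Vector<T>) follows from whichever InstructionSet_VectorT### the VM set.
    public static int VectorTByteLength(Func<Isa, bool> vmReported) =>
        vmReported(Isa.VectorT512) ? 64 :
        vmReported(Isa.VectorT256) ? 32 : 16;

    // opts.preferredVectorByteLength follows from the largest InstructionSet_Vector###.
    public static int PreferredVectorByteLength(Func<Isa, bool> vmReported) =>
        vmReported(Isa.Vector512) ? 64 :
        vmReported(Isa.Vector256) ? 32 :
        vmReported(Isa.Vector128) ? 16 : 0;
}
```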
InstructionSet_Vector### is exactly the problem. Today, only the JIT knows about those ISAs. Moving them to the VM would be a breaking change for R2R and NAOT, because they would also have to pass those to the JIT, and they don't. Fixing that (and I do agree it needs to be fixed) is a much bigger change that is better done separately.
Talked through this on Discord by explaining the difference between readytoruninstructionset and corinfoinstructionset. The latter isn't tracked by R2R but is used by both the JIT (codeman.cpp) and NAOT (CorInfoImpl.cs) VMs. Accordingly, we should be able to do all the selection on the VM side. The nuance was that we can't get the JIT info from just the Vector<T> size unless we agree that Vector<T> implicitly grows up to PreferredVectorBitWidth. There are concerns with that due to it historically being no larger than 32 bytes and the likelihood that code may break (bad assumptions were hardcoded) or silently regress (code paths no longer accelerate but were guarded against larger sizes).

The safest thing for us, given that concern, is to have the VM (https://github.com/dotnet/runtime/blob/main/src/coreclr/vm/codeman.cpp#L1225 and https://github.com/dotnet/runtime/blob/main/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs#L4197) start setting InstructionSet_Vector### based on DOTNET_PreferredVectorBitWidth. This ensures we have a single "source of truth" and avoids mirroring config knob access between the JIT and VM. We can then, in a separate PR, do a one-line change in the VM to experiment with defaulting Vector<T> to the same size as PreferredVectorBitWidth, rather than restricting it to no more than 256 by default. This makes it easy to back out if the concerns are founded and decently widespread breaks are found.

This keeps things simple with one source of truth and doesn't require more comprehensive refactoring to achieve.
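To make the agreed direction concrete, a hypothetical sketch of the VM-side selection; the names, defaults, and reporting callback are illustrative only, and the real logic lives in codeman.cpp and CorInfoImpl.cs:

```csharp
using System;

// Hypothetical sketch of the VM-side selection described above: the config knobs
// are read in exactly one place, and only instruction-set flags reach the JIT.
// The flag strings mirror the InstructionSet_* identifiers quoted in this thread.
static class VmSideSketch
{
    public static void SelectVectorIsas(
        int preferredVectorBitWidth, // DOTNET_PreferredVectorBitWidth; 0 = no explicit limit
        int maxVectorTBitWidth,      // DOTNET_MaxVectorTBitWidth; 0 = default (256 today)
        int maxHardwareSimdBits,     // what the CPU actually supports, e.g. 512 with AVX-512
        Action<string> reportToJit)  // stands in for enabling an instruction-set flag
    {
        int limit = preferredVectorBitWidth == 0 ? maxHardwareSimdBits : preferredVectorBitWidth;
        int accelerated = Math.Min(limit, maxHardwareSimdBits);

        // Acceleration is reported via InstructionSet_Vector###.
        if (accelerated >= 512) reportToJit("InstructionSet_Vector512");
        if (accelerated >= 256) reportToJit("InstructionSet_Vector256");
        if (accelerated >= 128) reportToJit("InstructionSet_Vector128");

        // sizeof(Vector<T>) is reported via InstructionSet_VectorT###, clamped as discussed.
        int vectorTBits = Math.Min(maxVectorTBitWidth == 0 ? 256 : maxVectorTBitWidth, accelerated);
        reportToJit(vectorTBits >= 512 ? "InstructionSet_VectorT512"
                  : vectorTBits >= 256 ? "InstructionSet_VectorT256"
                  : "InstructionSet_VectorT128");
    }
}
```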
OK, I believe I have done what we discussed. It did allow removing a JIT flag and got the throttling logic completely on the VM side, but I couldn't move the Vector### ISAs completely over because the meanings are still overloaded; e.g. InstructionSet_Vector256 can't mean we want fully-accelerated Vector256 (AVX2 + PreferredVectorBitWidth >= 256), because it still means partially-accelerated Vector256 (AVX) is actually supported.

I also found that we were doing the Vector512 throttling on the AOT side, so I've duplicated the logic to clamp the Vector<T> size there as well. So we still have duplicated logic, but I suppose it's a bit cleaner now.