dotnet · teo-tsirpanis · Jan 5, 2025 · Jan 5, 2025 · Jan 5, 2025 · Jan 5, 2025
diff --git a/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs b/src/libraries/System.Private.CoreLib/src/System/Reflection/AssemblyName.cs
@@ -1,8 +1,10 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Buffers;
 using System.ComponentModel;
 using System.Configuration.Assemblies;
+using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Runtime.Serialization;
 using System.Text;
@@ -303,175 +305,91 @@ public static bool ReferenceMatchesDefinition(AssemblyName? reference, AssemblyN
             return refName.Equals(defName, StringComparison.OrdinalIgnoreCase);
         }
 
-        [RequiresAssemblyFiles("The code will return an empty string for assemblies embedded in a single-file app")]
+        // This implementation of Escape has been copied from UriHelper from System.Private.Uri and adapted to match AssemblyName's requirements.
         internal static string EscapeCodeBase(string? codebase)
         {
             if (codebase == null)
                 return string.Empty;
 
-            int position = 0;
-            char[]? dest = EscapeString(codebase, 0, codebase.Length, null, ref position, true, c_DummyChar, c_DummyChar, c_DummyChar);
-            if (dest == null)
+            int indexOfFirstToEscape = codebase.AsSpan().IndexOfAnyExcept(UnreservedReserved);
+            if (indexOfFirstToEscape < 0)
+            {
+                // Nothing to escape, just return the original value.
                 return codebase;
+            }
+
+            // Otherwise, create a ValueStringBuilder to store the escaped data into,
+            // escape the rest, and concat the result with the characters we skipped above.
+            var vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]);
+
+            // We may throw for very large inputs (when growing the ValueStringBuilder).
+            vsb.EnsureCapacity(codebase.Length);
+
+            EscapeStringToBuilder(codebase.AsSpan(indexOfFirstToEscape), ref vsb);
 
-            return new string(dest, 0, position);
+            string result = string.Concat(codebase.AsSpan(0, indexOfFirstToEscape), vsb.AsSpan());
+            vsb.Dispose();
+            return result;
         }
 
-        // This implementation of EscapeString has been copied from System.Private.Uri from the runtime repo
-        // - forceX characters are always escaped if found
-        // - rsvd character will remain unescaped
-        //
-        // start    - starting offset from input
-        // end      - the exclusive ending offset in input
-        // destPos  - starting offset in dest for output, on return this will be an exclusive "end" in the output.
-        //
-        // In case "dest" has lack of space it will be reallocated by preserving the _whole_ content up to current destPos
-        //
-        // Returns null if nothing has to be escaped AND passed dest was null, otherwise the resulting array with the updated destPos
-        //
-        internal static unsafe char[]? EscapeString(string input, int start, int end, char[]? dest, ref int destPos,
-            bool isUriString, char force1, char force2, char rsvd)
+        internal static void EscapeStringToBuilder(scoped ReadOnlySpan<char> stringToEscape, ref ValueStringBuilder vsb)
         {
-            int i = start;
-            int prevInputPos = start;
-            byte* bytes = stackalloc byte[c_MaxUnicodeCharsReallocate * c_MaxUTF_8BytesPerUnicodeChar];   // 40*4=160
+            // Allocate enough stack space to hold any Rune's UTF8 encoding.
+            Span<byte> utf8Bytes = stackalloc byte[4];
 
-            fixed (char* pStr = input)
+            while (!stringToEscape.IsEmpty)
             {
-                for (; i < end; ++i)
-                {
-                    char ch = pStr[i];
+                char ch = stringToEscape[0];
 
-                    // a Unicode ?
-                    if (ch > '\x7F')
+                if (!char.IsAscii(ch))
+                {
+                    if (Rune.DecodeFromUtf16(stringToEscape, out Rune r, out int charsConsumed) != OperationStatus.Done)
                     {
-                        short maxSize = (short)Math.Min(end - i, (int)c_MaxUnicodeCharsReallocate - 1);
-
-                        short count = 1;
-                        for (; count < maxSize && pStr[i + count] > '\x7f'; ++count) ;
-
-                        // Is the last a high surrogate?
-                        if (pStr[i + count - 1] >= 0xD800 && pStr[i + count - 1] <= 0xDBFF)
-                        {
-                            // Should be a rare case where the app tries to feed an invalid Unicode surrogates pair
-                            if (count == 1 || count == end - i)
-                                throw new FormatException(SR.Arg_FormatException);
-                            // need to grab one more char as a Surrogate except when it's a bogus input
-                            ++count;
-                        }
-
-                        dest = EnsureDestinationSize(pStr, dest, i,
-                            (short)(count * c_MaxUTF_8BytesPerUnicodeChar * c_EncodedCharsPerByte),
-                            c_MaxUnicodeCharsReallocate * c_MaxUTF_8BytesPerUnicodeChar * c_EncodedCharsPerByte,
-                            ref destPos, prevInputPos);
-
-                        short numberOfBytes = (short)Encoding.UTF8.GetBytes(pStr + i, count, bytes,
-                            c_MaxUnicodeCharsReallocate * c_MaxUTF_8BytesPerUnicodeChar);
-
-                        // This is the only exception that built in UriParser can throw after a Uri ctor.
-                        // Should not happen unless the app tries to feed an invalid Unicode string
-                        if (numberOfBytes == 0)
-                            throw new FormatException(SR.Arg_FormatException);
+                        r = Rune.ReplacementChar;
+                    }
 
-                        i += (count - 1);
+                    Debug.Assert(stringToEscape.EnumerateRunes() is { } e && e.MoveNext() && e.Current == r);
+                    Debug.Assert(charsConsumed is 1 or 2);
 
-                        for (count = 0; count < numberOfBytes; ++count)
-                            EscapeAsciiChar((char)bytes[count], dest, ref destPos);
+                    stringToEscape = stringToEscape.Slice(charsConsumed);
 
-                        prevInputPos = i + 1;
-                    }
-                    else if (ch == '%' && rsvd == '%')
-                    {
-                        // Means we don't reEncode '%' but check for the possible escaped sequence
-                        dest = EnsureDestinationSize(pStr, dest, i, c_EncodedCharsPerByte,
-                            c_MaxAsciiCharsReallocate * c_EncodedCharsPerByte, ref destPos, prevInputPos);
-                        if (i + 2 < end && char.IsAsciiHexDigit(pStr[i + 1]) && char.IsAsciiHexDigit(pStr[i + 2]))
-                        {
-                            // leave it escaped
-                            dest[destPos++] = '%';
-                            dest[destPos++] = pStr[i + 1];
-                            dest[destPos++] = pStr[i + 2];
-                            i += 2;
-                        }
-                        else
-                        {
-                            EscapeAsciiChar('%', dest, ref destPos);
-                        }
-                        prevInputPos = i + 1;
-                    }
-                    else if (ch == force1 || ch == force2 || (ch != rsvd && (isUriString ? !IsReservedUnreservedOrHash(ch) : !IsUnreserved(ch))))
+                    // The rune is non-ASCII, so encode it as UTF8, and escape each UTF8 byte.
+                    r.TryEncodeToUtf8(utf8Bytes, out int bytesWritten);
+                    foreach (byte b in utf8Bytes.Slice(0, bytesWritten))
                     {
-                        dest = EnsureDestinationSize(pStr, dest, i, c_EncodedCharsPerByte,
-                            c_MaxAsciiCharsReallocate * c_EncodedCharsPerByte, ref destPos, prevInputPos);
-                        EscapeAsciiChar(ch, dest, ref destPos);
-                        prevInputPos = i + 1;
+                        PercentEncodeByte(b, ref vsb);
                     }
                 }
-
-                if (prevInputPos != i)
+                else if (!UnreservedReserved.Contains(ch))
                 {
-                    // need to fill up the dest array ?
-                    if (prevInputPos != start || dest != null)
-                        dest = EnsureDestinationSize(pStr, dest, i, 0, 0, ref destPos, prevInputPos);
+                    PercentEncodeByte((byte)ch, ref vsb);
+                    stringToEscape = stringToEscape.Slice(1);
                 }
-            }
-
-            return dest;
-        }
-
-        //
-        // ensure destination array has enough space and contains all the needed input stuff
-        //
-        private static unsafe char[] EnsureDestinationSize(char* pStr, char[]? dest, int currentInputPos,
-            short charsToAdd, short minReallocateChars, ref int destPos, int prevInputPos)
-        {
-            if (dest is null || dest.Length < destPos + (currentInputPos - prevInputPos) + charsToAdd)
-            {
-                // allocating or reallocating array by ensuring enough space based on maxCharsToAdd.
-                char[] newresult = new char[destPos + (currentInputPos - prevInputPos) + minReallocateChars];
+                else
+                {
+                    // We have a character we don't want to escape. It's likely there are more, do a vectorized search.
+                    int charsToCopy = stringToEscape.IndexOfAnyExcept(UnreservedReserved);
+                    if (charsToCopy < 0)
+                    {
+                        charsToCopy = stringToEscape.Length;
+                    }
+                    Debug.Assert(charsToCopy > 0);
 
-                if (dest is not null && destPos != 0)
-                    Buffer.BlockCopy(dest, 0, newresult, 0, destPos << 1);
-                dest = newresult;
+                    vsb.Append(stringToEscape.Slice(0, charsToCopy));
+                    stringToEscape = stringToEscape.Slice(charsToCopy);
+                }
             }
-
-            // ensuring we copied everything form the input string left before last escaping
-            while (prevInputPos != currentInputPos)
-                dest[destPos++] = pStr[prevInputPos++];
-            return dest;
-        }
-
-        internal static void EscapeAsciiChar(char ch, char[] to, ref int pos)
-        {
-            to[pos++] = '%';
-            to[pos++] = HexConverter.ToCharUpper(ch >> 4);
-            to[pos++] = HexConverter.ToCharUpper(ch);
         }
 
-        private static bool IsReservedUnreservedOrHash(char c)
+        internal static void PercentEncodeByte(byte ch, ref ValueStringBuilder vsb)
         {
-            if (IsUnreserved(c))
-            {
-                return true;
-            }
-            return RFC3986ReservedMarks.Contains(c);
+            vsb.Append('%');
+            HexConverter.ToCharsBuffer(ch, vsb.AppendSpan(2), 0, HexConverter.Casing.Upper);
         }
 
-        internal static bool IsUnreserved(char c)
-        {
-            if (char.IsAsciiLetterOrDigit(c))
-            {
-                return true;
-            }
-            return RFC3986UnreservedMarks.Contains(c);
-        }
+        [field: AllowNull]
+        private static SearchValues<char> UnreservedReserved => field ??= SearchValues.Create("!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~");
 
-        internal const char c_DummyChar = (char)0xFFFF;     // An Invalid Unicode character used as a dummy char passed into the parameter
-        private const short c_MaxAsciiCharsReallocate = 40;
-        private const short c_MaxUnicodeCharsReallocate = 40;
-        private const short c_MaxUTF_8BytesPerUnicodeChar = 4;
-        private const short c_EncodedCharsPerByte = 3;
-        private const string RFC3986ReservedMarks = ":/?#[]@!$&'()*+,;=";
-        private const string RFC3986UnreservedMarks = "-._~";
+        private const int StackallocThreshold = 512;
     }
 }