Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify AssemblyName.EscapeCodeBase and remove unsafe. #111095

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.ComponentModel;
using System.Configuration.Assemblies;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.Serialization;
using System.Text;
Expand Down Expand Up @@ -303,175 +305,91 @@ public static bool ReferenceMatchesDefinition(AssemblyName? reference, AssemblyN
return refName.Equals(defName, StringComparison.OrdinalIgnoreCase);
}

[RequiresAssemblyFiles("The code will return an empty string for assemblies embedded in a single-file app")]
// This implementation of Escape has been copied from UriHelper from System.Private.Uri and adapted to match AssemblyName's requirements.
//
// Returns string.Empty for a null codebase, and the input itself when every character is contained in
// UnreservedReserved. Otherwise returns the untouched leading characters followed by the escaped remainder.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The statements that use position,
// dest and EscapeString look like the previous implementation shown next to the ValueStringBuilder-based
// one - confirm against the real file before relying on statement order here.
internal static string EscapeCodeBase(string? codebase)
{
if (codebase == null)
return string.Empty;

int position = 0;
char[]? dest = EscapeString(codebase, 0, codebase.Length, null, ref position, true, c_DummyChar, c_DummyChar, c_DummyChar);
if (dest == null)
// Index of the first character that is not in UnreservedReserved; negative when there is none.
int indexOfFirstToEscape = codebase.AsSpan().IndexOfAnyExcept(UnreservedReserved);
if (indexOfFirstToEscape < 0)
{
// Nothing to escape, just return the original value.
return codebase;
}

// Otherwise, create a ValueStringBuilder to store the escaped data into,
// escape the rest, and concat the result with the characters we skipped above.
var vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]);

// We may throw for very large inputs (when growing the ValueStringBuilder).
vsb.EnsureCapacity(codebase.Length);

// Only the tail starting at the first character that needs escaping is handed to the escaper.
EscapeStringToBuilder(codebase.AsSpan(indexOfFirstToEscape), ref vsb);

return new string(dest, 0, position);
string result = string.Concat(codebase.AsSpan(0, indexOfFirstToEscape), vsb.AsSpan());
// Disposed only after the result string has been created from the builder contents.
// Presumably this releases any buffer the builder acquired while growing - confirm in ValueStringBuilder.
vsb.Dispose();
return result;
}

// This implementation of EscapeString has been copied from System.Private.Uri in the runtime repo.
// - forceX characters are always escaped if found
// - the rsvd character will remain unescaped
//
// start - starting offset in input
// end - the exclusive ending offset in input
// destPos - starting offset in dest for output; on return this is the exclusive "end" of the output.
//
// If "dest" lacks space it is reallocated, preserving the _whole_ content up to the current destPos.
//
// Returns null if nothing has to be escaped AND the passed dest was null; otherwise returns the resulting array with the updated destPos.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The lines below interleave the
// pointer-based EscapeString and EnsureDestinationSize pair with the span-based EscapeStringToBuilder,
// and a review conversation from the page is embedded in the middle - confirm against the real file.
//
// EscapeStringToBuilder consumes stringToEscape from the front until it is empty:
// - a non-ASCII char is decoded as a Rune (Rune.ReplacementChar when decoding does not report Done),
//   encoded to UTF-8, and each resulting byte is passed to PercentEncodeByte;
// - an ASCII char that is not in UnreservedReserved is passed to PercentEncodeByte directly;
// - a run of chars that are in UnreservedReserved is appended to vsb unchanged.
internal static unsafe char[]? EscapeString(string input, int start, int end, char[]? dest, ref int destPos,
bool isUriString, char force1, char force2, char rsvd)
internal static void EscapeStringToBuilder(scoped ReadOnlySpan<char> stringToEscape, ref ValueStringBuilder vsb)
{
int i = start;
int prevInputPos = start;
byte* bytes = stackalloc byte[c_MaxUnicodeCharsReallocate * c_MaxUTF_8BytesPerUnicodeChar]; // 40*4=160
// Allocate enough stack space to hold any Rune's UTF8 encoding.
Span<byte> utf8Bytes = stackalloc byte[4];

fixed (char* pStr = input)
while (!stringToEscape.IsEmpty)
{
for (; i < end; ++i)
{
char ch = pStr[i];
char ch = stringToEscape[0];

// Is this a non-ASCII (Unicode) character?
if (ch > '\x7F')
if (!char.IsAscii(ch))
{
if (Rune.DecodeFromUtf16(stringToEscape, out Rune r, out int charsConsumed) != OperationStatus.Done)
{
short maxSize = (short)Math.Min(end - i, (int)c_MaxUnicodeCharsReallocate - 1);

short count = 1;
for (; count < maxSize && pStr[i + count] > '\x7f'; ++count) ;

// Is the last a high surrogate?
if (pStr[i + count - 1] >= 0xD800 && pStr[i + count - 1] <= 0xDBFF)
{
// Should be a rare case where the app tries to feed an invalid Unicode surrogates pair
if (count == 1 || count == end - i)
throw new FormatException(SR.Arg_FormatException);
// need to grab one more char as a Surrogate except when it's a bogus input
++count;
}

dest = EnsureDestinationSize(pStr, dest, i,
(short)(count * c_MaxUTF_8BytesPerUnicodeChar * c_EncodedCharsPerByte),
c_MaxUnicodeCharsReallocate * c_MaxUTF_8BytesPerUnicodeChar * c_EncodedCharsPerByte,
ref destPos, prevInputPos);

short numberOfBytes = (short)Encoding.UTF8.GetBytes(pStr + i, count, bytes,
c_MaxUnicodeCharsReallocate * c_MaxUTF_8BytesPerUnicodeChar);

// This is the only exception that built in UriParser can throw after a Uri ctor.
// Should not happen unless the app tries to feed an invalid Unicode string
if (numberOfBytes == 0)
throw new FormatException(SR.Arg_FormatException);
r = Rune.ReplacementChar;
}

i += (count - 1);
Debug.Assert(stringToEscape.EnumerateRunes() is { } e && e.MoveNext() && e.Current == r);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for needing this assert?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I copied it from UriHelper.

Debug.Assert(stringToEscape.EnumerateRunes() is { } e && e.MoveNext() && e.Current == r);

Debug.Assert(charsConsumed is 1 or 2);

for (count = 0; count < numberOfBytes; ++count)
EscapeAsciiChar((char)bytes[count], dest, ref destPos);
stringToEscape = stringToEscape.Slice(charsConsumed);

prevInputPos = i + 1;
}
else if (ch == '%' && rsvd == '%')
{
// Means we don't reEncode '%' but check for the possible escaped sequence
dest = EnsureDestinationSize(pStr, dest, i, c_EncodedCharsPerByte,
c_MaxAsciiCharsReallocate * c_EncodedCharsPerByte, ref destPos, prevInputPos);
if (i + 2 < end && char.IsAsciiHexDigit(pStr[i + 1]) && char.IsAsciiHexDigit(pStr[i + 2]))
{
// leave it escaped
dest[destPos++] = '%';
dest[destPos++] = pStr[i + 1];
dest[destPos++] = pStr[i + 2];
i += 2;
}
else
{
EscapeAsciiChar('%', dest, ref destPos);
}
prevInputPos = i + 1;
}
else if (ch == force1 || ch == force2 || (ch != rsvd && (isUriString ? !IsReservedUnreservedOrHash(ch) : !IsUnreserved(ch))))
// The rune is non-ASCII, so encode it as UTF8, and escape each UTF8 byte.
r.TryEncodeToUtf8(utf8Bytes, out int bytesWritten);
foreach (byte b in utf8Bytes.Slice(0, bytesWritten))
{
dest = EnsureDestinationSize(pStr, dest, i, c_EncodedCharsPerByte,
c_MaxAsciiCharsReallocate * c_EncodedCharsPerByte, ref destPos, prevInputPos);
EscapeAsciiChar(ch, dest, ref destPos);
prevInputPos = i + 1;
PercentEncodeByte(b, ref vsb);
}
}

if (prevInputPos != i)
else if (!UnreservedReserved.Contains(ch))
{
// need to fill up the dest array ?
if (prevInputPos != start || dest != null)
dest = EnsureDestinationSize(pStr, dest, i, 0, 0, ref destPos, prevInputPos);
PercentEncodeByte((byte)ch, ref vsb);
stringToEscape = stringToEscape.Slice(1);
}
}

return dest;
}

//
// ensure destination array has enough space and contains all the needed input stuff
//
private static unsafe char[] EnsureDestinationSize(char* pStr, char[]? dest, int currentInputPos,
short charsToAdd, short minReallocateChars, ref int destPos, int prevInputPos)
{
if (dest is null || dest.Length < destPos + (currentInputPos - prevInputPos) + charsToAdd)
{
// allocating or reallocating array by ensuring enough space based on maxCharsToAdd.
char[] newresult = new char[destPos + (currentInputPos - prevInputPos) + minReallocateChars];
else
{
// We have a character we don't want to escape. It's likely there are more, do a vectorized search.
int charsToCopy = stringToEscape.IndexOfAnyExcept(UnreservedReserved);
if (charsToCopy < 0)
{
charsToCopy = stringToEscape.Length;
}
Debug.Assert(charsToCopy > 0);

if (dest is not null && destPos != 0)
Buffer.BlockCopy(dest, 0, newresult, 0, destPos << 1);
dest = newresult;
vsb.Append(stringToEscape.Slice(0, charsToCopy));
stringToEscape = stringToEscape.Slice(charsToCopy);
}
}

// ensuring we copied everything form the input string left before last escaping
while (prevInputPos != currentInputPos)
dest[destPos++] = pStr[prevInputPos++];
return dest;
}

// Writes the three-character escape for ch into "to" starting at pos: a percent sign, then the
// HexConverter.ToCharUpper digit for the high nibble (ch >> 4), then the one for ch itself.
// pos is advanced past the characters written.
internal static void EscapeAsciiChar(char ch, char[] to, ref int pos)
{
    char highNibble = HexConverter.ToCharUpper(ch >> 4);
    char lowNibble = HexConverter.ToCharUpper(ch);

    to[pos++] = '%';
    to[pos++] = highNibble;
    to[pos++] = lowNibble;
}

// NOTE(review): this listing is a rendered diff without +/- markers; two methods are interleaved below.
// Confirm against the real file which of them is still present.
//
// IsReservedUnreservedOrHash: returns true when c passes IsUnreserved or is listed in RFC3986ReservedMarks.
//
// PercentEncodeByte: appends a percent sign to vsb, then reserves two more chars in vsb and has
// HexConverter.ToCharsBuffer fill them for ch using Casing.Upper (presumably the two upper-case hex digits).
private static bool IsReservedUnreservedOrHash(char c)
internal static void PercentEncodeByte(byte ch, ref ValueStringBuilder vsb)
{
if (IsUnreserved(c))
{
return true;
}
return RFC3986ReservedMarks.Contains(c);
vsb.Append('%');
HexConverter.ToCharsBuffer(ch, vsb.AppendSpan(2), 0, HexConverter.Casing.Upper);
}

// Reports whether c is an ASCII letter or digit, or one of the marks listed in RFC3986UnreservedMarks.
internal static bool IsUnreserved(char c) =>
    char.IsAsciiLetterOrDigit(c) || RFC3986UnreservedMarks.Contains(c);
// Characters that EscapeCodeBase and EscapeStringToBuilder leave unescaped: ASCII letters and digits plus
// every mark listed in RFC3986UnreservedMarks and RFC3986ReservedMarks. The percent sign is not included.
// The backing field is populated on the first read.
[field: AllowNull]
private static SearchValues<char> UnreservedReserved => field ??= SearchValues.Create("!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~");

internal const char c_DummyChar = (char)0xFFFF; // An Invalid Unicode character used as a dummy char passed into the parameter
// Multiplied by c_EncodedCharsPerByte to form the minimum reallocation size requested for ASCII escapes.
private const short c_MaxAsciiCharsReallocate = 40;
// Bounds the run of non-ASCII chars EscapeString handles per pass and sizes its stack byte buffer.
private const short c_MaxUnicodeCharsReallocate = 40;
// Number of UTF-8 bytes budgeted per char when sizing buffers in EscapeString.
private const short c_MaxUTF_8BytesPerUnicodeChar = 4;
// One escaped byte occupies three chars: a percent sign followed by two hex digits.
private const short c_EncodedCharsPerByte = 3;
// Marks accepted by IsReservedUnreservedOrHash in addition to the unreserved set.
private const string RFC3986ReservedMarks = ":/?#[]@!$&'()*+,;=";
// Marks accepted by IsUnreserved in addition to ASCII letters and digits.
private const string RFC3986UnreservedMarks = "-._~";
// Length of the stack buffer handed to the ValueStringBuilder in EscapeCodeBase.
private const int StackallocThreshold = 512;
steveharter marked this conversation as resolved.
Show resolved Hide resolved
}
}
Loading