public virtual unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf8Text)
{
    // Contract: walk the input until we hit ill-formed UTF-8 or a scalar value that must be encoded,
    // and return the index (in the original input) where that happens. If the whole input is
    // consumed without hitting either, return -1: the text can be copied as-is without escaping.

    if (!_isAsciiCacheInitialized)
    {
        InitializeAsciiCache();
    }

    fixed (byte* pText = utf8Text)
    {
        int offset = 0;

#if NETCOREAPP
        if ((Sse2.IsSupported || AdvSimd.Arm64.IsSupported) && offset <= utf8Text.Length - 16)
        {
            // Copy the lookup vectors into locals once, as the JIT won't hoist them out of the loop.
            Vector128<sbyte> asciiEscapeBitMask = _bitMaskLookupAsciiNeedsEscaping;
            Vector128<sbyte> bitPositions = Ssse3Helper.s_bitPosLookup;
            Vector128<sbyte> nibbleMask = Ssse3Helper.s_nibbleMaskSByte;
            Vector128<sbyte> nullMask = Ssse3Helper.s_nullMaskSByte;

            do
            {
                // The 16-byte window always starts at the current offset.
                sbyte* chunk = (sbyte*)pText + offset;
                Debug.Assert(chunk >= pText && chunk <= (pText + utf8Text.Length - 16));

                // Load the window. A byte outside the ASCII range is negative when viewed as a signed byte.
                Vector128<sbyte> chunkBytes;
                bool hasNonAscii;
                if (Sse2.IsSupported)
                {
                    chunkBytes = Sse2.LoadVector128(chunk);
                    hasNonAscii = Sse2Helper.ContainsNonAsciiByte(chunkBytes);
                }
                else if (AdvSimd.Arm64.IsSupported)
                {
                    chunkBytes = AdvSimd.LoadVector128(chunk);
                    hasNonAscii = AdvSimdHelper.ContainsNonAsciiByte(chunkBytes);
                }
                else
                {
                    throw new PlatformNotSupportedException();
                }

                if (hasNonAscii)
                {
                    // At least one of these 16 bytes is non-ASCII: walk the window scalar by scalar.
                    // A multi-byte sequence may carry the offset past the end of the window.
                    int windowEnd = offset + 16;
                    Debug.Assert(windowEnd <= utf8Text.Length);

                    while (offset < windowEnd)
                    {
                        Debug.Assert((pText + offset) <= (pText + utf8Text.Length));

                        byte current = pText[offset];
                        if (UnicodeUtility.IsAsciiCodePoint(current))
                        {
                            if (DoesAsciiNeedEncoding(current))
                            {
                                return offset;
                            }

                            offset++;
                            continue;
                        }

                        OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(
                            utf8Text.Slice(offset), out uint scalar, out int consumed);
                        Debug.Assert(scalar <= int.MaxValue);

                        if (status != OperationStatus.Done || WillEncode((int)scalar))
                        {
                            return offset;
                        }

                        Debug.Assert(status == OperationStatus.Done);
                        offset += consumed;
                    }
                }
                else
                {
                    // All 16 bytes are ASCII.
                    // TODO AdvSimd: optimization maybe achievable using VectorTableLookup and/or VectorTableLookupExtension
                    if (Ssse3.IsSupported)
                    {
                        Vector128<sbyte> escapeMask = Ssse3Helper.CreateEscapingMask(
                            chunkBytes, asciiEscapeBitMask, bitPositions, nibbleMask, nullMask);
                        int firstHit = Sse2Helper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());

                        // A value below 16 is treated as the position of a byte inside this window.
                        if (firstHit < 16)
                        {
                            return offset + firstHit;
                        }
                    }
                    else
                    {
                        // No SSSE3: test the 16 bytes one at a time, unrolled.
                        byte* p = (byte*)chunk;
                        if (DoesAsciiNeedEncoding(p[0])) { return offset; }
                        if (DoesAsciiNeedEncoding(p[1])) { return offset + 1; }
                        if (DoesAsciiNeedEncoding(p[2])) { return offset + 2; }
                        if (DoesAsciiNeedEncoding(p[3])) { return offset + 3; }
                        if (DoesAsciiNeedEncoding(p[4])) { return offset + 4; }
                        if (DoesAsciiNeedEncoding(p[5])) { return offset + 5; }
                        if (DoesAsciiNeedEncoding(p[6])) { return offset + 6; }
                        if (DoesAsciiNeedEncoding(p[7])) { return offset + 7; }
                        if (DoesAsciiNeedEncoding(p[8])) { return offset + 8; }
                        if (DoesAsciiNeedEncoding(p[9])) { return offset + 9; }
                        if (DoesAsciiNeedEncoding(p[10])) { return offset + 10; }
                        if (DoesAsciiNeedEncoding(p[11])) { return offset + 11; }
                        if (DoesAsciiNeedEncoding(p[12])) { return offset + 12; }
                        if (DoesAsciiNeedEncoding(p[13])) { return offset + 13; }
                        if (DoesAsciiNeedEncoding(p[14])) { return offset + 14; }
                        if (DoesAsciiNeedEncoding(p[15])) { return offset + 15; }
                    }

                    offset += 16;
                }
            }
            while (offset <= utf8Text.Length - 16);

            // Fewer than 16 bytes are left for the scalar loop below.
            Debug.Assert(utf8Text.Length - offset < 16);
        }
#endif

        while (offset < utf8Text.Length)
        {
            Debug.Assert((pText + offset) <= (pText + utf8Text.Length));

            byte current = pText[offset];
            if (UnicodeUtility.IsAsciiCodePoint(current))
            {
                if (DoesAsciiNeedEncoding(current))
                {
                    return offset;
                }

                offset++;
                continue;
            }

            OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(
                utf8Text.Slice(offset), out uint scalar, out int consumed);
            Debug.Assert(scalar <= int.MaxValue);

            if (status != OperationStatus.Done || WillEncode((int)scalar))
            {
                return offset;
            }

            Debug.Assert(status == OperationStatus.Done);
            offset += consumed;
        }

        Debug.Assert(offset == utf8Text.Length);
        return -1; // All bytes are allowed.
    }
}
/// <summary>
/// Scans a UTF-16 buffer for the first character that has to be escaped.
/// </summary>
/// <param name="text">Pointer to the first character to inspect.</param>
/// <param name="textLength">Number of characters available at <paramref name="text"/>.</param>
/// <returns>The index of the first character that must be escaped, or -1 if every character is allowed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
{
    if (text == null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    int offset = 0;

#if NETCOREAPP
    if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
    {
        while (offset <= textLength - 8)
        {
            // The 8-character window always starts at the current offset.
            short* chunk = (short*)text + offset;
            Debug.Assert(chunk >= text && chunk <= (text + textLength - 8));

            // Load the next 8 characters and test whether all of them are ASCII.
            Vector128<short> chunkChars;
            Vector128<short> asciiMask;
            bool hasNonAscii;
            if (Sse2.IsSupported)
            {
                chunkChars = Sse2.LoadVector128(chunk);
                asciiMask = Sse2Helper.CreateAsciiMask(chunkChars);
                hasNonAscii = Sse2Helper.ContainsNonAsciiByte(asciiMask.AsSByte());
            }
            else if (AdvSimd.Arm64.IsSupported)
            {
                chunkChars = AdvSimd.LoadVector128(chunk);
                asciiMask = AdvSimdHelper.CreateAsciiMask(chunkChars);
                hasNonAscii = AdvSimdHelper.ContainsNonAsciiByte(asciiMask.AsSByte());
            }
            else
            {
                throw new PlatformNotSupportedException();
            }

            if (!hasNonAscii)
            {
                // All 8 characters are ASCII: let the escaping mask find the first one to escape.
                Vector128<short> escapeMask;
                int byteIndex;
                if (Sse2.IsSupported)
                {
                    escapeMask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(chunkChars);
                    byteIndex = Sse2Helper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                }
                else if (AdvSimd.Arm64.IsSupported)
                {
                    escapeMask = AdvSimdHelper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(chunkChars);
                    byteIndex = AdvSimdHelper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                }
                else
                {
                    throw new PlatformNotSupportedException();
                }

                // A byte index of 16 or more means none of the 8 characters needs escaping.
                if (byteIndex < 16)
                {
                    // The index counts bytes; each character is two bytes wide, so halve it.
                    Debug.Assert(byteIndex % 2 == 0);
                    return offset + (byteIndex >> 1);
                }

                offset += 8;
                continue;
            }

            // At least one of the 8 characters is non-ASCII: check them one at a time.
            int windowEnd = offset + 8;
            Debug.Assert(windowEnd <= textLength);
            while (offset < windowEnd)
            {
                Debug.Assert((text + offset) <= (text + textLength));
                if (!_allowedCharacters.IsCharacterAllowed(text[offset]))
                {
                    return offset;
                }

                offset++;
            }
        }

        // Fewer than 8 characters are left for the scalar loop below.
        Debug.Assert(textLength - offset < 8);
    }
#endif

    while (offset < textLength)
    {
        Debug.Assert((text + offset) <= (text + textLength));
        if (!_allowedCharacters.IsCharacterAllowed(text[offset]))
        {
            return offset;
        }

        offset++;
    }

    return -1; // All characters are allowed.
}
/// <summary>
/// Scans UTF-8 text for the first byte that cannot be copied to the output unescaped.
/// </summary>
/// <param name="utf8Text">The UTF-8 bytes to inspect.</param>
/// <returns>
/// The index at which an ASCII byte needing encoding, a failed UTF-8 decode, or a scalar value that
/// will be encoded was found; -1 if all bytes are allowed.
/// </returns>
public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf8Text)
{
    fixed (byte* pText = utf8Text)
    {
        int offset = 0;

#if NETCOREAPP
        if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
        {
            while (offset <= utf8Text.Length - 16)
            {
                // The 16-byte window always starts at the current offset.
                sbyte* chunk = (sbyte*)pText + offset;
                Debug.Assert(chunk >= pText && chunk <= (pText + utf8Text.Length - 16));

                // Load the next 16 bytes and check for ASCII text.
                // A byte outside the ASCII range is negative when viewed as a signed byte.
                bool hasNonAscii;
                if (Sse2.IsSupported)
                {
                    Vector128<sbyte> chunkBytes = Sse2.LoadVector128(chunk);
                    hasNonAscii = Sse2Helper.ContainsNonAsciiByte(chunkBytes);
                }
                else if (AdvSimd.Arm64.IsSupported)
                {
                    Vector128<sbyte> chunkBytes = AdvSimd.LoadVector128(chunk);
                    hasNonAscii = AdvSimdHelper.ContainsNonAsciiByte(chunkBytes);
                }
                else
                {
                    throw new PlatformNotSupportedException();
                }

                if (!hasNonAscii)
                {
                    // All 16 bytes are ASCII: test them in order, unrolled, stopping at the first hit.
                    byte* p = pText + offset;
                    if (DoesAsciiNeedEncoding(p[0]) == 1) { return offset; }
                    if (DoesAsciiNeedEncoding(p[1]) == 1) { return offset + 1; }
                    if (DoesAsciiNeedEncoding(p[2]) == 1) { return offset + 2; }
                    if (DoesAsciiNeedEncoding(p[3]) == 1) { return offset + 3; }
                    if (DoesAsciiNeedEncoding(p[4]) == 1) { return offset + 4; }
                    if (DoesAsciiNeedEncoding(p[5]) == 1) { return offset + 5; }
                    if (DoesAsciiNeedEncoding(p[6]) == 1) { return offset + 6; }
                    if (DoesAsciiNeedEncoding(p[7]) == 1) { return offset + 7; }
                    if (DoesAsciiNeedEncoding(p[8]) == 1) { return offset + 8; }
                    if (DoesAsciiNeedEncoding(p[9]) == 1) { return offset + 9; }
                    if (DoesAsciiNeedEncoding(p[10]) == 1) { return offset + 10; }
                    if (DoesAsciiNeedEncoding(p[11]) == 1) { return offset + 11; }
                    if (DoesAsciiNeedEncoding(p[12]) == 1) { return offset + 12; }
                    if (DoesAsciiNeedEncoding(p[13]) == 1) { return offset + 13; }
                    if (DoesAsciiNeedEncoding(p[14]) == 1) { return offset + 14; }
                    if (DoesAsciiNeedEncoding(p[15]) == 1) { return offset + 15; }

                    offset += 16;
                    continue;
                }

                // At least one of these 16 bytes is non-ASCII: walk the window scalar by scalar.
                // A multi-byte sequence may carry the offset past the end of the window.
                int windowEnd = offset + 16;
                Debug.Assert(windowEnd <= utf8Text.Length);

                while (offset < windowEnd)
                {
                    Debug.Assert((pText + offset) <= (pText + utf8Text.Length));

                    byte current = pText[offset];
                    if (UnicodeUtility.IsAsciiCodePoint(current))
                    {
                        if (DoesAsciiNeedEncoding(current) == 1)
                        {
                            return offset;
                        }

                        offset++;
                        continue;
                    }

                    OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(
                        utf8Text.Slice(offset), out uint scalar, out int consumed);
                    Debug.Assert(scalar <= int.MaxValue);

                    if (status != OperationStatus.Done || WillEncode((int)scalar))
                    {
                        return offset;
                    }

                    Debug.Assert(status == OperationStatus.Done);
                    offset += consumed;
                }
            }

            // Fewer than 16 bytes are left for the scalar loop below.
            Debug.Assert(utf8Text.Length - offset < 16);
        }
#endif

        while (offset < utf8Text.Length)
        {
            Debug.Assert((pText + offset) <= (pText + utf8Text.Length));

            byte current = pText[offset];
            if (UnicodeUtility.IsAsciiCodePoint(current))
            {
                if (DoesAsciiNeedEncoding(current) == 1)
                {
                    return offset;
                }

                offset++;
                continue;
            }

            OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(
                utf8Text.Slice(offset), out uint scalar, out int consumed);
            Debug.Assert(scalar <= int.MaxValue);

            if (status != OperationStatus.Done || WillEncode((int)scalar))
            {
                return offset;
            }

            Debug.Assert(status == OperationStatus.Done);
            offset += consumed;
        }

        Debug.Assert(offset == utf8Text.Length);
        return -1; // All bytes are allowed.
    }
}
/// <summary>
/// Scans UTF-8 text for the first byte that cannot be copied to the output unescaped.
/// </summary>
/// <param name="utf8Text">The UTF-8 bytes to inspect.</param>
/// <returns>
/// The index at which a disallowed ASCII byte, a failed UTF-8 decode, or a scalar value that will be
/// encoded was found; -1 if all bytes are allowed.
/// </returns>
public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf8Text)
{
    fixed (byte* pText = utf8Text)
    {
        int offset = 0;

#if NETCOREAPP
        if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
        {
            while (offset <= utf8Text.Length - 16)
            {
                // The 16-byte window always starts at the current offset.
                sbyte* chunk = (sbyte*)pText + offset;
                Debug.Assert(chunk >= pText && chunk <= (pText + utf8Text.Length - 16));

                // Load the next 16 bytes and check for ASCII text.
                // A byte outside the ASCII range is negative when viewed as a signed byte.
                Vector128<sbyte> chunkBytes;
                bool hasNonAscii;
                if (Sse2.IsSupported)
                {
                    chunkBytes = Sse2.LoadVector128(chunk);
                    hasNonAscii = Sse2Helper.ContainsNonAsciiByte(chunkBytes);
                }
                else
                {
                    chunkBytes = AdvSimd.LoadVector128(chunk);
                    hasNonAscii = AdvSimdHelper.ContainsNonAsciiByte(chunkBytes);
                }

                if (!hasNonAscii)
                {
                    // All 16 bytes are ASCII: let the escaping mask find the first one to escape.
                    int firstHit;
                    if (Sse2.IsSupported)
                    {
                        Vector128<sbyte> escapeMask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(chunkBytes);
                        firstHit = Sse2Helper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                    }
                    else
                    {
                        Vector128<sbyte> escapeMask = AdvSimdHelper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(chunkBytes);
                        firstHit = AdvSimdHelper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                    }

                    // An index of 16 or more means none of the 16 bytes needs escaping;
                    // otherwise it is the position of the first such byte within the window.
                    if (firstHit < 16)
                    {
                        return offset + firstHit;
                    }

                    offset += 16;
                    continue;
                }

                // At least one of these 16 bytes is non-ASCII: walk the window scalar by scalar.
                // A multi-byte sequence may carry the offset past the end of the window.
                int windowEnd = offset + 16;
                Debug.Assert(windowEnd <= utf8Text.Length);

                while (offset < windowEnd)
                {
                    Debug.Assert((pText + offset) <= (pText + utf8Text.Length));

                    byte current = pText[offset];
                    if (UnicodeUtility.IsAsciiCodePoint(current))
                    {
                        if (!_allowedCharacters.IsUnicodeScalarAllowed(current))
                        {
                            return offset;
                        }

                        offset++;
                        continue;
                    }

                    OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(
                        utf8Text.Slice(offset), out uint scalar, out int consumed);
                    Debug.Assert(scalar <= int.MaxValue);

                    if (status != OperationStatus.Done || WillEncode((int)scalar))
                    {
                        return offset;
                    }

                    Debug.Assert(status == OperationStatus.Done);
                    offset += consumed;
                }
            }

            // Fewer than 16 bytes are left for the scalar loop below.
            Debug.Assert(utf8Text.Length - offset < 16);
        }
#endif

        while (offset < utf8Text.Length)
        {
            Debug.Assert((pText + offset) <= (pText + utf8Text.Length));

            byte current = pText[offset];
            if (UnicodeUtility.IsAsciiCodePoint(current))
            {
                if (!_allowedCharacters.IsUnicodeScalarAllowed(current))
                {
                    return offset;
                }

                offset++;
                continue;
            }

            OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(
                utf8Text.Slice(offset), out uint scalar, out int consumed);
            Debug.Assert(scalar <= int.MaxValue);

            if (status != OperationStatus.Done || WillEncode((int)scalar))
            {
                return offset;
            }

            Debug.Assert(status == OperationStatus.Done);
            offset += consumed;
        }

        Debug.Assert(offset == utf8Text.Length);
        return -1; // All bytes are allowed.
    }
}