/// <summary>
/// Scans a UTF-16 buffer and reports the position of the first character that has to be encoded.
/// </summary>
/// <param name="text">Pointer to the first character to examine. Must not be null.</param>
/// <param name="textLength">Number of characters to examine starting at <paramref name="text"/>.</param>
/// <returns>
/// The zero-based index of the first character that is either rejected by the allowed-character
/// bitmap or flagged by the vectorized escaping mask; -1 when no such character is found.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
{
    if (text == null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    int idx = 0;

#if NETCOREAPP
    if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
    {
        short* blockStart = (short*)text;

        // Vectorized pass: consume 8 UTF-16 code units (one 128-bit vector) per iteration
        // for as long as a complete block is still available.
        while (idx <= textLength - 8)
        {
            Debug.Assert(blockStart >= text && blockStart <= (text + textLength - 8));

            Vector128<short> block;
            Vector128<short> asciiMask;
            bool hasNonAscii;

            if (Sse2.IsSupported)
            {
                block = Sse2.LoadVector128(blockStart);
                asciiMask = Sse2Helper.CreateAsciiMask(block);
                hasNonAscii = Sse2Helper.ContainsNonAsciiByte(asciiMask.AsSByte());
            }
            else if (AdvSimd.Arm64.IsSupported)
            {
                block = AdvSimd.LoadVector128(blockStart);
                asciiMask = AdvSimdHelper.CreateAsciiMask(block);
                hasNonAscii = AdvSimdHelper.ContainsNonAsciiByte(asciiMask.AsSByte());
            }
            else
            {
                throw new PlatformNotSupportedException();
            }

            if (!hasNonAscii)
            {
                // Every character in the block is ASCII, so the escaping decision can be made
                // for the whole block with a single vector mask.
                Vector128<short> escapeMask;
                int firstFlaggedByte;

                if (Sse2.IsSupported)
                {
                    escapeMask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(block);
                    firstFlaggedByte = Sse2Helper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                }
                else if (AdvSimd.Arm64.IsSupported)
                {
                    escapeMask = AdvSimdHelper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(block);
                    firstFlaggedByte = AdvSimdHelper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                }
                else
                {
                    throw new PlatformNotSupportedException();
                }

                // A byte index of 16 or more means nothing in this block needs escaping.
                if (firstFlaggedByte < 16)
                {
                    // The helper reports a byte offset; each character occupies two bytes,
                    // so halve it to obtain the character offset inside the block.
                    Debug.Assert(firstFlaggedByte % 2 == 0);
                    return idx + (firstFlaggedByte >> 1);
                }

                idx += 8;
                blockStart += 8;
                continue;
            }

            // The block holds at least one non-ASCII character: check its 8 characters
            // one at a time against the allowed-character bitmap.
            int blockEnd = idx + 8;
            Debug.Assert(blockEnd <= textLength);

            while (idx < blockEnd)
            {
                Debug.Assert((text + idx) <= (text + textLength));
                if (!_allowedCharacters.IsCharacterAllowed(text[idx]))
                {
                    return idx;
                }

                idx++;
            }

            blockStart += 8;
        }

        // Fewer than 8 characters are left; the scalar loop below handles them.
        Debug.Assert(textLength - idx < 8);
    }
#endif

    // Scalar pass over whatever the vectorized pass did not consume
    // (the entire input when no supported SIMD instruction set is available).
    while (idx < textLength)
    {
        Debug.Assert((text + idx) <= (text + textLength));
        if (!_allowedCharacters.IsCharacterAllowed(text[idx]))
        {
            return idx;
        }

        idx++;
    }

    // Every character is allowed.
    return -1;
}
/// <summary>
/// Scans UTF-8 encoded text and reports the position of the first byte that begins a scalar value
/// which has to be encoded, or which cannot be decoded.
/// </summary>
/// <param name="utf8Text">The UTF-8 bytes to examine.</param>
/// <returns>
/// The zero-based byte index at which scanning stopped because an ASCII byte was rejected, a
/// multi-byte sequence failed to decode, or the decoded scalar must be encoded; -1 when the whole
/// input passes.
/// </returns>
public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf8Text)
{
    fixed (byte* ptr = utf8Text)
    {
        int idx = 0;

#if NETCOREAPP
        if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
        {
            sbyte* blockStart = (sbyte*)ptr;

            // Vectorized pass: consume 16 bytes (one 128-bit vector) per iteration
            // for as long as a complete block is still available.
            while (idx <= utf8Text.Length - 16)
            {
                Debug.Assert(blockStart >= ptr && blockStart <= (ptr + utf8Text.Length - 16));

                Vector128<sbyte> block;
                bool hasNonAscii;

                // Bytes outside the ASCII range are negative when viewed as signed bytes,
                // which is what the ContainsNonAsciiByte helpers are given to test.
                if (Sse2.IsSupported)
                {
                    block = Sse2.LoadVector128(blockStart);
                    hasNonAscii = Sse2Helper.ContainsNonAsciiByte(block);
                }
                else
                {
                    block = AdvSimd.LoadVector128(blockStart);
                    hasNonAscii = AdvSimdHelper.ContainsNonAsciiByte(block);
                }

                if (!hasNonAscii)
                {
                    // All 16 bytes are ASCII, so the escaping decision can be made
                    // for the whole block with a single vector mask.
                    int firstFlaggedByte;

                    if (Sse2.IsSupported)
                    {
                        Vector128<sbyte> escapeMask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(block);
                        firstFlaggedByte = Sse2Helper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                    }
                    else
                    {
                        Vector128<sbyte> escapeMask = AdvSimdHelper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(block);
                        firstFlaggedByte = AdvSimdHelper.GetIndexOfFirstNonAsciiByte(escapeMask.AsByte());
                    }

                    // A byte index of 16 or more means nothing in this block needs escaping.
                    if (firstFlaggedByte < 16)
                    {
                        return idx + firstFlaggedByte;
                    }

                    idx += 16;
                    blockStart += 16;
                    continue;
                }

                // The block holds at least one non-ASCII byte: walk it scalar by scalar.
                int blockEnd = idx + 16;
                Debug.Assert(blockEnd <= utf8Text.Length);

                while (idx < blockEnd)
                {
                    Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));

                    byte current = ptr[idx];
                    if (UnicodeUtility.IsAsciiCodePoint(current))
                    {
                        if (!_allowedCharacters.IsUnicodeScalarAllowed(current))
                        {
                            return idx;
                        }

                        idx++;
                        continue;
                    }

                    OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(
                        utf8Text.Slice(idx), out uint scalar, out int bytesConsumed);

                    Debug.Assert(scalar <= int.MaxValue);

                    // Stop on a failed decode first; WillEncode is only consulted for a decoded scalar.
                    if (status != OperationStatus.Done)
                    {
                        return idx;
                    }

                    if (WillEncode((int)scalar))
                    {
                        return idx;
                    }

                    Debug.Assert(status == OperationStatus.Done);
                    idx += bytesConsumed;
                }

                // A multi-byte scalar may have carried idx past the block boundary,
                // so re-derive the load address from idx instead of adding 16.
                blockStart = (sbyte*)ptr + idx;
            }

            // Fewer than 16 bytes are left; the scalar loop below handles them.
            Debug.Assert(utf8Text.Length - idx < 16);
        }
#endif

        // Scalar pass over whatever the vectorized pass did not consume
        // (the entire input when no supported SIMD instruction set is available).
        while (idx < utf8Text.Length)
        {
            Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));

            byte current = ptr[idx];
            if (UnicodeUtility.IsAsciiCodePoint(current))
            {
                if (!_allowedCharacters.IsUnicodeScalarAllowed(current))
                {
                    return idx;
                }

                idx++;
                continue;
            }

            OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(
                utf8Text.Slice(idx), out uint scalar, out int bytesConsumed);

            Debug.Assert(scalar <= int.MaxValue);

            // Stop on a failed decode first; WillEncode is only consulted for a decoded scalar.
            if (status != OperationStatus.Done)
            {
                return idx;
            }

            if (WillEncode((int)scalar))
            {
                return idx;
            }

            Debug.Assert(status == OperationStatus.Done);
            idx += bytesConsumed;
        }

        Debug.Assert(idx == utf8Text.Length);

        // Every byte is allowed.
        return -1;
    }
}