// Overview of the scan implemented below:
//   1. Null check (throws), then an early exit with -1 when textLength is 0. A negative textLength is
//      only guarded by Debug.Assert.
//   2. Sequential path: walks the characters one at a time, calling NeedsEscaping(char) on each, and
//      returns the index of the first hit; falls through to AllAllowed (-1) when the end is reached.
//   3. Vectorized path (only under NETCOREAPP, when Sse2.IsSupported and textLength is at least
//      Vector128<short>.Count):
//        a. While two full vectors remain, loads two vectors of chars, narrows them into one
//           Vector128<sbyte> with Sse2.PackSignedSaturate and passes it to NeedsEscaping(vector).
//        b. While one full vector remains, does the same with a single loaded vector; the second
//           operand of the pack is a constant vector of 'A', which per the inline comment is a dummy
//           known not to need escaping.
//        c. For the tail: if more than thresholdForRemainingVectorized (5) characters remain, ptr is
//           moved back to vectorizedEnd (end - Vector128<short>.Count) and step (b) is re-entered, so
//           the final vector overlaps characters that were already checked. Otherwise the tail is
//           handed to the sequential path, or AllAllowed is taken when nothing remains.
//        d. VectorizedFound: the result is BitHelper.GetIndexOfFirstNeedToEscape(index) plus
//           CalculateIndex(ptr, text).
//
// NOTE(review): CalculateIndex is called below but is not defined in the visible part of this method;
// presumably it converts ptr into a character offset from text — confirm against its definition.
// NOTE(review): the second "#if NETCOREAPP" region (opened before VectorizedEntry) has no matching
// "#endif", and the method's closing brace is not visible either — the tail of this method appears
// to be missing; confirm against the full file.
// NOTE(review): the body is laid out on two physical lines, so the "//" comments and "#if"/"#endif"
// directives embedded mid-line only behave as intended once the original line breaks are restored.
//
/// <summary>
/// Finds the index of the first character in <paramref name="text"/> for which NeedsEscaping reports
/// that escaping is required.
/// </summary>
/// <param name="text">Pointer to the first UTF-16 character to inspect.</param>
/// <param name="textLength">Number of characters to inspect, starting at <paramref name="text"/>.</param>
/// <returns>The zero-based index of the first character that needs escaping, or -1 when none does.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public override unsafe int FindFirstCharacterToEncode(char *text, int textLength) { if (text == null) { throw new ArgumentNullException(nameof(text)); } Debug.Assert(textLength >= 0); if (textLength == 0) { goto AllAllowed; } int idx = 0; short *ptr = (short *)text; short *end = ptr + (uint)textLength; #if NETCOREAPP if (Sse2.IsSupported && textLength >= Vector128 <short> .Count) { goto VectorizedEntry; } Sequential: #endif Debug.Assert(textLength > 0 && ptr < end); do { Debug.Assert(text <= ptr && ptr < (text + textLength)); if (NeedsEscaping(*(char *)ptr)) { goto Return; } ptr++; idx++; }while (ptr < end); AllAllowed: idx = -1; Return: return(idx); #if NETCOREAPP VectorizedEntry: int index; short *vectorizedEnd; if (textLength >= 2 * Vector128 <short> .Count) { vectorizedEnd = end - 2 * Vector128 <short> .Count; do { Debug.Assert(text <= ptr && ptr <= (text + textLength - 2 * Vector128 <short> .Count)); // Load the next 16 characters, combine them to one byte vector. // Chars that don't cleanly convert to ASCII bytes will get converted (saturated) to // somewhere in the range [0x7F, 0xFF], which the NeedsEscaping method will detect. Vector128 <sbyte> sourceValue = Sse2.PackSignedSaturate( Sse2.LoadVector128(ptr), Sse2.LoadVector128(ptr + Vector128 <short> .Count)); // Check if any of the 16 characters need to be escaped. index = NeedsEscaping(sourceValue); // If index == 0, that means none of the 16 characters needed to be escaped. // TrailingZeroCount is relatively expensive, avoid it if possible. if (index != 0) { goto VectorizedFound; } ptr += 2 * Vector128 <short> .Count; }while (ptr <= vectorizedEnd); } vectorizedEnd = end - Vector128 <short> .Count; Vectorized: // PERF: JIT produces better code for do-while as for a while-loop (no spills) if (ptr <= vectorizedEnd) { do { Debug.Assert(text <= ptr && ptr <= (text + textLength - Vector128 <short> .Count)); // Load the next 8 characters + a dummy known that it must not be escaped. 
// Put the dummy second, so it's easier for GetIndexOfFirstNeedToEscape. Vector128 <sbyte> sourceValue = Sse2.PackSignedSaturate( Sse2.LoadVector128(ptr), Vector128.Create((short)'A')); // max. one "iteration", so no need to cache this vector index = NeedsEscaping(sourceValue); // If index == 0, that means none of the 16 bytes needed to be escaped. // TrailingZeroCount is relatively expensive, avoid it if possible. if (index != 0) { goto VectorizedFound; } ptr += Vector128 <short> .Count; }while (ptr <= vectorizedEnd); } // Process the remaining characters. Debug.Assert(end - ptr < Vector128 <short> .Count); // Process the remaining elements vectorized, only if the remaining count // is above thresholdForRemainingVectorized, otherwise process them sequential. // Threshold found by testing. const int thresholdForRemainingVectorized = 5; if (ptr < end - thresholdForRemainingVectorized) { ptr = vectorizedEnd; goto Vectorized; } idx = CalculateIndex(ptr, text); if (idx < textLength) { goto Sequential; } goto AllAllowed; VectorizedFound: idx = BitHelper.GetIndexOfFirstNeedToEscape(index); idx += CalculateIndex(ptr, text); return(idx);
/// <summary>
/// Locates the first byte of <paramref name="utf8Text"/> that cannot be copied through unescaped.
/// </summary>
/// <param name="utf8Text">The UTF-8 encoded input to inspect.</param>
/// <returns>
/// The index of the first byte that either starts an ill-formed UTF-8 sequence or starts a scalar
/// value that must be encoded; -1 when the entire input can be emitted as-is.
/// </returns>
public virtual unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf8Text)
{
    if (!_isAsciiCacheInitialized)
    {
        InitializeAsciiCache();
    }

    int length = utf8Text.Length;

    fixed (byte* pText = utf8Text)
    {
        // Number of leading bytes already known to be safe.
        int offset = 0;

#if NETCOREAPP
        // Chunked scan: inspect 16 bytes per iteration for as long as a full chunk is available.
        if (Sse2.IsSupported && offset <= length - 16)
        {
            // Copy the lookup vectors into locals once; the JIT does not hoist these loads out of the loop.
            Vector128<sbyte> escapeBitMasks = _bitMaskLookupAsciiNeedsEscaping;
            Vector128<sbyte> bitPositions = Ssse3Helper.s_bitPosLookup;
            Vector128<sbyte> nibbleMask = Ssse3Helper.s_nibbleMaskSByte;
            Vector128<sbyte> nullMask = Ssse3Helper.s_nullMaskSByte;

            sbyte* chunk = (sbyte*)pText;

            do
            {
                Debug.Assert(chunk >= pText && chunk <= (pText + length - 16));

                Vector128<sbyte> block = Sse2.LoadVector128(chunk);

                // Viewed as signed bytes, every non-ASCII byte is negative, so its sign bit
                // shows up in the move mask.
                int laneBits = Sse2.MoveMask(block);

                if (laneBits != 0)
                {
                    // The chunk contains non-ASCII data: walk it one scalar value at a time.
                    // A multi-byte sequence that straddles the chunk boundary may push the
                    // offset past chunkEnd; the next chunk then simply starts from there.
                    int chunkEnd = offset + 16;
                    Debug.Assert(chunkEnd <= length);

                    while (offset < chunkEnd)
                    {
                        Debug.Assert((pText + offset) <= (pText + length));

                        byte current = pText[offset];

                        if (!UnicodeUtility.IsAsciiCodePoint(current))
                        {
                            OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(
                                utf8Text.Slice(offset),
                                out uint scalar,
                                out int consumed);

                            Debug.Assert(scalar <= int.MaxValue);

                            if (status != OperationStatus.Done || WillEncode((int)scalar))
                            {
                                return offset;
                            }

                            Debug.Assert(status == OperationStatus.Done);
                            offset += consumed;
                            continue;
                        }

                        if (DoesAsciiNeedEncoding(current))
                        {
                            return offset;
                        }

                        offset++;
                    }
                }
                else if (Ssse3.IsSupported)
                {
                    // Pure ASCII chunk: classify all 16 bytes at once.
                    Vector128<sbyte> escapeMask = Ssse3Helper.CreateEscapingMask(
                        block,
                        escapeBitMasks,
                        bitPositions,
                        nibbleMask,
                        nullMask);

                    laneBits = Sse2.MoveMask(escapeMask);

                    if (laneBits != 0)
                    {
                        return offset + BitHelper.GetIndexOfFirstNeedToEscape(laneBits);
                    }

                    offset += 16;
                }
                else
                {
                    // Pure ASCII chunk without SSSE3: test the 16 bytes individually, in order.
                    byte* asciiBytes = (byte*)chunk;

                    for (int lane = 0; lane < 16; lane++)
                    {
                        if (DoesAsciiNeedEncoding(asciiBytes[lane]))
                        {
                            return offset + lane;
                        }
                    }

                    offset += 16;
                }

                chunk = (sbyte*)pText + offset;
            }
            while (offset <= length - 16);

            // Fewer than 16 bytes are left; the scalar loop below finishes them.
            Debug.Assert(length - offset < 16);
        }
#endif

        // Scalar scan: handles the whole input when the chunked scan is unavailable,
        // and the tail of the input otherwise.
        while (offset < length)
        {
            Debug.Assert((pText + offset) <= (pText + length));

            byte current = pText[offset];

            if (!UnicodeUtility.IsAsciiCodePoint(current))
            {
                OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(
                    utf8Text.Slice(offset),
                    out uint scalar,
                    out int consumed);

                Debug.Assert(scalar <= int.MaxValue);

                if (status != OperationStatus.Done || WillEncode((int)scalar))
                {
                    return offset;
                }

                Debug.Assert(status == OperationStatus.Done);
                offset += consumed;
                continue;
            }

            if (DoesAsciiNeedEncoding(current))
            {
                return offset;
            }

            offset++;
        }

        Debug.Assert(offset == length);

        // Every byte is allowed.
        return -1;
    }
}