public static bool IsValidUtf8Slice(ReadOnlySpan <byte> span, int start, int count)
        {
            ReadOnlySpan <byte> slice = span.Slice(start, count);

            int sliceStart  = start;
            int startOffset = 0;

            for (; startOffset < 3 && sliceStart > 0; startOffset++)
            {
                sliceStart--;
            }

            if (sliceStart == 0 && count == span.Length)
            {
                return(true);
            }

            OperationStatus statusOfLast = Rune.DecodeLastFromUtf8(slice, out _, out _);

            if (statusOfLast != OperationStatus.Done)
            {
                return(false);
            }

            for (int i = startOffset; i > 0;)
            {
                ReadOnlySpan <byte> preSlice = span[(start - i)..];
        /// <summary>
        /// Returns the index in <paramref name="utf8Data"/> where the trailing whitespace sequence
        /// begins, or 0 if the data contains only whitespace characters, or the span length if the
        /// data does not end with any whitespace characters.
        /// </summary>
        public static int GetIndexOfTrailingWhiteSpaceSequence(ReadOnlySpan <byte> utf8Data)
        {
            // This method is optimized for the case where the input data is ASCII, and if the
            // data does need to be trimmed it's likely that only a relatively small number of
            // bytes will be trimmed.

            int length = utf8Data.Length;

            while (length > 0)
            {
                // Very quick check: see if the byte is in the range [ 21 .. 7F ].
                // If so, we can skip the more expensive logic later in this method.

                if ((sbyte)utf8Data[length - 1] > (sbyte)0x20)
                {
                    break;
                }

                uint possibleAsciiByte = utf8Data[length - 1];
                if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte))
                {
                    // The simple comparison failed. Let's read the actual byte value,
                    // and if it's ASCII we can delegate to Rune's inlined method
                    // implementation.

                    if (Rune.IsWhiteSpace(new Rune(possibleAsciiByte)))
                    {
                        length--;
                        continue;
                    }
                }
                else
                {
                    // Not ASCII data. Go back to the slower "decode the entire scalar"
                    // code path, then compare it against our Unicode tables.

                    Rune.DecodeLastFromUtf8(utf8Data.Slice(0, length), out Rune decodedRune, out int bytesConsumed);
                    if (Rune.IsWhiteSpace(decodedRune))
                    {
                        length -= bytesConsumed;
                        continue;
                    }
                }

                break; // If we got here, we saw a non-whitespace subsequence.
            }

            return(length);
        }
Пример #3
0
        public static void Trim(string input)
        {
            // Arrange

            using BoundedUtf8Span boundedSpan = new BoundedUtf8Span(input);
            Utf8Span span = boundedSpan.Span;

            // Act

            Utf8Span trimmed = span.Trim();

            // Assert
            // Compute the trim manually and ensure it matches the trimmed span's characteristics.

            ReadOnlySpan <byte> utf8Bytes = span.Bytes;

            while (!utf8Bytes.IsEmpty)
            {
                OperationStatus status = Rune.DecodeFromUtf8(utf8Bytes, out Rune decodedRune, out int bytesConsumed);
                Assert.Equal(OperationStatus.Done, status);

                if (!Rune.IsWhiteSpace(decodedRune))
                {
                    break;
                }

                utf8Bytes = utf8Bytes.Slice(bytesConsumed);
            }
            while (!utf8Bytes.IsEmpty)
            {
                OperationStatus status = Rune.DecodeLastFromUtf8(utf8Bytes, out Rune decodedRune, out int bytesConsumed);
                Assert.Equal(OperationStatus.Done, status);

                if (!Rune.IsWhiteSpace(decodedRune))
                {
                    break;
                }

                utf8Bytes = utf8Bytes[..^ bytesConsumed];
Пример #4
0
 [InlineData(new byte[] { 0xF0, 0x9F, 0x98, }, OperationStatus.NeedMoreData, 0xFFFD, 3)]      // [ F0 9F 98 ] is valid 3-byte start of 4-byte sequence
 public static void DecodeLastFromUtf8(byte[] data, OperationStatus expectedOperationStatus, int expectedRuneValue, int expectedBytesConsumed)
 {
     Assert.Equal(expectedOperationStatus, Rune.DecodeLastFromUtf8(data, out Rune actualRune, out int actualBytesConsumed));
     Assert.Equal(expectedRuneValue, actualRune.Value);
     Assert.Equal(expectedBytesConsumed, actualBytesConsumed);
 }