Esempio n. 1
0
        /// <summary>
        /// Returns the next UTF-16 code unit decoded from the unread part of the UTF-8 buffer
        /// without changing <c>_index</c>, or -1 when <c>_index</c> is at or past the end of the buffer.
        /// </summary>
        /// <returns>
        /// The decoded char as an <see cref="int"/>, or -1 at end of buffer. For any transcoder status
        /// other than invalid data, whatever was written to the single-char window is returned
        /// (the default char if nothing was written).
        /// </returns>
        /// <exception cref="Exception">The transcoder reported invalid data at the current position.</exception>
        public override int Peek()
        {
            if (_index < _buffer.Length)
            {
                char decoded = default(char);
                var remaining = _buffer.Span.Slice(_index);

                unsafe
                {
                    // The destination is a one-char window over the local, viewed as bytes,
                    // so the transcoder can emit at most one UTF-16 code unit.
                    var window = new Span<char>(&decoded, 1).AsBytes();
                    var status = Utf8.ToUtf16(remaining, window, out int _, out int _);

                    if (status == TransformationStatus.InvalidData)
                    {
                        throw new Exception("invalid UTF8 byte at " + _index.ToString());
                    }
                }

                return decoded;
            }

            return -1;
        }
Esempio n. 2
0
        /// <summary>
        /// Attempts to transcode <paramref name="input"/> from UTF-8 into a UTF-16 string without
        /// replacing invalid sequences.
        /// </summary>
        /// <param name="input">The UTF-8 bytes to decode.</param>
        /// <param name="decoded">The decoded string when the method returns <c>true</c>; otherwise <c>null</c>.</param>
        /// <returns><c>true</c> when the transcoder reports <see cref="OperationStatus.Done"/>; otherwise <c>false</c>.</returns>
        private static bool TryDecodeUtf8(ReadOnlySpan<byte> input, out string decoded)
        {
            ArrayPool<char> pool = ArrayPool<char>.Shared;

            // The scratch buffer is sized by the input byte count.
            char[] scratch = pool.Rent(input.Length);

            try
            {
                OperationStatus status = Utf8.ToUtf16(input, scratch, out _, out int length, replaceInvalidSequences: false);
                bool succeeded = status == OperationStatus.Done;

                decoded = succeeded ? new string(scratch, 0, length) : null;
                return succeeded;
            }
            finally
            {
                // Runs on both the success and failure paths, and if transcoding throws.
                pool.Return(scratch);
            }
        }
        /// <summary>
        /// Shared body for the UTF-8 to UTF-16 transcoding tests: copies the input into a
        /// <c>BoundedMemory</c> source, transcodes it into a <c>BoundedMemory</c> destination of
        /// <paramref name="destinationSize"/> chars, and asserts the status, the number of bytes read,
        /// the number of chars written and the transcoded text against the expectations.
        /// </summary>
        private static void ToChars_Test_Core(ReadOnlySpan<byte> utf8Input, int destinationSize, bool replaceInvalidSequences, bool isFinalChunk, OperationStatus expectedOperationStatus, int expectedNumBytesRead, ReadOnlySpan<char> expectedUtf16Transcoding)
        {
            // Arrange
            using (BoundedMemory<byte> source = BoundedMemory.AllocateFromExistingData(utf8Input))
            using (BoundedMemory<char> destination = BoundedMemory.Allocate<char>(destinationSize))
            {
                source.MakeReadonly();

                // Act
                OperationStatus status = Utf8.ToUtf16(
                    source.Span,
                    destination.Span,
                    out int bytesRead,
                    out int charsWritten,
                    replaceInvalidSequences,
                    isFinalChunk);

                // Assert
                Assert.Equal(expectedOperationStatus, status);
                Assert.Equal(expectedNumBytesRead, bytesRead);
                Assert.Equal(expectedUtf16Transcoding.Length, charsWritten);

                // Compare only the portion of the destination that the transcoder reported as written.
                string expectedText = expectedUtf16Transcoding.ToString();
                string actualText = destination.Span.Slice(0, charsWritten).ToString();
                Assert.Equal(expectedText, actualText);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Decodes a UTF-8 buffer into a string, stopping at the first zero byte if one is present.
        /// </summary>
        /// <param name="buffer">UTF-8 bytes, optionally terminated by a zero byte; bytes after the terminator are ignored.</param>
        /// <returns>
        /// The decoded text; <see cref="string.Empty"/> when the buffer is empty or starts with a zero byte.
        /// </returns>
        public static unsafe string BufferToString(ReadOnlySpan<byte> buffer)
        {
            // Inputs up to this many bytes are decoded into a stack buffer; larger ones use the shared pool.
            const int StackAllocThreshold = 1024;

            int size = buffer.IndexOf(byte.MinValue);

            if (size == 0)
            {
                return string.Empty;
            }

            // IndexOf yields -1 when there is no terminator; the unsigned comparison is then false,
            // so the whole buffer is decoded.
            if ((uint)size < (uint)buffer.Length)
            {
                buffer = buffer.Slice(0, size);
            }

            if (buffer.Length <= StackAllocThreshold)
            {
                Span<char> tmp = stackalloc char[buffer.Length];

                var res = Utf8.ToUtf16(buffer, tmp, out _, out int written);
                Debug.Assert(res == OperationStatus.Done);

                return new string(tmp.Slice(0, written));
            }

            var pool = ArrayPool<char>.Shared;
            char[] arr = pool.Rent(buffer.Length);

            try
            {
                var res = Utf8.ToUtf16(buffer, arr, out _, out int written);
                Debug.Assert(res == OperationStatus.Done);

                return new string(arr, 0, written);
            }
            finally
            {
                // Return the rented array even if transcoding or string construction throws.
                pool.Return(arr);
            }
        }
Esempio n. 5
0
        // TODO: this should be moved to System.Text.Primitives. Probably to Utf8 class
        /// <summary>
        /// Decodes UTF-8 bytes into a new string by allocating a string of the required length
        /// and transcoding directly into its pinned character storage.
        /// </summary>
        /// <param name="utf8">The UTF-8 bytes to decode.</param>
        /// <returns>The decoded string.</returns>
        /// <exception cref="Exception">
        /// The length computation reported invalid or incomplete data, or the transcoding pass
        /// did not finish with a done status.
        /// </exception>
        static string Utf8ToString(ReadOnlySpan<byte> utf8)
        {
            var lengthStatus = Utf8.ToUtf16Length(utf8, out int bytesNeeded);
            bool sizingFailed = lengthStatus == TransformationStatus.InvalidData
                                || lengthStatus == TransformationStatus.NeedMoreSourceData;

            if (sizingFailed)
            {
                throw new Exception("invalid UTF8 byte");
            }

            // The sizing pass reports a byte count; the string is allocated in chars.
            int charCount = bytesNeeded / sizeof(char);
            var str = new string(' ', charCount);

            unsafe
            {
                fixed (char* chars = str)
                {
                    // Transcode straight into the string's own storage through the pinned pointer.
                    var target = new Span<char>(chars, str.Length).AsBytes();
                    var status = Utf8.ToUtf16(utf8, target, out int _, out int _);

                    if (status != TransformationStatus.Done)
                    {
                        throw new Exception();
                    }
                }
            }

            return str;
        }
Esempio n. 6
0
 /// <summary>
 /// Transcodes a span of UTF-8 bytes into UTF-16 bytes by forwarding to <c>Utf8.ToUtf16</c>.
 ///
 /// As many input bytes as possible are consumed.
 ///
 /// When the call succeeds, all of the input was consumed and encoded: <paramref name="bytesConsumed"/>
 /// equals the length of <paramref name="source"/> and <paramref name="bytesWritten"/> is the total
 /// number of bytes placed in <paramref name="destination"/>.
 /// </summary>
 /// <param name="source">The UTF-8 bytes to read.</param>
 /// <param name="destination">The span that receives the UTF-16 bytes.</param>
 /// <param name="bytesConsumed">On return, the number of bytes read from <paramref name="source"/>.</param>
 /// <param name="bytesWritten">On return, the number of bytes written to <paramref name="destination"/>.</param>
 /// <returns>An <see cref="OperationStatus"/> describing the outcome of the conversion.</returns>
 public static OperationStatus FromUtf8(ReadOnlySpan<byte> source, Span<byte> destination, out int bytesConsumed, out int bytesWritten)
 {
     return Utf8.ToUtf16(source, destination, out bytesConsumed, out bytesWritten);
 }
Esempio n. 7
0
        /// <summary>
        /// Cross-checks several UTF-8 to UTF-16 decoding paths over the bytes in <c>_data</c>
        /// (<c>Encoding.UTF8</c>, <c>Rune</c> enumeration, <c>Utf8.ToUtf16</c> with and without
        /// replacement, and a custom decoder fallback), writing progress to the console.
        /// </summary>
        /// <exception cref="Exception">Thrown at the first mismatch between any two decoding paths.</exception>
        public void RunTest()
        {
            Console.WriteLine("-- BEGIN TEST --");

            // Reference char count, as computed by Encoding.UTF8.
            int encodingCharCount = Encoding.UTF8.GetCharCount(_data.Span);

            Console.WriteLine($"Encoding.UTF8.GetCharCount returned {encodingCharCount}.");

            // Phase 1: summing the UTF-16 length of every rune decoded from the input must give the same count.
            {
                ReadOnlySpan <byte> input = _data.Span;
                int runeIterCharCount     = 0;
                while (!input.IsEmpty)
                {
                    Rune.DecodeFromUtf8(input, out Rune thisRune, out int bytesConsumed);
                    runeIterCharCount += thisRune.Utf16SequenceLength; // ok if U+FFFD replacement
                    input              = input.Slice(bytesConsumed);
                }

                Console.WriteLine($"Rune iteration said there were {runeIterCharCount} UTF-16 chars.");

                if (encodingCharCount != runeIterCharCount)
                {
                    throw new Exception("Rune iteration char count mismatch!!");
                }
            }

            // Phase 2: GetChars must write exactly as many chars as GetCharCount predicted.
            char[] chars        = new char[encodingCharCount];
            int    charsWritten = Encoding.UTF8.GetChars(_data.Span, chars);

            Console.WriteLine($"Encoding.UTF8.GetChars returned {charsWritten} chars written.");

            if (encodingCharCount != charsWritten)
            {
                throw new Exception("GetChars return value mismatch!!");
            }

            // Phase 3: walk the UTF-8 input and the GetChars output rune by rune in lockstep.
            {
                ReadOnlySpan <byte> inputUtf8  = _data.Span;
                ReadOnlySpan <char> inputUtf16 = chars;

                while (!inputUtf8.IsEmpty && !inputUtf16.IsEmpty)
                {
                    Rune.DecodeFromUtf8(inputUtf8, out Rune inputUtf8Rune, out int bytesConsumed);
                    Rune.DecodeFromUtf16(inputUtf16, out Rune inputUtf16Rune, out int charsConsumed);

                    if (inputUtf8Rune != inputUtf16Rune)
                    {
                        throw new Exception("Enumerating runes mismatch!!");
                    }

                    inputUtf8  = inputUtf8.Slice(bytesConsumed);
                    inputUtf16 = inputUtf16.Slice(charsConsumed);
                }

                // The loop stops when either span is empty, so equal lengths here means both are empty.
                if (inputUtf8.Length != inputUtf16.Length)
                {
                    throw new Exception("Rune enumeration returned mismatched lengths!");
                }
            }

            Console.WriteLine("Running ToUtf16 with replace=true and exact size buffer.");

            // Phase 4: with replacement enabled and a destination of exactly chars.Length,
            // ToUtf16 must consume everything, fill the buffer and match the GetChars output.
            {
                char[]          chars2   = new char[chars.Length];
                OperationStatus opStatus = Utf8.ToUtf16(_data.Span, chars2, out int bytesReadJustNow, out int charsWrittenJustNow, replaceInvalidSequences: true, isFinalBlock: true);

                if (opStatus != OperationStatus.Done)
                {
                    throw new Exception("Utf8.ToUtf16 returned wrong OperationStatus!!");
                }

                if (bytesReadJustNow != _data.Memory.Length)
                {
                    throw new Exception("Utf8.ToUtf16 didn't read entire input!!");
                }

                if (charsWrittenJustNow != chars2.Length)
                {
                    throw new Exception("Utf8.ToUtf16 didn't fill entire response buffer!!");
                }

                if (!chars.SequenceEqual(chars2))
                {
                    throw new Exception("Utf8.ToUtf16 returned different data than Encoding.UTF8.GetChars!!");
                }
            }

            Console.WriteLine("Running ToUtf16 with replace=true and extra large buffer.");

            // Phase 5: same check with 1024 chars of slack; only the written prefix is compared.
            {
                char[]          chars2   = new char[chars.Length + 1024];
                OperationStatus opStatus = Utf8.ToUtf16(_data.Span, chars2, out int bytesReadJustNow, out int charsWrittenJustNow, replaceInvalidSequences: true, isFinalBlock: true);

                if (opStatus != OperationStatus.Done)
                {
                    throw new Exception("Utf8.ToUtf16 returned wrong OperationStatus!!");
                }

                if (bytesReadJustNow != _data.Memory.Length)
                {
                    throw new Exception("Utf8.ToUtf16 didn't read entire input!!");
                }

                if (charsWrittenJustNow != chars.Length)
                {
                    throw new Exception("Utf8.ToUtf16 didn't fill entire response buffer!!");
                }

                if (!chars2.AsSpan(0, charsWrittenJustNow).SequenceEqual(chars))
                {
                    throw new Exception("Utf8.ToUtf16 returned different data than Encoding.UTF8.GetChars!!");
                }
            }

            Console.WriteLine("Running ToUtf16 with replace=false and extra large buffer.");

            // Phase 6: with replacement disabled, transcode the input in pieces. Each piece that
            // ToUtf16 reports as read/written must decode rune-for-rune with Done status on both sides.
            {
                ReadOnlySpan <byte> input  = _data.Span;
                Span <char>         output = new char[chars.Length + 1024];

                while (!input.IsEmpty)
                {
                    OperationStatus opStatus = Utf8.ToUtf16(input, output, out int bytesReadJustNow, out int charsWrittenJustNow, replaceInvalidSequences: false, isFinalBlock: true);

                    ReadOnlySpan <byte> dataReadJustNow    = input.Slice(0, bytesReadJustNow);
                    ReadOnlySpan <char> dataWrittenJustNow = output.Slice(0, charsWrittenJustNow);

                    while (!dataReadJustNow.IsEmpty && !dataWrittenJustNow.IsEmpty)
                    {
                        OperationStatus utf8Status  = Rune.DecodeFromUtf8(dataReadJustNow, out Rune inputUtf8Rune, out int bytesConsumed);
                        OperationStatus utf16Status = Rune.DecodeFromUtf16(dataWrittenJustNow, out Rune inputUtf16Rune, out int charsConsumed);

                        if (utf8Status != OperationStatus.Done)
                        {
                            throw new Exception("DecodeFromUtf8 returned unexpected value!!");
                        }

                        if (utf16Status != OperationStatus.Done)
                        {
                            throw new Exception("DecodeFromUtf16 returned unexpected value!!");
                        }

                        if (inputUtf8Rune != inputUtf16Rune)
                        {
                            throw new Exception("Enumerating runes mismatch!!");
                        }

                        dataReadJustNow    = dataReadJustNow.Slice(bytesConsumed);
                        dataWrittenJustNow = dataWrittenJustNow.Slice(charsConsumed);
                    }

                    // As in phase 3: equal lengths after the loop means both pieces were fully consumed.
                    if (dataReadJustNow.Length != dataWrittenJustNow.Length)
                    {
                        throw new Exception("Unexpected length mismatch!!");
                    }

                    input = input.Slice(bytesReadJustNow);

                    if (opStatus != OperationStatus.Done)
                    {
                        // Skip over invalid data
                        // The skip length is whatever Rune.DecodeFromUtf8 reports as consumed at this position.

                        Rune.DecodeFromUtf8(input, out _, out int bytesToSkip);
                        input = input.Slice(bytesToSkip);
                    }
                }
            }

            Console.WriteLine("Trying custom decoder replacement.");

            // Phase 7: rebuild the text manually (valid runs from ToUtf16, "{BAD}" for each skipped
            // piece) and compare it with an Encoding configured with the same replacement string.
            {
                // use a custom replacement string
                Encoding encoding = Encoding.GetEncoding("utf-8", EncoderFallback.ExceptionFallback, new DecoderReplacementFallback("{BAD}"));

                string decoded = encoding.GetString(_data.Span);

                ReadOnlySpan <byte> input = _data.Span;
                char[]        decoded2    = new char[decoded.Length];
                StringBuilder builder     = new StringBuilder();

                while (!input.IsEmpty)
                {
                    OperationStatus opStatus = Utf8.ToUtf16(input, decoded2, out int bytesReadJustNow, out int charsWrittenJustNow, replaceInvalidSequences: false, isFinalBlock: true);
                    builder.Append(decoded2, 0, charsWrittenJustNow);

                    input = input.Slice(bytesReadJustNow);

                    if (opStatus != OperationStatus.Done)
                    {
                        // Skip over invalid data

                        Rune.DecodeFromUtf8(input, out _, out int bytesToSkip);
                        input = input.Slice(bytesToSkip);

                        builder.Append("{BAD}");
                    }
                }

                // NOTE(review): decoded is already a string, so the new string(...) wrapper looks redundant - confirm.
                if (new string(decoded) != builder.ToString())
                {
                    throw new Exception("Custom decoder replacement failed!!");
                }
            }

            Console.WriteLine("-- END TEST - SUCCESS --");
        }
        /// <summary>
        /// Escapes UTF-16 text by transcoding it to UTF-8, escaping the bytes with the byte-based
        /// <c>EscapeString</c> overload, and transcoding the escaped bytes back to UTF-16.
        /// </summary>
        /// <param name="value">The UTF-16 text to escape.</param>
        /// <param name="destination">Receives the escaped UTF-16 text, written from its first element.</param>
        /// <param name="encoder">The encoder forwarded to the byte-based overload; asserted to be non-null.</param>
        /// <param name="written">Incremented by the number of chars written to <paramref name="destination"/>.</param>
        /// <remarks>
        /// Scratch buffers are stack-allocated up to <c>JsonConstants.StackallocThreshold</c> bytes and
        /// rented from <see cref="ArrayPool{T}.Shared"/> above that. The used portion of a rented buffer is
        /// cleared before the buffer is returned. If the input cannot be transcoded to UTF-8,
        /// <c>ThrowHelper.ThrowArgumentException_InvalidUTF8</c> is called with the unconsumed remainder of the input.
        /// </remarks>
        private static void EscapeString(ReadOnlySpan <char> value, Span <char> destination, JavaScriptEncoder encoder, ref int written)
        {
            // todo: issue #39523: add an Encode(ReadOnlySpan<char>) decode API to System.Text.Encodings.Web.TextEncoding to avoid utf16->utf8->utf16 conversion.

            Debug.Assert(encoder != null);

            // Convert char to byte.
            byte[]      utf8DestinationArray = null;
            Span <byte> utf8Destination;
            int         length = checked ((value.Length) * JsonConstants.MaxExpansionFactorWhileTranscoding);

            if (length > JsonConstants.StackallocThreshold)
            {
                utf8DestinationArray = ArrayPool <byte> .Shared.Rent(length);

                utf8Destination = utf8DestinationArray;
            }
            else
            {
                unsafe
                {
                    byte *ptr = stackalloc byte[JsonConstants.StackallocThreshold];
                    utf8Destination = new Span <byte>(ptr, JsonConstants.StackallocThreshold);
                }
            }

            ReadOnlySpan <byte> utf16Value   = MemoryMarshal.AsBytes(value);
            OperationStatus     toUtf8Status = ToUtf8(utf16Value, utf8Destination, out int bytesConsumed, out int bytesWritten);

            Debug.Assert(toUtf8Status != OperationStatus.DestinationTooSmall);
            Debug.Assert(toUtf8Status != OperationStatus.NeedMoreData);

            if (toUtf8Status != OperationStatus.Done)
            {
                if (utf8DestinationArray != null)
                {
                    utf8Destination.Slice(0, bytesWritten).Clear();
                    ArrayPool <byte> .Shared.Return(utf8DestinationArray);
                }

                // Report the unconsumed remainder of the input. The offset into the UTF-16 input must be
                // bytesConsumed: bytesWritten counts UTF-8 output bytes and can exceed the input length,
                // which would make Slice throw instead of reaching the intended exception.
                ThrowHelper.ThrowArgumentException_InvalidUTF8(utf16Value.Slice(bytesConsumed));
            }

            Debug.Assert(toUtf8Status == OperationStatus.Done);
            Debug.Assert(bytesConsumed == utf16Value.Length);

            // Escape the bytes.
            byte[]      utf8ConvertedDestinationArray = null;
            Span <byte> utf8ConvertedDestination;

            length = checked (bytesWritten * JsonConstants.MaxExpansionFactorWhileEscaping);
            if (length > JsonConstants.StackallocThreshold)
            {
                utf8ConvertedDestinationArray = ArrayPool <byte> .Shared.Rent(length);

                utf8ConvertedDestination = utf8ConvertedDestinationArray;
            }
            else
            {
                unsafe
                {
                    byte *ptr = stackalloc byte[JsonConstants.StackallocThreshold];
                    utf8ConvertedDestination = new Span <byte>(ptr, JsonConstants.StackallocThreshold);
                }
            }

            EscapeString(utf8Destination.Slice(0, bytesWritten), utf8ConvertedDestination, indexOfFirstByteToEscape: 0, encoder, out int convertedBytesWritten);

            // The unescaped UTF-8 copy is no longer needed once the escaped bytes exist.
            if (utf8DestinationArray != null)
            {
                utf8Destination.Slice(0, bytesWritten).Clear();
                ArrayPool <byte> .Shared.Return(utf8DestinationArray);
            }

            // Convert byte to char.
#if BUILDING_INBOX_LIBRARY
            OperationStatus toUtf16Status = Utf8.ToUtf16(utf8ConvertedDestination.Slice(0, convertedBytesWritten), destination, out int bytesRead, out int charsWritten);
            Debug.Assert(toUtf16Status == OperationStatus.Done);
            Debug.Assert(bytesRead == convertedBytesWritten);
#else
            string utf16 = JsonReaderHelper.GetTextFromUtf8(utf8ConvertedDestination.Slice(0, convertedBytesWritten));
            utf16.AsSpan().CopyTo(destination);
            int charsWritten = utf16.Length;
#endif
            written += charsWritten;

            if (utf8ConvertedDestinationArray != null)
            {
                // Clear exactly the bytes this call produced. 'written' is the caller's cumulative char
                // count, so using it here could clear the wrong range or run past the rented array.
                utf8ConvertedDestination.Slice(0, convertedBytesWritten).Clear();
                ArrayPool <byte> .Shared.Return(utf8ConvertedDestinationArray);
            }
        }