/// <summary>
/// Decodes the UTF-8 data at the current position into a single UTF-16 code unit
/// without changing the position.
/// </summary>
/// <returns>
/// -1 when the position is at or past the end of the buffer; otherwise the value of the
/// char that the decoder wrote.
/// </returns>
/// <exception cref="Exception">The decoder reported invalid data at the current position.</exception>
public override int Peek()
{
    bool exhausted = _index >= _buffer.Length;
    if (exhausted)
    {
        return -1;
    }

    // Stays at its default value if the decoder writes nothing.
    char decoded = default(char);

    unsafe
    {
        // Expose the single local char as a byte span so the decoder can write into it.
        var target = new Span<char>(&decoded, 1).AsBytes();
        var pending = _buffer.Span.Slice(_index);

        // Only InvalidData is treated as an error here; every other status falls through.
        var status = Utf8.ToUtf16(pending, target, out int bytesRead, out int bytesStored);
        if (status == TransformationStatus.InvalidData)
        {
            throw new Exception("invalid UTF8 byte at " + _index.ToString());
        }
    }

    return decoded;
}
/// <summary>
/// Attempts to transcode <paramref name="input"/> from UTF-8 to a string without
/// replacing invalid sequences.
/// </summary>
/// <param name="input">The UTF-8 bytes to decode.</param>
/// <param name="decoded">The decoded text on success; <c>null</c> on failure.</param>
/// <returns><c>true</c> when the transcoder reports <see cref="OperationStatus.Done"/>; otherwise <c>false</c>.</returns>
private static bool TryDecodeUtf8(ReadOnlySpan<byte> input, out string decoded)
{
    string text = null;

    // One char per input byte is requested as scratch space.
    char[] scratch = ArrayPool<char>.Shared.Rent(input.Length);
    try
    {
        OperationStatus status = Utf8.ToUtf16(input, scratch, out _, out int charCount, replaceInvalidSequences: false);
        if (status == OperationStatus.Done)
        {
            text = new string(scratch, 0, charCount);
        }
    }
    finally
    {
        // The scratch buffer goes back to the pool on every path.
        ArrayPool<char>.Shared.Return(scratch);
    }

    decoded = text;
    return text != null;
}
/// <summary>
/// Shared assertion helper: transcodes <paramref name="utf8Input"/> with <c>Utf8.ToUtf16</c>
/// and checks the status, the number of bytes read and the produced UTF-16 text.
/// </summary>
/// <param name="utf8Input">UTF-8 bytes handed to the transcoder.</param>
/// <param name="destinationSize">Number of chars allocated for the destination buffer.</param>
/// <param name="replaceInvalidSequences">Forwarded to <c>Utf8.ToUtf16</c>.</param>
/// <param name="isFinalChunk">Forwarded to <c>Utf8.ToUtf16</c> as its final-block flag.</param>
/// <param name="expectedOperationStatus">Status the call must return.</param>
/// <param name="expectedNumBytesRead">Number of source bytes the call must report as read.</param>
/// <param name="expectedUtf16Transcoding">Exact UTF-16 output the call must produce.</param>
private static void ToChars_Test_Core(ReadOnlySpan<byte> utf8Input, int destinationSize, bool replaceInvalidSequences, bool isFinalChunk, OperationStatus expectedOperationStatus, int expectedNumBytesRead, ReadOnlySpan<char> expectedUtf16Transcoding)
{
    // Arrange
    // NOTE(review): BoundedMemory is presumably used so out-of-bounds access is detected - confirm against its definition.
    using (BoundedMemory<byte> source = BoundedMemory.AllocateFromExistingData(utf8Input))
    {
        using (BoundedMemory<char> destination = BoundedMemory.Allocate<char>(destinationSize))
        {
            source.MakeReadonly();

            // Act
            OperationStatus status = Utf8.ToUtf16(
                source.Span,
                destination.Span,
                out int bytesRead,
                out int charsWritten,
                replaceInvalidSequences,
                isFinalChunk);

            // Assert
            Assert.Equal(expectedOperationStatus, status);
            Assert.Equal(expectedNumBytesRead, bytesRead);
            Assert.Equal(expectedUtf16Transcoding.Length, charsWritten);

            string expectedText = expectedUtf16Transcoding.ToString();
            string actualText = destination.Span.Slice(0, charsWritten).ToString();
            Assert.Equal(expectedText, actualText);
        }
    }
}
/// <summary>
/// Converts a UTF-8 buffer to a string, stopping at the first zero byte if one is present.
/// </summary>
/// <param name="buffer">UTF-8 bytes, optionally terminated (or padded) with a zero byte.</param>
/// <returns>
/// The transcoded text that precedes the first zero byte, or the whole buffer when it contains none;
/// <see cref="string.Empty"/> when the first byte is zero.
/// </returns>
public static unsafe string BufferToString(ReadOnlySpan<byte> buffer)
{
    int terminator = buffer.IndexOf(byte.MinValue);
    if (terminator == 0)
    {
        return string.Empty;
    }

    // The unsigned comparison also rejects -1 (no terminator found), leaving the buffer untouched.
    if ((uint)terminator < (uint)buffer.Length)
    {
        buffer = buffer.Slice(0, terminator);
    }

    // Small inputs are transcoded on the stack; larger ones borrow a pooled array.
    char[] rented = null;
    Span<char> chars = buffer.Length <= 1024
        ? stackalloc char[buffer.Length]
        : (rented = ArrayPool<char>.Shared.Rent(buffer.Length));

    OperationStatus status = Utf8.ToUtf16(buffer, chars, out _, out int charCount);
    Debug.Assert(status == OperationStatus.Done);

    string text = new string(chars.Slice(0, charCount));

    if (rented != null)
    {
        ArrayPool<char>.Shared.Return(rented);
    }

    return text;
}
// TODO: this should be moved to System.Text.Primitives. Probably to Utf8 class
/// <summary>
/// Transcodes UTF-8 bytes into a newly allocated string.
/// </summary>
/// <param name="utf8">The UTF-8 bytes to convert.</param>
/// <returns>A string holding the UTF-16 form of <paramref name="utf8"/>.</returns>
/// <exception cref="Exception">
/// The length pre-pass reported invalid or incomplete data, or the transcoding pass did not finish with Done.
/// </exception>
static string Utf8ToString(ReadOnlySpan<byte> utf8)
{
    // First pass: find out how many UTF-16 bytes the input expands to.
    var lengthStatus = Utf8.ToUtf16Length(utf8, out int utf16ByteCount);
    switch (lengthStatus)
    {
        case TransformationStatus.InvalidData:
        case TransformationStatus.NeedMoreSourceData:
            throw new Exception("invalid UTF8 byte");
    }

    // Allocate a string of the final length, then overwrite its characters in place.
    int charCount = utf16ByteCount / sizeof(char);
    var text = new string(' ', charCount);

    unsafe
    {
        fixed (char* pText = text)
        {
            var target = new Span<char>(pText, text.Length).AsBytes();
            var status = Utf8.ToUtf16(utf8, target, out int bytesRead, out int bytesStored);
            if (status != TransformationStatus.Done)
            {
                throw new Exception();
            }
        }
    }

    return text;
}
/// <summary>
/// Transcodes a span of UTF-8 bytes into UTF-16 bytes by forwarding to <c>Utf8.ToUtf16</c>.
///
/// As many input bytes as possible are consumed.
///
/// When the call succeeds the whole input has been consumed and encoded: <paramref name="bytesConsumed"/>
/// equals the length of <paramref name="source"/> and <paramref name="bytesWritten"/> is the total number
/// of bytes stored in <paramref name="destination"/>.
/// </summary>
/// <param name="source">The UTF-8 bytes to read.</param>
/// <param name="destination">The span that receives the UTF-16 bytes.</param>
/// <param name="bytesConsumed">On exit, the number of bytes read from <paramref name="source"/>.</param>
/// <param name="bytesWritten">On exit, the number of bytes stored in <paramref name="destination"/>.</param>
/// <returns>An <see cref="OperationStatus"/> describing the outcome of the conversion.</returns>
public static OperationStatus FromUtf8(ReadOnlySpan<byte> source, Span<byte> destination, out int bytesConsumed, out int bytesWritten)
{
    return Utf8.ToUtf16(source, destination, out bytesConsumed, out bytesWritten);
}
/// <summary>
/// Cross-checks several UTF-8 decoding APIs against each other over the bytes in <c>_data</c>:
/// <c>Encoding.UTF8</c>, <c>Rune</c> enumeration and <c>Utf8.ToUtf16</c> (with and without replacement).
/// Progress is written to the console.
/// </summary>
/// <exception cref="Exception">Any two APIs disagree about the decoded content.</exception>
public void RunTest()
{
    Console.WriteLine("-- BEGIN TEST --");

    int encodingCharCount = Encoding.UTF8.GetCharCount(_data.Span);
    Console.WriteLine($"Encoding.UTF8.GetCharCount returned {encodingCharCount}.");

    VerifyRuneIterationCharCount(encodingCharCount);

    // Reference decoding that every later phase is compared against.
    char[] chars = new char[encodingCharCount];
    int charsWritten = Encoding.UTF8.GetChars(_data.Span, chars);
    Console.WriteLine($"Encoding.UTF8.GetChars returned {charsWritten} chars written.");

    if (encodingCharCount != charsWritten)
    {
        throw new Exception("GetChars return value mismatch!!");
    }

    VerifyRuneEnumerationMatches(chars);

    Console.WriteLine("Running ToUtf16 with replace=true and exact size buffer.");
    VerifyReplacingTranscode(chars, extraCapacity: 0);

    Console.WriteLine("Running ToUtf16 with replace=true and extra large buffer.");
    VerifyReplacingTranscode(chars, extraCapacity: 1024);

    Console.WriteLine("Running ToUtf16 with replace=false and extra large buffer.");
    VerifyStrictTranscodeMatchesRunes(chars.Length + 1024);

    Console.WriteLine("Trying custom decoder replacement.");
    VerifyCustomDecoderReplacement();

    Console.WriteLine("-- END TEST - SUCCESS --");
}

/// <summary>Checks that summing UTF-16 lengths over a Rune walk of the input matches <paramref name="expectedCharCount"/>.</summary>
private void VerifyRuneIterationCharCount(int expectedCharCount)
{
    ReadOnlySpan<byte> input = _data.Span;
    int runeIterCharCount = 0;

    while (!input.IsEmpty)
    {
        Rune.DecodeFromUtf8(input, out Rune thisRune, out int bytesConsumed);
        runeIterCharCount += thisRune.Utf16SequenceLength; // ok if U+FFFD replacement
        input = input.Slice(bytesConsumed);
    }

    Console.WriteLine($"Rune iteration said there were {runeIterCharCount} UTF-16 chars.");

    if (expectedCharCount != runeIterCharCount)
    {
        throw new Exception("Rune iteration char count mismatch!!");
    }
}

/// <summary>Checks that walking the UTF-8 input and the decoded UTF-16 <paramref name="chars"/> yields the same runes.</summary>
private void VerifyRuneEnumerationMatches(char[] chars)
{
    ReadOnlySpan<byte> inputUtf8 = _data.Span;
    ReadOnlySpan<char> inputUtf16 = chars;

    while (!inputUtf8.IsEmpty && !inputUtf16.IsEmpty)
    {
        Rune.DecodeFromUtf8(inputUtf8, out Rune inputUtf8Rune, out int bytesConsumed);
        Rune.DecodeFromUtf16(inputUtf16, out Rune inputUtf16Rune, out int charsConsumed);

        if (inputUtf8Rune != inputUtf16Rune)
        {
            throw new Exception("Enumerating runes mismatch!!");
        }

        inputUtf8 = inputUtf8.Slice(bytesConsumed);
        inputUtf16 = inputUtf16.Slice(charsConsumed);
    }

    // The loop stops when either side runs out, so equal lengths here means both are empty.
    if (inputUtf8.Length != inputUtf16.Length)
    {
        throw new Exception("Rune enumeration returned mismatched lengths!");
    }
}

/// <summary>
/// Runs <c>Utf8.ToUtf16</c> with replacement enabled into a buffer of
/// <c>expected.Length + extraCapacity</c> chars and checks the result equals <paramref name="expected"/>.
/// </summary>
private void VerifyReplacingTranscode(char[] expected, int extraCapacity)
{
    char[] actual = new char[expected.Length + extraCapacity];
    OperationStatus opStatus = Utf8.ToUtf16(_data.Span, actual, out int bytesReadJustNow, out int charsWrittenJustNow, replaceInvalidSequences: true, isFinalBlock: true);

    if (opStatus != OperationStatus.Done)
    {
        throw new Exception("Utf8.ToUtf16 returned wrong OperationStatus!!");
    }

    if (bytesReadJustNow != _data.Memory.Length)
    {
        throw new Exception("Utf8.ToUtf16 didn't read entire input!!");
    }

    if (charsWrittenJustNow != expected.Length)
    {
        throw new Exception("Utf8.ToUtf16 didn't fill entire response buffer!!");
    }

    if (!actual.AsSpan(0, charsWrittenJustNow).SequenceEqual(expected))
    {
        throw new Exception("Utf8.ToUtf16 returned different data than Encoding.UTF8.GetChars!!");
    }
}

/// <summary>
/// Runs <c>Utf8.ToUtf16</c> without replacement, one valid run at a time, and checks that each run's
/// input bytes and output chars decode to the same runes.
/// </summary>
private void VerifyStrictTranscodeMatchesRunes(int outputCapacity)
{
    ReadOnlySpan<byte> input = _data.Span;
    Span<char> output = new char[outputCapacity];

    while (!input.IsEmpty)
    {
        OperationStatus opStatus = Utf8.ToUtf16(input, output, out int bytesReadJustNow, out int charsWrittenJustNow, replaceInvalidSequences: false, isFinalBlock: true);

        ReadOnlySpan<byte> dataReadJustNow = input.Slice(0, bytesReadJustNow);
        ReadOnlySpan<char> dataWrittenJustNow = output.Slice(0, charsWrittenJustNow);

        while (!dataReadJustNow.IsEmpty && !dataWrittenJustNow.IsEmpty)
        {
            OperationStatus utf8Status = Rune.DecodeFromUtf8(dataReadJustNow, out Rune inputUtf8Rune, out int bytesConsumed);
            OperationStatus utf16Status = Rune.DecodeFromUtf16(dataWrittenJustNow, out Rune inputUtf16Rune, out int charsConsumed);

            if (utf8Status != OperationStatus.Done)
            {
                throw new Exception("DecodeFromUtf8 returned unexpected value!!");
            }

            if (utf16Status != OperationStatus.Done)
            {
                throw new Exception("DecodeFromUtf16 returned unexpected value!!");
            }

            if (inputUtf8Rune != inputUtf16Rune)
            {
                throw new Exception("Enumerating runes mismatch!!");
            }

            dataReadJustNow = dataReadJustNow.Slice(bytesConsumed);
            dataWrittenJustNow = dataWrittenJustNow.Slice(charsConsumed);
        }

        if (dataReadJustNow.Length != dataWrittenJustNow.Length)
        {
            throw new Exception("Unexpected length mismatch!!");
        }

        input = input.Slice(bytesReadJustNow);

        if (opStatus != OperationStatus.Done)
        {
            input = SkipInvalidSequence(input);
        }
    }
}

/// <summary>
/// Rebuilds the output of a "{BAD}"-replacing UTF-8 decoder by hand (strict <c>Utf8.ToUtf16</c> plus
/// manual replacement) and checks that it equals what <c>Encoding.GetString</c> produces.
/// </summary>
private void VerifyCustomDecoderReplacement()
{
    // use a custom replacement string
    Encoding encoding = Encoding.GetEncoding("utf-8", EncoderFallback.ExceptionFallback, new DecoderReplacementFallback("{BAD}"));
    string decoded = encoding.GetString(_data.Span);

    ReadOnlySpan<byte> input = _data.Span;
    char[] decoded2 = new char[decoded.Length];
    StringBuilder builder = new StringBuilder();

    while (!input.IsEmpty)
    {
        OperationStatus opStatus = Utf8.ToUtf16(input, decoded2, out int bytesReadJustNow, out int charsWrittenJustNow, replaceInvalidSequences: false, isFinalBlock: true);
        builder.Append(decoded2, 0, charsWrittenJustNow);
        input = input.Slice(bytesReadJustNow);

        if (opStatus != OperationStatus.Done)
        {
            input = SkipInvalidSequence(input);
            builder.Append("{BAD}");
        }
    }

    // 'decoded' is already a string; compare it directly instead of copying it first.
    if (decoded != builder.ToString())
    {
        throw new Exception("Custom decoder replacement failed!!");
    }
}

/// <summary>Skips over invalid data: drops the bytes that <c>Rune.DecodeFromUtf8</c> reports as consumed at the start of <paramref name="input"/>.</summary>
private static ReadOnlySpan<byte> SkipInvalidSequence(ReadOnlySpan<byte> input)
{
    Rune.DecodeFromUtf8(input, out _, out int bytesToSkip);
    return input.Slice(bytesToSkip);
}
/// <summary>
/// Escapes UTF-16 text by transcoding it to UTF-8, escaping the UTF-8 bytes with
/// <paramref name="encoder"/>, and transcoding the escaped bytes back to UTF-16.
/// </summary>
/// <param name="value">The UTF-16 text to escape.</param>
/// <param name="destination">Receives the escaped UTF-16 text, written from its start.</param>
/// <param name="encoder">The encoder handed to the UTF-8 escaping overload; must not be null.</param>
/// <param name="written">Incremented by the number of chars stored in <paramref name="destination"/>.</param>
/// <remarks>
/// Temporary UTF-8 buffers live on the stack when they fit within
/// <c>JsonConstants.StackallocThreshold</c>; otherwise they are rented from the shared pool and the
/// used portion is cleared before being returned.
/// </remarks>
private static void EscapeString(ReadOnlySpan<char> value, Span<char> destination, JavaScriptEncoder encoder, ref int written)
{
    // todo: issue #39523: add an Encode(ReadOnlySpan<char>) decode API to System.Text.Encodings.Web.TextEncoding to avoid utf16->utf8->utf16 conversion.

    Debug.Assert(encoder != null);

    // Convert char to byte.
    byte[] utf8DestinationArray = null;
    Span<byte> utf8Destination;
    int length = checked((value.Length) * JsonConstants.MaxExpansionFactorWhileTranscoding);
    if (length > JsonConstants.StackallocThreshold)
    {
        utf8DestinationArray = ArrayPool<byte>.Shared.Rent(length);
        utf8Destination = utf8DestinationArray;
    }
    else
    {
        unsafe
        {
            byte* ptr = stackalloc byte[JsonConstants.StackallocThreshold];
            utf8Destination = new Span<byte>(ptr, JsonConstants.StackallocThreshold);
        }
    }

    ReadOnlySpan<byte> utf16Value = MemoryMarshal.AsBytes(value);
    OperationStatus toUtf8Status = ToUtf8(utf16Value, utf8Destination, out int bytesConsumed, out int bytesWritten);

    Debug.Assert(toUtf8Status != OperationStatus.DestinationTooSmall);
    Debug.Assert(toUtf8Status != OperationStatus.NeedMoreData);

    if (toUtf8Status != OperationStatus.Done)
    {
        if (utf8DestinationArray != null)
        {
            utf8Destination.Slice(0, bytesWritten).Clear();
            ArrayPool<byte>.Shared.Return(utf8DestinationArray);
        }

        // The unprocessed input starts at bytesConsumed, which indexes the UTF-16 source.
        // bytesWritten counts UTF-8 output bytes and may even exceed the source length.
        ThrowHelper.ThrowArgumentException_InvalidUTF8(utf16Value.Slice(bytesConsumed));
    }

    Debug.Assert(toUtf8Status == OperationStatus.Done);
    Debug.Assert(bytesConsumed == utf16Value.Length);

    // Escape the bytes.
    byte[] utf8ConvertedDestinationArray = null;
    Span<byte> utf8ConvertedDestination;
    length = checked(bytesWritten * JsonConstants.MaxExpansionFactorWhileEscaping);
    if (length > JsonConstants.StackallocThreshold)
    {
        utf8ConvertedDestinationArray = ArrayPool<byte>.Shared.Rent(length);
        utf8ConvertedDestination = utf8ConvertedDestinationArray;
    }
    else
    {
        unsafe
        {
            byte* ptr = stackalloc byte[JsonConstants.StackallocThreshold];
            utf8ConvertedDestination = new Span<byte>(ptr, JsonConstants.StackallocThreshold);
        }
    }

    EscapeString(utf8Destination.Slice(0, bytesWritten), utf8ConvertedDestination, indexOfFirstByteToEscape: 0, encoder, out int convertedBytesWritten);

    // The unescaped UTF-8 copy is no longer needed; scrub and return it.
    if (utf8DestinationArray != null)
    {
        utf8Destination.Slice(0, bytesWritten).Clear();
        ArrayPool<byte>.Shared.Return(utf8DestinationArray);
    }

    // Convert byte to char.
#if BUILDING_INBOX_LIBRARY
    OperationStatus toUtf16Status = Utf8.ToUtf16(utf8ConvertedDestination.Slice(0, convertedBytesWritten), destination, out int bytesRead, out int charsWritten);
    Debug.Assert(toUtf16Status == OperationStatus.Done);
    Debug.Assert(bytesRead == convertedBytesWritten);
#else
    string utf16 = JsonReaderHelper.GetTextFromUtf8(utf8ConvertedDestination.Slice(0, convertedBytesWritten));
    utf16.AsSpan().CopyTo(destination);
    int charsWritten = utf16.Length;
#endif

    written += charsWritten;

    if (utf8ConvertedDestinationArray != null)
    {
        // Scrub exactly the escaped bytes that were produced. 'written' is the caller's running
        // UTF-16 char total and says nothing about how much of this byte buffer was used.
        utf8ConvertedDestination.Slice(0, convertedBytesWritten).Clear();
        ArrayPool<byte>.Shared.Return(utf8ConvertedDestinationArray);
    }
}