/// <summary>
/// Transcodes the UTF-8 <paramref name="source"/> buffer to <paramref name="destination"/> as UTF-16.
/// </summary>
/// <param name="source">The UTF-8 input bytes.</param>
/// <param name="destination">The buffer that receives the UTF-16 output.</param>
/// <param name="numBytesRead">
/// On return, the number of bytes of <paramref name="source"/> that were consumed. Only bytes
/// belonging to scalar values that were successfully written to <paramref name="destination"/>
/// are counted.
/// </param>
/// <param name="numCharsWritten">
/// On return, the number of chars written to <paramref name="destination"/>.
/// </param>
/// <param name="replaceInvalidSequences">
/// <see langword="true"/> to substitute U+FFFD for invalid UTF-8 sequences instead of stopping.
/// </param>
/// <param name="isFinalBlock">
/// <see langword="true"/> if no further input will follow, in which case a trailing partial
/// UTF-8 sequence is treated as invalid data; <see langword="false"/> to stop and report
/// <see cref="OperationStatus.NeedMoreData"/> instead.
/// </param>
/// <returns>
/// <see cref="OperationStatus.Done"/> if all of <paramref name="source"/> was transcoded;
/// <see cref="OperationStatus.DestinationTooSmall"/> if <paramref name="destination"/> ran out of room;
/// <see cref="OperationStatus.NeedMoreData"/> if <paramref name="source"/> ends with a partial
/// sequence and <paramref name="isFinalBlock"/> is <see langword="false"/>;
/// <see cref="OperationStatus.InvalidData"/> if invalid input was encountered and
/// <paramref name="replaceInvalidSequences"/> is <see langword="false"/>.
/// </returns>
/// <remarks>
/// If <paramref name="replaceInvalidSequences"/> is <see langword="true"/>, invalid UTF-8 sequences
/// in <paramref name="source"/> will be replaced with U+FFFD in <paramref name="destination"/>, and
/// this method will not return <see cref="OperationStatus.InvalidData"/>.
/// </remarks>
public static OperationStatus ToUtf16(ReadOnlySpan<byte> source, Span<char> destination, out int numBytesRead, out int numCharsWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true)
{
    int originalSourceLength = source.Length;
    int originalDestinationLength = destination.Length;

    OperationStatus status = OperationStatus.Done;

    // Read and transcode one scalar value per iteration, shrinking both spans as we go.
    while (!source.IsEmpty)
    {
        status = Rune.DecodeUtf8(source, out Rune firstScalarValue, out int bytesConsumed);

        switch (status)
        {
            case OperationStatus.NeedMoreData:

                // The input ended with a partial UTF-8 sequence. This is only an error when
                // the caller has told us that no additional data will arrive in a later call.
                if (!isFinalBlock)
                {
                    goto Finish;
                }

                status = OperationStatus.InvalidData;
                goto case OperationStatus.InvalidData;

            case OperationStatus.InvalidData:

                // The input contained invalid data. Unless the caller asked for U+FFFD
                // replacement, stop immediately and surface the error.
                if (!replaceInvalidSequences)
                {
                    goto Finish;
                }

                firstScalarValue = Rune.ReplacementChar;
                goto default;

            default:

                // The scalar value to emit is now known. The only remaining way to fail
                // is running out of room in the destination buffer.
                if (!firstScalarValue.TryEncode(destination, out int charsWritten))
                {
                    status = OperationStatus.DestinationTooSmall;
                    goto Finish;
                }

                // Advance by the lengths actually reported rather than the rune's own
                // sequence lengths; a substitution may have taken place above.
                source = source.Slice(bytesConsumed);
                destination = destination.Slice(charsWritten);
                status = OperationStatus.Done; // forcibly set success
                continue;
        }
    }

Finish:

    numBytesRead = originalSourceLength - source.Length;
    numCharsWritten = originalDestinationLength - destination.Length;

    // Done implies the whole input was consumed. (The previous condition,
    // "numBytesRead < originalSourceLength || status != Done", was false on every
    // successful call and therefore fired in Debug builds.)
    Debug.Assert(status != OperationStatus.Done || numBytesRead == originalSourceLength,
        "Cannot report OperationStatus.Done if we haven't consumed the entire input buffer.");

    return status;
}
/// <summary>
/// Checks that <c>Rune.DecodeUtf8</c> returns the expected status, decoded scalar value,
/// and consumed byte count for each supplied UTF-8 input.
/// </summary>
[Theory] // required for xUnit to discover the method and feed it the InlineData rows
[InlineData(new byte[] { 0xF0, 0x9F, 0x98, 0xB2 }, OperationStatus.Done, 0x1F632, 4)] // [ F0 9F 98 B2 ] is U+1F632 ASTONISHED FACE
public static void DecodeUtf8(byte[] data, OperationStatus expectedOperationStatus, int expectedRuneValue, int expectedBytesConsumed)
{
    OperationStatus actualOperationStatus = Rune.DecodeUtf8(data, out Rune actualRune, out int actualBytesConsumed);

    Assert.Equal(expectedOperationStatus, actualOperationStatus);
    Assert.Equal(expectedRuneValue, actualRune.Value);
    Assert.Equal(expectedBytesConsumed, actualBytesConsumed);
}