/*
 * OperationStatus-based APIs for transcoding of chunked data.
 * This method is similar to Encoding.UTF8.GetBytes / GetChars but has a
 * different calling convention, different error handling mechanisms, and
 * different performance characteristics.
 *
 * If 'replaceInvalidSequences' is true, the method will replace any ill-formed
 * subsequence in the source with U+FFFD when transcoding to the destination,
 * then it will continue processing the remainder of the buffers. Otherwise
 * the method will return OperationStatus.InvalidData.
 *
 * If the method does return an error code, the out parameters will represent
 * how much of the data was successfully transcoded, and the location of the
 * ill-formed subsequence can be deduced from these values.
 *
 * If 'replaceInvalidSequences' is true, the method is guaranteed never to return
 * OperationStatus.InvalidData. If 'isFinalBlock' is true, the method is
 * guaranteed never to return OperationStatus.NeedMoreData.
 */

/// <summary>
/// Transcodes the UTF-16 <paramref name="source"/> buffer to <paramref name="destination"/> as UTF-8.
/// </summary>
/// <param name="source">The UTF-16 input to transcode.</param>
/// <param name="destination">The buffer that receives the UTF-8 output.</param>
/// <param name="numCharsRead">
/// On return, the number of <see cref="char"/> elements of <paramref name="source"/> that were
/// successfully transcoded.
/// </param>
/// <param name="numBytesWritten">
/// On return, the number of bytes written to <paramref name="destination"/>.
/// </param>
/// <param name="replaceInvalidSequences">
/// <see langword="true"/> to substitute U+FFFD for each invalid sequence and keep going;
/// <see langword="false"/> to stop and return <see cref="OperationStatus.InvalidData"/>.
/// </param>
/// <param name="isFinalBlock">
/// <see langword="true"/> if no further input will follow, in which case incomplete trailing
/// data is treated as invalid; <see langword="false"/> to stop and return
/// <see cref="OperationStatus.NeedMoreData"/> instead.
/// </param>
/// <returns>
/// <see cref="OperationStatus.Done"/> if all of <paramref name="source"/> was transcoded;
/// <see cref="OperationStatus.DestinationTooSmall"/> if <paramref name="destination"/> could not
/// hold the next encoded scalar value; otherwise <see cref="OperationStatus.NeedMoreData"/> or
/// <see cref="OperationStatus.InvalidData"/> as described for the flag parameters.
/// </returns>
/// <remarks>
/// If <paramref name="replaceInvalidSequences"/> is <see langword="true"/>, invalid UTF-16 sequences
/// in <paramref name="source"/> will be replaced with U+FFFD in <paramref name="destination"/>, and
/// this method will not return <see cref="OperationStatus.InvalidData"/>.
/// </remarks>
public static OperationStatus FromUtf16(ReadOnlySpan<char> source, Span<byte> destination, out int numCharsRead, out int numBytesWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true)
{
    int originalSourceLength = source.Length;
    int originalDestinationLength = destination.Length;
    OperationStatus status = OperationStatus.Done;

    // In a loop, this is going to read and transcode one scalar value at a time
    // from the source to the destination.
    while (!source.IsEmpty)
    {
        status = Rune.DecodeUtf16(source, out Rune firstScalarValue, out int charsConsumed);

        switch (status)
        {
            case OperationStatus.NeedMoreData:

                // Input buffer ended with a high surrogate. Only treat this as an error
                // if the caller told us that we shouldn't expect additional data in a
                // future call.
                if (!isFinalBlock)
                {
                    goto Finish;
                }

                status = OperationStatus.InvalidData;
                goto case OperationStatus.InvalidData;

            case OperationStatus.InvalidData:

                // Input buffer contained invalid data. If the caller told us not to
                // perform U+FFFD replacement, terminate the loop immediately and return
                // an error to the caller.
                if (!replaceInvalidSequences)
                {
                    goto Finish;
                }

                firstScalarValue = Rune.ReplacementChar;
                goto default;

            default:

                // We know which scalar value we need to transcode to UTF-8.
                // Do so now, and only terminate the loop if we ran out of space
                // in the destination buffer.
                if (firstScalarValue.TryEncodeToUtf8Bytes(destination, out int bytesWritten))
                {
                    source = source.Slice(charsConsumed); // don't use Rune.Utf8SequenceLength; we may have performed substitution
                    destination = destination.Slice(bytesWritten);
                    status = OperationStatus.Done; // forcibly set success
                    continue;
                }
                else
                {
                    status = OperationStatus.DestinationTooSmall;
                    goto Finish;
                }
        }
    }

Finish:

    numCharsRead = originalSourceLength - source.Length;
    numBytesWritten = originalDestinationLength - destination.Length;

    // Done implies the whole input was consumed. Every early exit above jumps here
    // without slicing 'source', so a partial read always carries a non-Done status.
    Debug.Assert(status != OperationStatus.Done || numCharsRead == originalSourceLength,
        "Cannot report OperationStatus.Done if we haven't consumed the entire input buffer.");

    return status;
}
// Checks the status, decoded scalar value and consumed char count that Rune.DecodeUtf16
// reports for each data row.
// NOTE(review): no [Theory] attribute or sibling [InlineData] rows are visible in this
// view - confirm they precede this row in the full file.
[InlineData(new char[] { '\ud800', '\u1234' }, OperationStatus.InvalidData, 0xFFFD, 1)] // standalone high surrogate
public static void DecodeUtf16(char[] data, OperationStatus expectedOperationStatus, int expectedRuneValue, int expectedCharsConsumed)
{
    // Act: decode the first scalar value from the supplied UTF-16 data.
    OperationStatus observedStatus = Rune.DecodeUtf16(data, out Rune decodedRune, out int consumedCharCount);

    // Assert: all three outputs must match the row's expectations.
    Assert.Equal(expectedOperationStatus, observedStatus);
    Assert.Equal(expectedRuneValue, decodedRune.Value);
    Assert.Equal(expectedCharsConsumed, consumedCharCount);
}