Пример #1
0
        /*
         * OperationStatus-based APIs for transcoding of chunked data.
         * This method is similar to Encoding.UTF8.GetBytes / GetChars but has a
         * different calling convention, different error handling mechanisms, and
         * different performance characteristics.
         *
         * If 'replaceInvalidSequences' is true, the method will replace any ill-formed
         * subsequence in the source with U+FFFD when transcoding to the destination,
         * then it will continue processing the remainder of the buffers. Otherwise
         * the method will return OperationStatus.InvalidData.
         *
         * If the method does return an error code, the out parameters will represent
         * how much of the data was successfully transcoded, and the location of the
         * ill-formed subsequence can be deduced from these values.
         *
         * If 'replaceInvalidSequences' is true, the method is guaranteed never to return
         * OperationStatus.InvalidData. If 'isFinalBlock' is true, the method is
         * guaranteed never to return OperationStatus.NeedMoreData.
         */

        /// <summary>
        /// Transcodes the UTF-16 <paramref name="source"/> buffer to <paramref name="destination"/> as UTF-8.
        /// </summary>
        /// <param name="source">The UTF-16 input to transcode.</param>
        /// <param name="destination">The buffer that receives the UTF-8 output.</param>
        /// <param name="numCharsRead">
        /// When this method returns, the number of <see cref="char"/> elements consumed from
        /// <paramref name="source"/>. On a non-<see cref="OperationStatus.Done"/> result this is the
        /// offset in <paramref name="source"/> at which processing stopped.
        /// </param>
        /// <param name="numBytesWritten">
        /// When this method returns, the number of bytes written to <paramref name="destination"/>.
        /// </param>
        /// <param name="replaceInvalidSequences">
        /// <see langword="true"/> to substitute U+FFFD for each ill-formed subsequence and keep going;
        /// <see langword="false"/> to stop and return <see cref="OperationStatus.InvalidData"/>.
        /// </param>
        /// <param name="isFinalBlock">
        /// <see langword="true"/> if no further input will follow, in which case incomplete trailing
        /// data is treated as invalid rather than reported as <see cref="OperationStatus.NeedMoreData"/>.
        /// </param>
        /// <returns>
        /// <see cref="OperationStatus.Done"/> if all of <paramref name="source"/> was transcoded;
        /// <see cref="OperationStatus.DestinationTooSmall"/> if <paramref name="destination"/> ran out of room;
        /// <see cref="OperationStatus.NeedMoreData"/> if the decoder needs more input and
        /// <paramref name="isFinalBlock"/> is <see langword="false"/>;
        /// <see cref="OperationStatus.InvalidData"/> if ill-formed input was found and
        /// <paramref name="replaceInvalidSequences"/> is <see langword="false"/>.
        /// </returns>
        /// <remarks>
        /// If <paramref name="replaceInvalidSequences"/> is <see langword="true"/>, invalid UTF-16 sequences
        /// in <paramref name="source"/> will be replaced with U+FFFD in <paramref name="destination"/>, and
        /// this method will not return <see cref="OperationStatus.InvalidData"/>.
        /// </remarks>
        public static OperationStatus FromUtf16(ReadOnlySpan<char> source, Span<byte> destination, out int numCharsRead, out int numBytesWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true)
        {
            // Remember the starting sizes; the spans are re-sliced as we go, so the
            // amounts consumed / produced are recovered by subtraction at the end.
            int originalSourceLength = source.Length;
            int originalDestinationLength = destination.Length;
            OperationStatus status = OperationStatus.Done;

            // Read and transcode one scalar value per iteration.
            while (!source.IsEmpty)
            {
                status = Rune.DecodeUtf16(source, out Rune firstScalarValue, out int charsConsumed);

                switch (status)
                {
                    case OperationStatus.NeedMoreData:

                        // The input ended with a high surrogate. That is only an error when
                        // the caller has told us no further data will arrive; otherwise
                        // report NeedMoreData and let the caller supply the rest.
                        if (!isFinalBlock)
                        {
                            goto Finish;
                        }

                        status = OperationStatus.InvalidData;
                        goto case OperationStatus.InvalidData;

                    case OperationStatus.InvalidData:

                        // Ill-formed input. Without U+FFFD replacement we stop right here and
                        // surface the error; the out parameters then locate the bad sequence.
                        if (!replaceInvalidSequences)
                        {
                            goto Finish;
                        }

                        firstScalarValue = Rune.ReplacementChar;
                        goto default;

                    default:

                        // We now know which scalar value to emit. The only way this can fail
                        // is by running out of room in the destination.
                        if (!firstScalarValue.TryEncodeToUtf8Bytes(destination, out int bytesWritten))
                        {
                            status = OperationStatus.DestinationTooSmall;
                            goto Finish;
                        }

                        // Advance the source by what the decoder consumed, not by the rune's
                        // own UTF-16 length: the rune may be a substituted U+FFFD.
                        source = source.Slice(charsConsumed);
                        destination = destination.Slice(bytesWritten);
                        status = OperationStatus.Done; // a replaced sequence still counts as success
                        continue;
                }
            }

        Finish:

            numCharsRead = originalSourceLength - source.Length;
            numBytesWritten = originalDestinationLength - destination.Length;

            // Invariant: Done implies the whole input was consumed. (Equivalently: either
            // everything was consumed, or the status is something other than Done.)
            Debug.Assert(numCharsRead == originalSourceLength || status != OperationStatus.Done,
                         "Cannot report OperationStatus.Done if we haven't consumed the entire input buffer.");

            return status;
        }
Пример #2
0
 // Data-driven check of Rune.DecodeUtf16: each data row supplies the input chars plus
 // the expected status, decoded scalar value and number of chars consumed.
 // NOTE(review): no [Theory] attribute is visible in this excerpt; xUnit needs one for
 // [InlineData] rows to run - confirm it is present in the full file.
 [InlineData(new char[] { '\ud800', '\u1234' }, OperationStatus.InvalidData, 0xFFFD, 1)] // standalone high surrogate
 public static void DecodeUtf16(char[] data, OperationStatus expectedOperationStatus, int expectedRuneValue, int expectedCharsConsumed)
 {
     // Act
     OperationStatus actualOperationStatus = Rune.DecodeUtf16(data, out Rune decodedRune, out int consumedCount);

     // Assert
     Assert.Equal(expectedOperationStatus, actualOperationStatus);
     Assert.Equal(expectedRuneValue, decodedRune.Value);
     Assert.Equal(expectedCharsConsumed, consumedCount);
 }