Ejemplo n.º 1
0
        /// <summary>
        /// Scans the UTF-8 buffer <paramref name="pInputBuffer"/> of byte length <paramref name="inputLength"/>
        /// and returns a pointer to the start of the first data that could not be decoded as a complete,
        /// well-formed UTF-8 sequence.
        /// </summary>
        /// <remarks>
        /// When every byte decodes successfully the returned pointer is the end of the buffer,
        /// that is <c>pInputBuffer + inputLength</c>.
        /// </remarks>
        /// <param name="pInputBuffer">Pointer to the UTF-8 byte buffer. May be null only when <paramref name="inputLength"/> is zero.</param>
        /// <param name="inputLength">Length of the buffer, in bytes.</param>
        /// <param name="utf16CodeUnitCountAdjustment">Zero or negative number which, added to the number of bytes processed, gives the total UTF-16 code unit count of the processed data.</param>
        /// <param name="scalarCountAdjustment">Zero or negative number which, added to the total UTF-16 code unit count, gives the total scalar count of the processed data.</param>
        /// <returns>Pointer to the first byte that was not consumed by a successful decode.</returns>
        public static byte *GetPointerToFirstInvalidByte(byte *pInputBuffer, int inputLength, out int utf16CodeUnitCountAdjustment, out int scalarCountAdjustment)
        {
            Debug.Assert(inputLength >= 0, "Input length must not be negative.");
            Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null.");

            ReadOnlySpan<byte> remaining = new ReadOnlySpan<byte>(pInputBuffer, inputLength);
            int utf16CodeUnits = 0;
            int scalarValues = 0;

            // Decode one scalar at a time; stop as soon as the data runs out or a decode
            // reports anything other than success.
            while (remaining.Length > 0
                && Rune.DecodeFromUtf8(remaining, out Rune decoded, out int decodedLength) == OperationStatus.Done)
            {
                utf16CodeUnits += decoded.Utf16SequenceLength;
                scalarValues += 1;
                remaining = remaining.Slice(decodedLength);
            }

            // Everything in front of 'remaining' was decoded successfully.
            int validByteCount = inputLength - remaining.Length;

            utf16CodeUnitCountAdjustment = utf16CodeUnits - validByteCount;
            scalarCountAdjustment = scalarValues - utf16CodeUnits;
            return pInputBuffer + validByteCount;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Attempts to interpret <paramref name="searchTerm"/> as exactly one <see cref="Rune"/>.
        /// </summary>
        /// <remarks>
        /// Recognized inputs are a <see cref="char"/>, a <see cref="Rune"/>, a <see cref="string"/>
        /// and a <c>ustring</c>. Textual inputs succeed only when decoding the first scalar
        /// consumes the entire text.
        /// </remarks>
        /// <param name="searchTerm">The value to convert.</param>
        /// <param name="parsed">The resulting rune on success; <see langword="default"/> when a textual or unrecognized input fails.</param>
        /// <returns><see langword="true"/> if <paramref name="searchTerm"/> represents a single rune.</returns>
        private static bool TryParseSearchTermAsRune(object searchTerm, out Rune parsed)
        {
            switch (searchTerm)
            {
                case char singleChar:
                    // The outcome (and the value of 'parsed') is whatever Rune.TryCreate reports.
                    return Rune.TryCreate(singleChar, out parsed);

                case Rune existingRune:
                    parsed = existingRune;
                    return true;

                case string utf16Text:
                    if (Rune.DecodeFromUtf16(utf16Text, out parsed, out int charsConsumed) == OperationStatus.Done
                        && charsConsumed == utf16Text.Length)
                    {
                        return true;
                    }
                    break;

                case ustring utf8Text:
                    if (Rune.DecodeFromUtf8(utf8Text.AsBytes(), out parsed, out int bytesConsumed) == OperationStatus.Done
                        && bytesConsumed == utf8Text.Length)
                    {
                        return true;
                    }
                    break;
            }

            // The search term could not be turned into a single Rune.
            parsed = default;
            return false;
        }
Ejemplo n.º 3
0
        // Transcodes UTF-8 input to UTF-16 one scalar value at a time.
        //
        // inputLength is measured in bytes, outputCharsRemaining in chars.
        // On return, pInputBufferRemaining points at the next byte that would have been read and
        // pOutputBufferRemaining points at the next char that would have been written.
        // The result is Done when all input was transcoded, DestinationTooSmall when the next
        // scalar did not fit in the output, or the status reported by Rune.DecodeFromUtf8 when
        // decoding stopped early.
        public static OperationStatus TranscodeToUtf16(byte *pInputBuffer, int inputLength, char *pOutputBuffer, int outputCharsRemaining, out byte *pInputBufferRemaining, out char *pOutputBufferRemaining)
        {
            Debug.Assert(inputLength >= 0, "Input length must not be negative.");
            Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null.");

            Debug.Assert(outputCharsRemaining >= 0, "Destination length must not be negative.");
            Debug.Assert(pOutputBuffer != null || outputCharsRemaining == 0, "Destination length must be zero if destination buffer pointer is null.");

            ReadOnlySpan<byte> remainingInput = new ReadOnlySpan<byte>(pInputBuffer, inputLength);
            Span<char> remainingOutput = new Span<char>(pOutputBuffer, outputCharsRemaining);
            OperationStatus result = OperationStatus.Done;

            while (remainingInput.Length > 0)
            {
                result = Rune.DecodeFromUtf8(remainingInput, out Rune scalar, out int bytesRead);
                if (result != OperationStatus.Done)
                {
                    break;
                }

                if (scalar.TryEncodeToUtf16(remainingOutput, out int charsWritten))
                {
                    // Only advance once the scalar has been both read and written.
                    remainingInput = remainingInput.Slice(bytesRead);
                    remainingOutput = remainingOutput.Slice(charsWritten);
                }
                else
                {
                    result = OperationStatus.DestinationTooSmall;
                    break;
                }
            }

            int bytesTranscoded = inputLength - remainingInput.Length;
            int charsProduced = outputCharsRemaining - remainingOutput.Length;

            pInputBufferRemaining = pInputBuffer + bytesTranscoded;
            pOutputBufferRemaining = pOutputBuffer + charsProduced;

            return result;
        }
Ejemplo n.º 4
0
        public ValueParseResult TryParse(ReadOnlySpan <byte> readerSpan, out string result, out int consumedLength, out int lineSpan, out int colSpan)
        {
            result         = null;
            consumedLength = 0;
            lineSpan       = 1;
            colSpan        = 0;

            // is input empty
            if (readerSpan.Length <= 0)
            {
                // did any prior processing occur
                return(this._inString
                    ? _cleanup(this, ValueParseResult.FailureEOF)
                    : ValueParseResult.EOF);
            }

            // if we are not continuing, ensure it's a string that's being parsed
            var startPos = 0;

            if (!this._inString)
            {
                if (readerSpan[consumedLength++] != JsonTokens.QuoteMark)
                {
                    if (Rune.DecodeFromUtf8(readerSpan, out var rune, out _) != OperationStatus.Done)
                    {
                        rune = default;
                    }

                    return(_cleanup(this, ValueParseResult.Failure("Unexpected token, expected \".", rune)));
                }

                startPos       = consumedLength;
                this._inString = true;
            }

            // if continuing, check if anything is pending in the buffer
            var         blen    = (int)this._buffContent;
            Span <char> decoded = stackalloc char[512];

            switch (this._buffContent)
            {
            // short escape: \" \\ \/ \b \f \n \r \t
            // long escape: \uXXXX
            case ContentType.EscapeSequence:
            case ContentType.ExtendedEscapeSequence:
                if (this._buffContent != ContentType.ExtendedEscapeSequence && readerSpan[0] == JsonTokens.UnicodePrefix)
                {
                    this._buffContent = ContentType.ExtendedEscapeSequence;
                    blen = (int)this._buffContent;
                }

                if (readerSpan.Length + this._buffPos < blen + 1)
                {
                    readerSpan.CopyTo(this.Buffer[this._buffPos..].Span);
        public Utf8Splitter(ReadOnlySpan <byte> span, ReadOnlySpan <byte> separator, StringSplitOptions splitOptions)
        {
            ReadOnlySpan <byte> separatorSlice = separator;

            do
            {
                var separatorStatus = Rune.DecodeFromUtf8(separatorSlice, out _, out int consumed);
                if (separatorStatus == OperationStatus.InvalidData)
                {
                    throw new ArgumentException("The separator is not valid UTF8.", nameof(separator));
                }
                separatorSlice = separatorSlice[consumed..];
Ejemplo n.º 6
0
        /// <summary>
        /// Determines whether <paramref name="input"/> ends with a complete, well-formed
        /// multi-byte UTF-8 sequence.
        /// </summary>
        /// <remarks>
        /// The last byte with a value of 0xC0 or above is treated as the start of the candidate
        /// sequence. The result is <see langword="true"/> only if decoding from that byte succeeds
        /// and consumes exactly the rest of the array. If no such byte exists (for example the
        /// array is empty or contains only bytes below 0xC0) the result is <see langword="false"/>.
        /// </remarks>
        private static bool EndsWithValidMultiByteUtf8Sequence(byte[] input)
        {
            // Walk backwards to the last byte that is >= 0xC0.
            int leadIndex = input.Length - 1;
            while (leadIndex >= 0 && input[leadIndex] < 0xC0)
            {
                leadIndex--;
            }

            if (leadIndex < 0)
            {
                return false; // no byte >= 0xC0 anywhere in the input
            }

            ReadOnlySpan<byte> tail = new ReadOnlySpan<byte>(input, leadIndex, input.Length - leadIndex);
            OperationStatus status = Rune.DecodeFromUtf8(tail, out _, out int sequenceLength);

            // The decoded sequence must reach exactly to the end of the array.
            return status == OperationStatus.Done && leadIndex + sequenceLength == input.Length;
        }
        /// <summary>
        /// Finds the index in <paramref name="utf8Data"/> of the first character that is not
        /// whitespace. Returns the length of <paramref name="utf8Data"/> when the data consists
        /// solely of whitespace characters (including when it is empty).
        /// </summary>
        public static int GetIndexOfFirstNonWhiteSpaceChar(ReadOnlySpan <byte> utf8Data)
        {
            // Tuned for ASCII input where, if anything needs trimming at all, only a
            // handful of leading bytes are expected to be whitespace.

            int index = 0;

            while (index < utf8Data.Length)
            {
                uint currentByte = utf8Data[index];

                // Cheap first test: reinterpreted as signed, only bytes in [ 21 .. 7F ]
                // compare greater than 0x20. Those can never be whitespace, so stop here.
                if ((sbyte)utf8Data[index] > (sbyte)0x20)
                {
                    break;
                }

                int advanceBy;

                if (UnicodeUtility.IsAsciiCodePoint(currentByte))
                {
                    // ASCII byte at or below 0x20: let Rune decide whether it is whitespace.
                    if (!Rune.IsWhiteSpace(new Rune(currentByte)))
                    {
                        break;
                    }

                    advanceBy = 1;
                }
                else
                {
                    // Non-ASCII: decode the whole scalar and classify it.
                    Rune.DecodeFromUtf8(utf8Data.Slice(index), out Rune scalar, out int scalarLength);
                    if (!Rune.IsWhiteSpace(scalar))
                    {
                        break;
                    }

                    advanceBy = scalarLength;
                }

                index += advanceBy;
            }

            return index;
        }
Ejemplo n.º 8
0
        public ValueParseResult TryParse(ReadOnlySpan <byte> readerSpan, out bool result, out int consumedLength, out int lineSpan, out int colSpan)
        {
            result         = false;
            consumedLength = 0;
            lineSpan       = 1;
            colSpan        = 0;

            // is input empty
            if (readerSpan.Length <= 0)
            {
                // did any prior processing occur
                return(this._buffPos > 0
                    ? _cleanup(this, ValueParseResult.FailureEOF)
                    : ValueParseResult.EOF);
            }

            // determine what we're reading
            var expectedLength = 4;
            var src            = this._buffPos > 0 ? this.Buffer.Span : readerSpan;

            switch (src[0])
            {
            case JsonTokens.TrueFirst:
                result = true;
                break;

            case JsonTokens.FalseFirst:
                expectedLength = 5;
                break;

            default:
                this._buffPos = 0;
                if (Rune.DecodeFromUtf8(readerSpan, out var rune, out _) != OperationStatus.Done)
                {
                    rune = default;
                }

                return(ValueParseResult.Failure("Unexpected token, expected true/false.", rune));
            }

            // if reader buffer is too small, copy its contents then signal EOF
            var tooSmall = readerSpan.Length < expectedLength - this._buffPos;

            if (tooSmall || this._buffPos > 0)
            {
                var tlen = Math.Min(expectedLength - this._buffPos, readerSpan.Length);

                readerSpan.Slice(0, tlen).CopyTo(this.Buffer.Span[this._buffPos..]);
Ejemplo n.º 9
0
        /// <summary>
        /// Locates the first position in <paramref name="utf8Text"/> that cannot be passed through as-is.
        /// </summary>
        /// <param name="utf8Text">The UTF-8 text to inspect.</param>
        /// <returns>
        /// The byte offset of the first scalar for which <c>WillEncode</c> returns true, or of the
        /// first data that fails to decode; -1 if the whole input decodes and nothing needs encoding.
        /// </returns>
        public virtual int FindFirstCharacterToEncodeUtf8(ReadOnlySpan <byte> utf8Text)
        {
            int offset = 0;

            while (offset < utf8Text.Length)
            {
                OperationStatus status = Rune.DecodeFromUtf8(utf8Text.Slice(offset), out Rune scalar, out int scalarLength);

                // Stop at undecodable data, or at a scalar the encoder wants to escape.
                bool stopHere = status != OperationStatus.Done || WillEncode(scalar.Value);
                if (stopHere)
                {
                    return offset;
                }

                offset += scalarLength;
            }

            return -1;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Returns <paramref name="value"/> if it is null, empty, or contains only well-formed UTF-8 data;
        /// otherwise allocates a new <see cref="Utf8String"/> instance containing the same data as
        /// <paramref name="value"/> but where all invalid UTF-8 sequences have been replaced
        /// with U+FFFD.
        /// </summary>
        /// <param name="value">The string to validate. May be null.</param>
        /// <returns>
        /// <paramref name="value"/> itself when no fix-up is needed; otherwise a new instance
        /// built from the repaired bytes.
        /// </returns>
        public static Utf8String ValidateAndFixupUtf8String(Utf8String value)
        {
            if (Utf8String.IsNullOrEmpty(value))
            {
                return(value);
            }

            ReadOnlySpan <byte> valueAsBytes = value.AsBytes();

            int idxOfFirstInvalidData = GetIndexOfFirstInvalidUtf8Sequence(valueAsBytes, out _);

            if (idxOfFirstInvalidData < 0)
            {
                return(value);
            }

            // TODO_UTF8STRING: Replace this with the faster implementation once it's available.
            // (The faster implementation is in the dev/utf8string_bak branch currently.)

            // The stream is only a growable scratch buffer; dispose it once the result has been created.
            using (MemoryStream memStream = new MemoryStream())
            {
                // Everything before the first invalid sequence is already known to be good.
                memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData));

                // At this point valueAsBytes is non-empty: a non-negative index means invalid
                // data starts somewhere inside the buffer.
                valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData);
                do
                {
                    if (Rune.DecodeFromUtf8(valueAsBytes, out _, out int bytesConsumed) == OperationStatus.Done)
                    {
                        // Valid scalar value - copy data as-is to MemoryStream
                        memStream.Write(valueAsBytes.Slice(0, bytesConsumed));
                    }
                    else
                    {
                        // Invalid scalar value - copy U+FFFD to MemoryStream
                        memStream.Write(ReplacementCharSequence);
                    }

                    // bytesConsumed is used for the failure case too, to skip past the bad data.
                    valueAsBytes = valueAsBytes.Slice(bytesConsumed);
                } while (!valueAsBytes.IsEmpty);

                bool success = memStream.TryGetBuffer(out ArraySegment <byte> memStreamBuffer);

                Debug.Assert(success, "Couldn't get underlying MemoryStream buffer.");

                return(Utf8String.DangerousCreateWithoutValidation(memStreamBuffer, assumeWellFormed: true));
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Finds the byte offset of the first data in <paramref name="utf8Data"/> that does not decode
        /// as a complete, well-formed UTF-8 sequence.
        /// </summary>
        /// <param name="utf8Data">The bytes to validate.</param>
        /// <param name="isAscii">
        /// <see langword="true"/> if every scalar decoded before the scan stopped was ASCII
        /// (also <see langword="true"/> when nothing was decoded).
        /// </param>
        /// <returns>The offset of the first undecodable data, or -1 if the whole input decoded successfully.</returns>
        public static int GetIndexOfFirstInvalidUtf8Sequence(ReadOnlySpan <byte> utf8Data, out bool isAscii)
        {
            // TODO_UTF8STRING: Replace this with the faster drop-in replacement when it's available (coreclr #21948).

            bool onlyAsciiSoFar = true;
            int offset = 0;

            while (offset < utf8Data.Length)
            {
                OperationStatus status = Rune.DecodeFromUtf8(utf8Data.Slice(offset), out Rune scalar, out int scalarLength);
                if (status != OperationStatus.Done)
                {
                    // Report where decoding stopped; the ASCII flag covers only the data before it.
                    isAscii = onlyAsciiSoFar;
                    return offset;
                }

                if (!scalar.IsAscii)
                {
                    onlyAsciiSoFar = false;
                }

                offset += scalarLength;
            }

            isAscii = onlyAsciiSoFar;
            return -1;
        }
Ejemplo n.º 12
0
        public static void Trim(string input)
        {
            // Arrange

            using BoundedUtf8Span boundedSpan = new BoundedUtf8Span(input);
            Utf8Span span = boundedSpan.Span;

            // Act

            Utf8Span trimmed = span.Trim();

            // Assert
            // Compute the trim manually and ensure it matches the trimmed span's characteristics.

            ReadOnlySpan <byte> utf8Bytes = span.Bytes;

            while (!utf8Bytes.IsEmpty)
            {
                OperationStatus status = Rune.DecodeFromUtf8(utf8Bytes, out Rune decodedRune, out int bytesConsumed);
                Assert.Equal(OperationStatus.Done, status);

                if (!Rune.IsWhiteSpace(decodedRune))
                {
                    break;
                }

                utf8Bytes = utf8Bytes.Slice(bytesConsumed);
            }
            while (!utf8Bytes.IsEmpty)
            {
                OperationStatus status = Rune.DecodeLastFromUtf8(utf8Bytes, out Rune decodedRune, out int bytesConsumed);
                Assert.Equal(OperationStatus.Done, status);

                if (!Rune.IsWhiteSpace(decodedRune))
                {
                    break;
                }

                utf8Bytes = utf8Bytes[..^ bytesConsumed];
Ejemplo n.º 13
0
        public ValueParseResult TryParse(ReadOnlySpan <byte> readerSpan, out ImmutableArray <JsonValue> result, out int consumedLength, out int lineSpan, out int colSpan)
        {
            result         = default;
            consumedLength = 0;
            lineSpan       = 1;
            colSpan        = 0;

            // is input empty
            if (readerSpan.Length <= 0 && this._innerReader == null)
            {
                // did any prior processing occur
                return(this._arr != null
                    ? _cleanup(this, ValueParseResult.FailureEOF)
                    : ValueParseResult.EOF);
            }

            // if we are not continuing, ensure it's an object that's being parsed
            if (this._arr == null)
            {
                if (readerSpan[consumedLength++] != JsonTokens.OpeningBracket)
                {
                    if (Rune.DecodeFromUtf8(readerSpan, out var rune, out _) != OperationStatus.Done)
                    {
                        rune = default;
                    }

                    return(_cleanup(this, ValueParseResult.Failure("Unexpected token, expected {.", rune)));
                }

                this._expectedNext = ExpectedToken.ValueOrEnd;
                this._arr          = ImmutableArray.CreateBuilder <JsonValue>();
                ++this._colSpan;
                ++this._streamPos;
            }

            // if continuing, check if any value is being parsed
            if (this._innerReader != null)
            {
                // valid only if expecting value
                if (this._expectedNext != ExpectedToken.Value && this._expectedNext != ExpectedToken.ValueOrEnd)
                {
                    return(_cleanup(this, ValueParseResult.Failure("Invalid internal state.", default)));
                }

                // parse inner value
                ++consumedLength;
                var innerResult = this.ParseInner(readerSpan, ref consumedLength);
                switch (innerResult.Type)
                {
                case ValueParseResultType.Success:
                    this._innerReader.Reset();
                    this._innerReader = null;
                    break;

                case ValueParseResultType.EOF:
                    return(innerResult);

                case ValueParseResultType.Failure:
                    return(_cleanup(this, innerResult));
                }
            }

            // read and parse array items
            var completedParsing = false;

            while (consumedLength < readerSpan.Length)
            {
                switch (readerSpan[consumedLength++])
                {
                case JsonTokens.WhitespaceSpace:
                    ++this._colSpan;
                    ++this._streamPos;
                    break;

                case JsonTokens.WhitespaceHorizontalTab:
                    this._colSpan += 4;     // fite me
                    ++this._streamPos;
                    break;

                case JsonTokens.WhitespaceCarriageReturn:
                    // usually as part of CRLF, really no other reason for it to exist
                    // old macs don't exist
                    break;

                case JsonTokens.WhitespaceNewline:
                    ++this._lineSpan;
                    this._colSpan = 0;
                    ++this._streamPos;
                    break;

                case JsonTokens.ItemSeparator:
                    if (this._expectedNext != ExpectedToken.ItemSeparatorOrEnd)
                    {
                        return(_cleanup(this, ValueParseResult.Failure("Unexpected item separator.", new Rune(JsonTokens.ItemSeparator))));
                    }

                    ++this._colSpan;
                    ++this._streamPos;
                    this._expectedNext = ExpectedToken.Value;
                    break;

                case JsonTokens.ClosingBracket:
                    if (this._expectedNext != ExpectedToken.ItemSeparatorOrEnd && this._expectedNext != ExpectedToken.ValueOrEnd)
                    {
                        return(_cleanup(this, ValueParseResult.Failure("Unexpected array end.", new Rune(JsonTokens.ClosingBracket))));
                    }

                    ++this._colSpan;
                    ++this._streamPos;
                    completedParsing = true;
                    break;

                case JsonTokens.NullFirst:
                    if (this._expectedNext != ExpectedToken.Value && this._expectedNext != ExpectedToken.ValueOrEnd)
                    {
                        return(_cleanup(this, ValueParseResult.Failure("Unexpected array item (null).", new Rune(JsonTokens.NullFirst))));
                    }

                    this._innerReader = this._innerReaders.NullReader;
                    break;

                case JsonTokens.TrueFirst:
                case JsonTokens.FalseFirst:
                    if (this._expectedNext != ExpectedToken.Value && this._expectedNext != ExpectedToken.ValueOrEnd)
                    {
                        return(_cleanup(this, ValueParseResult.Failure("Unexpected array item (boolean).", new Rune(readerSpan[consumedLength - 1]))));
                    }

                    this._innerReader = this._innerReaders.BooleanReader;
                    break;

                case JsonTokens.NumberSign:
                case JsonTokens.Digit0:
                case JsonTokens.Digit1:
                case JsonTokens.Digit2:
                case JsonTokens.Digit3:
                case JsonTokens.Digit4:
                case JsonTokens.Digit5:
                case JsonTokens.Digit6:
                case JsonTokens.Digit7:
                case JsonTokens.Digit8:
                case JsonTokens.Digit9:
                    if (this._expectedNext != ExpectedToken.Value && this._expectedNext != ExpectedToken.ValueOrEnd)
                    {
                        return(_cleanup(this, ValueParseResult.Failure("Unexpected array item (number).", new Rune(readerSpan[consumedLength - 1]))));
                    }

                    this._innerReader = this._innerReaders.NumberReader;
                    break;

                case JsonTokens.QuoteMark:
                    if (this._expectedNext != ExpectedToken.Value && this._expectedNext != ExpectedToken.ValueOrEnd)
                    {
                        return(_cleanup(this, ValueParseResult.Failure("Unexpected array item (string).", new Rune(JsonTokens.QuoteMark))));
                    }

                    this._innerReader = this._innerReaders.StringReader;
                    break;

                case JsonTokens.OpeningBracket:
                    if (this._expectedNext != ExpectedToken.Value && this._expectedNext != ExpectedToken.ValueOrEnd)
                    {
                        return(_cleanup(this, ValueParseResult.Failure("Unexpected array item (array).", new Rune(JsonTokens.OpeningBracket))));
                    }

                    this._innerReader = new JsonArrayReader(this._innerReaders);
                    break;

                case JsonTokens.OpeningBrace:
                    if (this._expectedNext != ExpectedToken.Value && this._expectedNext != ExpectedToken.ValueOrEnd)
                    {
                        return(_cleanup(this, ValueParseResult.Failure("Unexpected array item (object).", new Rune(JsonTokens.OpeningBracket))));
                    }

                    this._innerReader = new JsonObjectReader(this._innerReaders);
                    break;

                default:
                    if (Rune.DecodeFromUtf8(readerSpan[(consumedLength - 1)..], out var rune, out _) != OperationStatus.Done)
Ejemplo n.º 14
0
        // Core UTF-8 to UTF-8 encoding loop; skips the call to FindFirstCharacterToEncodeUtf8.
        //
        // Decodes utf8Source one scalar value at a time. Scalars for which WillEncode returns false
        // are copied to utf8Destination in their UTF-8 form; all others (including data that failed
        // to decode) are handed to TryEncodeUnicodeScalarUtf8.
        //
        // bytesConsumed / bytesWritten report how far the source and destination were advanced.
        // Returns Done when the whole source was processed, DestinationTooSmall when the next
        // scalar's output did not fit, or NeedMoreData when the source ends in an incomplete
        // sequence and isFinalBlock is false.
        private protected virtual OperationStatus EncodeUtf8Core(
            ReadOnlySpan <byte> utf8Source,
            Span <byte> utf8Destination,
            out int bytesConsumed,
            out int bytesWritten,
            bool isFinalBlock)
        {
            // Remember the starting lengths; the out values are derived from how much the spans shrink.
            int originalUtf8SourceLength      = utf8Source.Length;
            int originalUtf8DestinationLength = utf8Destination.Length;

            const int   TempUtf16CharBufferLength = 24; // arbitrarily chosen, but sufficient for any reasonable implementation
            Span <char> utf16ScratchBuffer        = stackalloc char[TempUtf16CharBufferLength];

            while (!utf8Source.IsEmpty)
            {
                OperationStatus opStatus = Rune.DecodeFromUtf8(utf8Source, out Rune scalarValue, out int bytesConsumedJustNow);
                if (opStatus != OperationStatus.Done)
                {
                    // An incomplete trailing sequence is only reported as NeedMoreData when the caller
                    // has indicated that more input may follow; otherwise it is treated like invalid data.
                    if (!isFinalBlock && opStatus == OperationStatus.NeedMoreData)
                    {
                        goto NeedMoreData;
                    }

                    // The replacement character is sent down the encode path; bytesConsumedJustNow
                    // is still used below to step over the undecodable bytes.
                    Debug.Assert(scalarValue == Rune.ReplacementChar); // DecodeFromUtf8 should've performed substitution
                    goto MustEncode;
                }

                if (!WillEncode(scalarValue.Value))
                {
                    // Pass-through path: the packed UTF-8 representation is emitted least-significant
                    // byte first until no non-zero bits remain.
                    uint utf8lsb    = (uint)UnicodeHelpers.GetUtf8RepresentationForScalarValue((uint)scalarValue.Value);
                    int  dstIdxTemp = 0;
                    do
                    {
                        if ((uint)dstIdxTemp >= (uint)utf8Destination.Length)
                        {
                            // utf8Destination has not been sliced yet, so any bytes already stored for
                            // this scalar are not counted in bytesWritten.
                            goto DestinationTooSmall;
                        }
                        utf8Destination[dstIdxTemp++] = (byte)utf8lsb;
                    } while ((utf8lsb >>= 8) != 0);
                    utf8Source      = utf8Source.Slice(bytesConsumedJustNow);
                    utf8Destination = utf8Destination.Slice(dstIdxTemp);
                    continue;
                }

                // Reached either by falling through (WillEncode returned true) or by the jump from
                // the decode-failure branch above.
MustEncode:

                if (!TryEncodeUnicodeScalarUtf8((uint)scalarValue.Value, utf16ScratchBuffer, utf8Destination, out int bytesWrittenJustNow))
                {
                    goto DestinationTooSmall;
                }

                utf8Source      = utf8Source.Slice(bytesConsumedJustNow);
                utf8Destination = utf8Destination.Slice(bytesWrittenJustNow);
            }

            // And we're finished!

            OperationStatus retVal = OperationStatus.Done;

            // Shared exit: every outcome reports progress the same way.
ReturnCommon:
            bytesConsumed = originalUtf8SourceLength - utf8Source.Length;
            bytesWritten  = originalUtf8DestinationLength - utf8Destination.Length;
            return(retVal);

NeedMoreData:
            retVal = OperationStatus.NeedMoreData;
            goto ReturnCommon;

DestinationTooSmall:
            retVal = OperationStatus.DestinationTooSmall;
            goto ReturnCommon;
        }
Ejemplo n.º 15
0
 [InlineData(new byte[] { 0xF0, 0x9F, 0x98, 0xB2 }, OperationStatus.Done, 0x1F632, 4)]       // [ F0 9F 98 B2 ] is U+1F632 ASTONISHED FACE
 public static void DecodeFromUtf8(byte[] data, OperationStatus expectedOperationStatus, int expectedRuneValue, int expectedBytesConsumed)
 {
     // Act: decode the first scalar value from the supplied bytes.
     OperationStatus actualOperationStatus = Rune.DecodeFromUtf8(data, out Rune actualRune, out int actualBytesConsumed);

     // Assert: status, decoded value, and consumed byte count must all match the expectation.
     Assert.Equal(expectedOperationStatus, actualOperationStatus);
     Assert.Equal(expectedRuneValue, actualRune.Value);
     Assert.Equal(expectedBytesConsumed, actualBytesConsumed);
 }
Ejemplo n.º 16
0
        /// <summary>
        /// Transcodes the UTF-8 <paramref name="source"/> buffer to <paramref name="destination"/> as UTF-16.
        /// </summary>
        /// <remarks>
        /// If <paramref name="replaceInvalidSequences"/> is <see langword="true"/>, invalid UTF-8 sequences
        /// in <paramref name="source"/> will be replaced with U+FFFD in <paramref name="destination"/>, and
        /// this method will not return <see cref="OperationStatus.InvalidData"/>.
        /// </remarks>
        public static unsafe OperationStatus ToUtf16(ReadOnlySpan <byte> source, Span <char> destination, out int bytesRead, out int charsWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true)
        {
            // Throwaway span accesses - workaround for https://github.com/dotnet/runtime/issues/12332

            _ = source.Length;
            _ = destination.Length;

            // We'll be mutating these values throughout our loop.

            fixed(byte *pOriginalSource = &MemoryMarshal.GetReference(source))
            fixed(char *pOriginalDestination = &MemoryMarshal.GetReference(destination))
            {
                // We're going to bulk transcode as much as we can in a loop, iterating
                // every time we see bad data that requires replacement.

                OperationStatus operationStatus        = OperationStatus.Done;
                byte *          pInputBufferRemaining  = pOriginalSource;
                char *          pOutputBufferRemaining = pOriginalDestination;

                while (!source.IsEmpty)
                {
                    // We've pinned the spans at the entry point to this method.
                    // It's safe for us to use Unsafe.AsPointer on them during this loop.

                    operationStatus = Utf8Utility.TranscodeToUtf16(
                        pInputBuffer: (byte *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)),
                        inputLength: source.Length,
                        pOutputBuffer: (char *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(destination)),
                        outputCharsRemaining: destination.Length,
                        pInputBufferRemaining: out pInputBufferRemaining,
                        pOutputBufferRemaining: out pOutputBufferRemaining);

                    // If we finished the operation entirely or we ran out of space in the destination buffer,
                    // or if we need more input data and the caller told us that there's possibly more data
                    // coming, return immediately.

                    if (operationStatus <= OperationStatus.DestinationTooSmall ||
                        (operationStatus == OperationStatus.NeedMoreData && !isFinalBlock))
                    {
                        break;
                    }

                    // We encountered invalid data, or we need more data but the caller told us we're
                    // at the end of the stream. In either case treat this as truly invalid.
                    // If the caller didn't tell us to replace invalid sequences, return immediately.

                    if (!replaceInvalidSequences)
                    {
                        operationStatus = OperationStatus.InvalidData; // status code may have been NeedMoreData - force to be error
                        break;
                    }

                    // We're going to attempt to write U+FFFD to the destination buffer.
                    // Do we even have enough space to do so?

                    destination = destination.Slice((int)(pOutputBufferRemaining - (char *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(destination))));

                    if (destination.IsEmpty)
                    {
                        operationStatus = OperationStatus.DestinationTooSmall;
                        break;
                    }

                    destination[0] = (char)UnicodeUtility.ReplacementChar;
                    destination    = destination.Slice(1);

                    // Now figure out how many bytes of the source we must skip over before we should retry
                    // the operation. This might be more than 1 byte.

                    source = source.Slice((int)(pInputBufferRemaining - (byte *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source))));
                    Debug.Assert(!source.IsEmpty, "Expected 'Done' if source is fully consumed.");

                    Rune.DecodeFromUtf8(source, out _, out int bytesConsumedJustNow);
                    source = source.Slice(bytesConsumedJustNow);

                    operationStatus        = OperationStatus.Done; // we patched the error - if we're about to break out of the loop this is a success case
                    pInputBufferRemaining  = (byte *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source));
                    pOutputBufferRemaining = (char *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(destination));
                }

                // Not possible to make any further progress - report to our caller how far we got.

                bytesRead    = (int)(pInputBufferRemaining - pOriginalSource);
                charsWritten = (int)(pOutputBufferRemaining - pOriginalDestination);
                return(operationStatus);
            }
        }
Ejemplo n.º 17
0
            public bool MoveNext()
            {
                var operationStatus = Rune.DecodeFromUtf8(_remaining, out _current, out var bytesConsumed);

                _remaining = _remaining[bytesConsumed..];
Ejemplo n.º 18
0
        private void PollSDLEvents()
        {
            Span <char> textEditingBuffer = stackalloc char[SDL.Keyboard.TextEditingEvent.TextSize];

            while (SDL.PollEvent(out SDL.Event ev) == 1)
            {
                switch (ev.Type)
                {
                case SDL.EventType.Quit:
                    Interlocked.Increment(ref _isExiting);
                    break;

                    #region Joystick

                case SDL.EventType.JoyDeviceAdded:
                    Joystick.AddDevice(ev.JoystickDevice.Which);
                    break;

                case SDL.EventType.JoyDeviceRemoved:
                    Joystick.RemoveDevice(ev.JoystickDevice.Which);
                    break;

                    #endregion

                    #region GameController

                case SDL.EventType.ControllerDeviceRemoved:
                    GamePad.RemoveDevice(ev.ControllerDevice.Which);
                    break;

                case SDL.EventType.ControllerButtonUp:
                case SDL.EventType.ControllerButtonDown:
                case SDL.EventType.ControllerAxisMotion:
                    GamePad.UpdatePacketInfo(ev.ControllerDevice.Which, ev.ControllerDevice.TimeStamp);
                    break;

                    #endregion

                    #region Mouse

                case SDL.EventType.MouseWheel:
                    _window.Mouse.ScrollX += ev.MouseWheel.X * MouseWheelDelta;
                    _window.Mouse.ScrollY += ev.MouseWheel.Y * MouseWheelDelta;
                    break;

                case SDL.EventType.MouseMotion:
                    _window.Mouse.State.X = ev.MouseMotion.X;
                    _window.Mouse.State.Y = ev.MouseMotion.Y;
                    break;

                    #endregion

                    #region Keyboard

                case SDL.EventType.KeyDown:
                {
                    bool hasMapping = KeyboardUtil.ToXna(ev.KeyboardKey.Keysym.Sym, out var key);
                    if (hasMapping)
                    {
                        if (!Keyboard._keysDown.Contains(key))
                        {
                            Keyboard._keysDown.Add(key);
                        }
                    }

                    // TODO: validate rune?
                    Rune.TryCreate(ev.KeyboardKey.Keysym.Sym, out var rune);

                    var inputEv = new TextInputEventArgs(rune, hasMapping ? key : (Keys?)null);
                    _window.OnKeyDown(inputEv);
                    break;
                }

                case SDL.EventType.KeyUp:
                {
                    bool hasMapping = KeyboardUtil.ToXna(ev.KeyboardKey.Keysym.Sym, out var key);
                    if (hasMapping)
                    {
                        Keyboard._keysDown.Remove(key);
                    }

                    // TODO: validate rune?
                    Rune.TryCreate(ev.KeyboardKey.Keysym.Sym, out var rune);

                    _window.OnKeyUp(new TextInputEventArgs(rune, hasMapping ? key : (Keys?)null));
                    break;
                }

                    #endregion

                    #region Text-Input/Editing

                case SDL.EventType.TextInput:
                    unsafe
                    {
                        var utf8 = new Span <byte>(ev.TextInput.Text, SDL.Keyboard.TextInputEvent.TextSize);
                        utf8 = SliceToNullTerminator(utf8);
                        while (!utf8.IsEmpty)
                        {
                            var status = Rune.DecodeFromUtf8(utf8, out Rune rune, out int bytesConsumed);
                            if (status != OperationStatus.Done)
                            {
                                // This should never occur if SDL gives use valid data.
                                throw new InvalidDataException("Failed to decode UTF-8 text input: " + status);
                            }
                            utf8 = utf8[bytesConsumed..];
        public bool MoveNext()
        {
            var status = Rune.DecodeFromUtf8(_utf8, out _current, out int consumed);

            _utf8 = _utf8[consumed..];
Ejemplo n.º 20
0
        /// <summary>
        /// Returns <paramref name="value"/> if it is null, empty, or contains only well-formed UTF-8 data;
        /// otherwise allocates a new <see cref="Utf8String"/> instance containing the same data as
        /// <paramref name="value"/> but where all invalid UTF-8 sequences have been replaced
        /// with U+FFFD.
        /// </summary>
        /// <param name="value">The string to validate. May be null.</param>
        /// <returns>
        /// <paramref name="value"/> itself when no fix-up is required; otherwise a new instance
        /// built from the repaired bytes.
        /// </returns>
        public static Utf8String ValidateAndFixupUtf8String(Utf8String value)
        {
            // The documented contract hands null back to the caller, so it must be
            // checked before the Length property is dereferenced.
            if (value is null || value.Length == 0)
            {
                return(value);
            }

            ReadOnlySpan <byte> valueAsBytes = value.AsBytes();

            // A negative index is treated as "no invalid sequence found".
            int idxOfFirstInvalidData = GetIndexOfFirstInvalidUtf8Sequence(valueAsBytes, out _);

            if (idxOfFirstInvalidData < 0)
            {
                return(value);
            }

            // TODO_UTF8STRING: Replace this with the faster implementation once it's available.
            // (The faster implementation is in the dev/utf8string_bak branch currently.)

            MemoryStream memStream = new MemoryStream();

#if (!NETSTANDARD2_0 && !NETFRAMEWORK)
            // Everything before the first invalid sequence is copied through untouched.
            memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData));

            valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData);
            do
            {
                if (Rune.DecodeFromUtf8(valueAsBytes, out _, out int bytesConsumed) == OperationStatus.Done)
                {
                    // Valid scalar value - copy data as-is to MemoryStream
                    memStream.Write(valueAsBytes.Slice(0, bytesConsumed));
                }
                else
                {
                    // Invalid scalar value - copy U+FFFD to MemoryStream
                    memStream.Write(ReplacementCharSequence);
                }

                // Whether or not the decode succeeded, advance past the bytes it reported.
                valueAsBytes = valueAsBytes.Slice(bytesConsumed);
            } while (!valueAsBytes.IsEmpty);
#else
            if (!MemoryMarshal.TryGetArray(value.AsMemoryBytes(), out ArraySegment <byte> valueArraySegment))
            {
                Debug.Fail("Utf8String on netstandard should always be backed by an array.");
            }

            // Everything before the first invalid sequence is copied through untouched.
            memStream.Write(valueArraySegment.Array, valueArraySegment.Offset, idxOfFirstInvalidData);

            // ArraySegment offsets are relative to the start of the backing array, so the
            // segment's own offset has to be carried along when skipping the valid prefix.
            valueArraySegment = new ArraySegment <byte>(
                valueArraySegment.Array,
                valueArraySegment.Offset + idxOfFirstInvalidData,
                valueArraySegment.Count - idxOfFirstInvalidData);
            do
            {
                if (Rune.DecodeFromUtf8(valueArraySegment, out _, out int bytesConsumed) == OperationStatus.Done)
                {
                    // Valid scalar value - copy data as-is to MemoryStream
                    memStream.Write(valueArraySegment.Array, valueArraySegment.Offset, bytesConsumed);
                }
                else
                {
                    // Invalid scalar value - copy U+FFFD to MemoryStream
                    memStream.Write(ReplacementCharSequence, 0, ReplacementCharSequence.Length);
                }

                // Whether or not the decode succeeded, advance past the bytes it reported.
                valueArraySegment = new ArraySegment <byte>(
                    valueArraySegment.Array,
                    valueArraySegment.Offset + bytesConsumed,
                    valueArraySegment.Count - bytesConsumed);
            } while (valueArraySegment.Count > 0);
#endif

            bool success = memStream.TryGetBuffer(out ArraySegment <byte> memStreamBuffer);
            Debug.Assert(success, "Couldn't get underlying MemoryStream buffer.");

            // The stream now holds only complete sequences or U+FFFD, so validation is skipped.
            return(Utf8String.UnsafeCreateWithoutValidation(memStreamBuffer));
        }
Ejemplo n.º 21
0
        public ValueParseResult TryParse(ReadOnlySpan <byte> readerSpan, out double result, out int consumedLength, out int lineSpan, out int colSpan)
        {
            result         = double.NaN;
            consumedLength = 0;
            lineSpan       = 1;
            colSpan        = 0;

            // if span is empty, and no parsing occured, signal EOF immediately
            if (readerSpan.Length <= 0 && this._lastPart == NumberPart.None)
            {
                return(ValueParseResult.EOF);
            }

            // if we are not continuing, check what we're parsing
            if (this.Buffer.Length == 0)
            {
                switch (readerSpan[consumedLength++])
                {
                // a number in JSON can begin with - or digits 0-9
                case JsonTokens.NumberSign:
                    this._currentStructure = NumberStructure.HasSign;
                    this._lastPart         = NumberPart.NumberSign;
                    break;

                // digit zero is a bit special in that if it's the first digit in a number, it becomes the only
                // legal digit before decimal point, hence special handling for it
                case JsonTokens.Digit0:
                    this._currentStructure = NumberStructure.LeadingZero;
                    this._lastPart         = NumberPart.FirstDigit;
                    break;

                // digits 1-9 are also valid as starting characters of a number, and unlike 0, they do not
                // restrict pre-decimal point digit count (though IEEE754 64-bit binary float limits still apply)
                case JsonTokens.Digit1:
                case JsonTokens.Digit2:
                case JsonTokens.Digit3:
                case JsonTokens.Digit4:
                case JsonTokens.Digit5:
                case JsonTokens.Digit6:
                case JsonTokens.Digit7:
                case JsonTokens.Digit8:
                case JsonTokens.Digit9:
                    this._currentStructure = NumberStructure.LeadingNonzero;
                    this._lastPart         = NumberPart.FirstDigit;
                    break;

                // not a legal character
                default:
                    if (Rune.DecodeFromUtf8(readerSpan, out var rune, out _) != OperationStatus.Done)
                    {
                        rune = default;
                    }

                    return(ValueParseResult.Failure("Unexpected token, expected 0-9 or -.", rune));
                }
            }

            // if we got empty when previous parsing occured, just don't parse, it's an end-of-content marker
            var completedParsing = false;

            if (readerSpan.Length > 0)
            {
                var offByOne = false;

                // try reading the number
                while (consumedLength < readerSpan.Length)
                {
                    switch (readerSpan[consumedLength++])
                    {
                    // digit 0 is special
                    // if it's the first digit in the non-fractional part, it is the only legal digit before decimal point
                    // otherwise it behaves like a regular digit
                    // this means it can appear:
                    // - as first digit before decimal point
                    // - as non-first digit before decimal point, if first digit was not a 0
                    // - as a digit after decimal point before exponent mark
                    // - as a digit after exponent mark or exponent sign
                    // see: https://www.json.org/img/number.png
                    case JsonTokens.Digit0:
                        if (this._lastPart == NumberPart.FirstDigit && this._currentStructure.HasFlag(NumberStructure.LeadingZero))
                        {
                            return(_cleanup(this, ValueParseResult.Failure("Digit in illegal separator. Expected decimal point.", new Rune(readerSpan[consumedLength - 1]))));
                        }

                        if (this._lastPart == NumberPart.NumberSign)
                        {
                            this._currentStructure |= NumberStructure.LeadingZero;
                            this._lastPart          = NumberPart.FirstDigit;
                        }
                        else
                        {
                            this._lastPart = this._lastPart switch
                            {
                                NumberPart.FirstDigit => NumberPart.Digit,
                                NumberPart.FractionDot => NumberPart.FractionDigit,
                                NumberPart.ExponentMarker or NumberPart.ExponentSign => NumberPart.ExponentDigit,
                                _ => this._lastPart
                            };
                        }
                        break;

                    // non-0 digits can appear:
                    // - as first digit before decimal points
                    // - as non-first digit before decimal point, if first digit was not a 0
                    // - as a digit after decimal point before exponent mark
                    // - as a digit after exponent mark or exponent sign
                    // see: https://www.json.org/img/number.png
                    case JsonTokens.Digit1:
                    case JsonTokens.Digit2:
                    case JsonTokens.Digit3:
                    case JsonTokens.Digit4:
                    case JsonTokens.Digit5:
                    case JsonTokens.Digit6:
                    case JsonTokens.Digit7:
                    case JsonTokens.Digit8:
                    case JsonTokens.Digit9:
                        if (this._lastPart == NumberPart.FirstDigit && this._currentStructure.HasFlag(NumberStructure.LeadingZero))
                        {
                            return(_cleanup(this, ValueParseResult.Failure("Digit in illegal separator. Expected decimal point.", new Rune(readerSpan[consumedLength - 1]))));
                        }

                        if (this._lastPart == NumberPart.NumberSign)
                        {
                            this._currentStructure |= NumberStructure.LeadingNonzero;
                            this._lastPart          = NumberPart.FirstDigit;
                        }
                        else
                        {
                            this._lastPart = this._lastPart switch
                            {
                                NumberPart.FirstDigit => NumberPart.Digit,
                                NumberPart.FractionDot => NumberPart.FractionDigit,
                                NumberPart.ExponentMarker or NumberPart.ExponentSign => NumberPart.ExponentDigit,
                                _ => this._lastPart
                            };
                        }
                        break;

                    // decimal separator can appear only after at least one digit, and only once
                    case JsonTokens.DecimalSeparator:
                        if (this._lastPart != NumberPart.Digit && this._lastPart != NumberPart.FirstDigit)
                        {
                            return(_cleanup(this, ValueParseResult.Failure("Unexpected decimal separator.", new Rune('.'))));
                        }

                        this._currentStructure |= NumberStructure.Fraction;
                        this._lastPart          = NumberPart.FractionDot;
                        break;

                    // exponent marker can appear only after at least one digit, or at least one digit after
                    // decimal point, and only once, regardless of variety
                    case JsonTokens.ExponentSmall:
                    case JsonTokens.ExponentCapital:
                        if (this._lastPart != NumberPart.FirstDigit && this._lastPart != NumberPart.Digit && this._lastPart != NumberPart.FractionDigit)
                        {
                            return(_cleanup(this, ValueParseResult.Failure("Unexpected exponent marker.", new Rune(readerSpan[consumedLength - 1]))));
                        }

                        this._currentStructure |= NumberStructure.Exponent;
                        this._lastPart          = NumberPart.ExponentMarker;

                        if (this._currentStructure.HasFlag(NumberStructure.Fraction))
                        {
                            this._currentStructure |= NumberStructure.FractionValid;
                        }

                        break;

                    // exponent sign can appear only after exponent marker
                    case JsonTokens.NumberSign:
                    case JsonTokens.ExponentSignPositive:
                        if (this._lastPart != NumberPart.ExponentMarker)
                        {
                            return(_cleanup(this, ValueParseResult.Failure("Unexpected exponent sign.", new Rune(readerSpan[consumedLength - 1]))));
                        }

                        this._currentStructure |= NumberStructure.SignedExponent;
                        this._lastPart          = NumberPart.ExponentSign;
                        break;

                    // this is a situation where a non number-character is encountered
                    // this is invalid if immediately after number sign, decimal point, exponent marker, or
                    // exponent sign, otherwise consider it a completed number
                    default:
                        switch (this._lastPart)
                        {
                        case NumberPart.NumberSign:
                        case NumberPart.FractionDot:
                        case NumberPart.ExponentMarker:
                        case NumberPart.ExponentSign:
                            if (Rune.DecodeFromUtf8(readerSpan[(consumedLength - 1)..], out var rune, out _) != OperationStatus.Done)
                            {
                                rune = default;
                            }

                            return(_cleanup(this, ValueParseResult.Failure("Unexpected token, expected 0-9.", rune)));
                        }

                        offByOne         = true;
                        completedParsing = true;
                        break;
                    }

                    // if parsing is completed, do not attempt to resume
                    if (completedParsing)
                    {
                        break;
                    }
                }
Ejemplo n.º 22
0
        /// <summary>
        /// Cross-checks rune-by-rune decoding and <c>Utf8.ToUtf16</c> against <see cref="Encoding.UTF8"/>
        /// for the bytes exposed by <c>_data</c>, logging each stage to the console.
        /// </summary>
        /// <exception cref="Exception">Thrown at the first check whose results disagree.</exception>
        public void RunTest()
        {
            Console.WriteLine("-- BEGIN TEST --");

            // Baseline: the number of UTF-16 code units Encoding.UTF8 reports for the data.
            int expectedCharCount = Encoding.UTF8.GetCharCount(_data.Span);

            Console.WriteLine($"Encoding.UTF8.GetCharCount returned {expectedCharCount}.");

            // Stage 1: walk the bytes one rune at a time and total the UTF-16 lengths.
            {
                int tallyFromRunes = 0;

                for (ReadOnlySpan <byte> remaining = _data.Span; remaining.Length > 0;)
                {
                    // The decode status is ignored on purpose: a U+FFFD replacement rune is acceptable here.
                    Rune.DecodeFromUtf8(remaining, out Rune decodedRune, out int step);
                    remaining       = remaining.Slice(step);
                    tallyFromRunes += decodedRune.Utf16SequenceLength;
                }

                Console.WriteLine($"Rune iteration said there were {tallyFromRunes} UTF-16 chars.");

                if (tallyFromRunes != expectedCharCount)
                {
                    throw new Exception("Rune iteration char count mismatch!!");
                }
            }

            // Stage 2: produce the reference UTF-16 output with Encoding.UTF8.
            char[] referenceChars        = new char[expectedCharCount];
            int    referenceCharsWritten = Encoding.UTF8.GetChars(_data.Span, referenceChars);

            Console.WriteLine($"Encoding.UTF8.GetChars returned {referenceCharsWritten} chars written.");

            if (referenceCharsWritten != expectedCharCount)
            {
                throw new Exception("GetChars return value mismatch!!");
            }

            // Stage 3: the runes read from the UTF-8 input must equal the runes read from the reference output.
            {
                ReadOnlySpan <byte> utf8Rest  = _data.Span;
                ReadOnlySpan <char> utf16Rest = referenceChars;

                while (utf8Rest.Length > 0 && utf16Rest.Length > 0)
                {
                    Rune.DecodeFromUtf8(utf8Rest, out Rune runeFromUtf8, out int utf8Step);
                    Rune.DecodeFromUtf16(utf16Rest, out Rune runeFromUtf16, out int utf16Step);

                    if (runeFromUtf8 != runeFromUtf16)
                    {
                        throw new Exception("Enumerating runes mismatch!!");
                    }

                    utf8Rest  = utf8Rest.Slice(utf8Step);
                    utf16Rest = utf16Rest.Slice(utf16Step);
                }

                // Both sides must be exhausted together.
                if (utf8Rest.Length != utf16Rest.Length)
                {
                    throw new Exception("Rune enumeration returned mismatched lengths!");
                }
            }

            // Stage 4: one-shot transcode with replacement into a buffer of exactly the reference size.
            Console.WriteLine("Running ToUtf16 with replace=true and exact size buffer.");

            {
                char[]          exactBuffer = new char[referenceChars.Length];
                OperationStatus outcome     = Utf8.ToUtf16(_data.Span, exactBuffer, out int bytesTaken, out int charsProduced, replaceInvalidSequences: true, isFinalBlock: true);

                if (outcome != OperationStatus.Done)
                {
                    throw new Exception("Utf8.ToUtf16 returned wrong OperationStatus!!");
                }

                if (bytesTaken != _data.Memory.Length)
                {
                    throw new Exception("Utf8.ToUtf16 didn't read entire input!!");
                }

                if (charsProduced != exactBuffer.Length)
                {
                    throw new Exception("Utf8.ToUtf16 didn't fill entire response buffer!!");
                }

                if (!referenceChars.SequenceEqual(exactBuffer))
                {
                    throw new Exception("Utf8.ToUtf16 returned different data than Encoding.UTF8.GetChars!!");
                }
            }

            // Stage 5: same transcode, but with 1024 chars of slack in the destination.
            Console.WriteLine("Running ToUtf16 with replace=true and extra large buffer.");

            {
                char[]          roomyBuffer = new char[referenceChars.Length + 1024];
                OperationStatus outcome     = Utf8.ToUtf16(_data.Span, roomyBuffer, out int bytesTaken, out int charsProduced, replaceInvalidSequences: true, isFinalBlock: true);

                if (outcome != OperationStatus.Done)
                {
                    throw new Exception("Utf8.ToUtf16 returned wrong OperationStatus!!");
                }

                if (bytesTaken != _data.Memory.Length)
                {
                    throw new Exception("Utf8.ToUtf16 didn't read entire input!!");
                }

                if (charsProduced != referenceChars.Length)
                {
                    throw new Exception("Utf8.ToUtf16 didn't fill entire response buffer!!");
                }

                if (!roomyBuffer.AsSpan(0, charsProduced).SequenceEqual(referenceChars))
                {
                    throw new Exception("Utf8.ToUtf16 returned different data than Encoding.UTF8.GetChars!!");
                }
            }

            // Stage 6: without replacement, every chunk reported as read must round-trip rune for rune.
            Console.WriteLine("Running ToUtf16 with replace=false and extra large buffer.");

            {
                ReadOnlySpan <byte> pending = _data.Span;
                Span <char>         scratch = new char[referenceChars.Length + 1024];

                while (pending.Length > 0)
                {
                    OperationStatus outcome = Utf8.ToUtf16(pending, scratch, out int bytesTaken, out int charsProduced, replaceInvalidSequences: false, isFinalBlock: true);

                    ReadOnlySpan <byte> chunkUtf8  = pending.Slice(0, bytesTaken);
                    ReadOnlySpan <char> chunkUtf16 = scratch.Slice(0, charsProduced);

                    while (chunkUtf8.Length > 0 && chunkUtf16.Length > 0)
                    {
                        OperationStatus utf8Outcome  = Rune.DecodeFromUtf8(chunkUtf8, out Rune runeFromUtf8, out int utf8Step);
                        OperationStatus utf16Outcome = Rune.DecodeFromUtf16(chunkUtf16, out Rune runeFromUtf16, out int utf16Step);

                        if (utf8Outcome != OperationStatus.Done)
                        {
                            throw new Exception("DecodeFromUtf8 returned unexpected value!!");
                        }

                        if (utf16Outcome != OperationStatus.Done)
                        {
                            throw new Exception("DecodeFromUtf16 returned unexpected value!!");
                        }

                        if (runeFromUtf8 != runeFromUtf16)
                        {
                            throw new Exception("Enumerating runes mismatch!!");
                        }

                        chunkUtf8  = chunkUtf8.Slice(utf8Step);
                        chunkUtf16 = chunkUtf16.Slice(utf16Step);
                    }

                    if (chunkUtf8.Length != chunkUtf16.Length)
                    {
                        throw new Exception("Unexpected length mismatch!!");
                    }

                    pending = pending.Slice(bytesTaken);

                    if (outcome == OperationStatus.Done)
                    {
                        continue;
                    }

                    // Transcoding stopped early: step over the offending sequence and go again.
                    Rune.DecodeFromUtf8(pending, out _, out int skipLength);
                    pending = pending.Slice(skipLength);
                }
            }

            // Stage 7: rebuild by hand what a "{BAD}" decoder replacement fallback produces.
            Console.WriteLine("Trying custom decoder replacement.");

            {
                Encoding customEncoding = Encoding.GetEncoding("utf-8", EncoderFallback.ExceptionFallback, new DecoderReplacementFallback("{BAD}"));

                string expectedText = customEncoding.GetString(_data.Span);

                ReadOnlySpan <byte> pending     = _data.Span;
                char[]              chunkBuffer = new char[expectedText.Length];
                StringBuilder       rebuilt     = new StringBuilder();

                while (pending.Length > 0)
                {
                    OperationStatus outcome = Utf8.ToUtf16(pending, chunkBuffer, out int bytesTaken, out int charsProduced, replaceInvalidSequences: false, isFinalBlock: true);
                    rebuilt.Append(chunkBuffer, 0, charsProduced);

                    pending = pending.Slice(bytesTaken);

                    if (outcome == OperationStatus.Done)
                    {
                        continue;
                    }

                    // Step over the offending sequence and emit the custom marker in its place.
                    Rune.DecodeFromUtf8(pending, out _, out int skipLength);
                    pending = pending.Slice(skipLength);

                    rebuilt.Append("{BAD}");
                }

                if (new string(expectedText) != rebuilt.ToString())
                {
                    throw new Exception("Custom decoder replacement failed!!");
                }
            }

            Console.WriteLine("-- END TEST - SUCCESS --");
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Converts the UTF-8 bytes in <paramref name="source"/> to UTF-16 chars written into
        /// <paramref name="destination"/>, processing one scalar value per iteration.
        /// </summary>
        /// <remarks>
        /// When <paramref name="replaceInvalidSequences"/> is <see langword="true"/>, each invalid UTF-8
        /// sequence is written to <paramref name="destination"/> as U+FFFD and this method never reports
        /// <see cref="OperationStatus.InvalidData"/>.
        /// </remarks>
        /// <param name="source">The UTF-8 input.</param>
        /// <param name="destination">The buffer receiving the UTF-16 output.</param>
        /// <param name="numBytesRead">Receives the number of bytes of <paramref name="source"/> that were processed.</param>
        /// <param name="numCharsWritten">Receives the number of chars stored in <paramref name="destination"/>.</param>
        /// <param name="replaceInvalidSequences">Whether invalid input is replaced with U+FFFD instead of stopping the operation.</param>
        /// <param name="isFinalBlock">
        /// Whether no further input will follow. When <see langword="true"/>, a truncated trailing
        /// sequence is handled as invalid data rather than reported as <see cref="OperationStatus.NeedMoreData"/>.
        /// </param>
        /// <returns>
        /// <see cref="OperationStatus.Done"/> once all of <paramref name="source"/> has been processed;
        /// otherwise the status describing why processing stopped early.
        /// </returns>
        public static OperationStatus ToUtf16(ReadOnlySpan <byte> source, Span <char> destination, out int numBytesRead, out int numCharsWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true)
        {
            int             sourceLengthAtEntry      = source.Length;
            int             destinationLengthAtEntry = destination.Length;
            OperationStatus result                   = OperationStatus.Done;

            // Each pass decodes a single scalar value from the front of the source
            // and re-encodes it at the front of the destination.

            while (!source.IsEmpty)
            {
                result = Rune.DecodeFromUtf8(source, out Rune scalar, out int sourceStep);

                if (result == OperationStatus.NeedMoreData)
                {
                    // The source stops in the middle of a sequence. That is acceptable when
                    // the caller may supply more bytes later; stop and let them do so.

                    if (!isFinalBlock)
                    {
                        break;
                    }

                    // No more bytes are coming, so the partial sequence counts as bad data.

                    result = OperationStatus.InvalidData;
                }

                if (result == OperationStatus.InvalidData)
                {
                    // Bad data is only tolerated when the caller asked for U+FFFD substitution;
                    // otherwise stop here and surface the error.

                    if (!replaceInvalidSequences)
                    {
                        break;
                    }

                    scalar = Rune.ReplacementChar;
                }

                // The scalar to emit is now settled. Running out of destination space
                // is the only thing left that can halt the loop.

                if (!scalar.TryEncodeToUtf16(destination, out int destinationStep))
                {
                    result = OperationStatus.DestinationTooSmall;
                    break;
                }

                // Advance by the byte count the decoder reported rather than by the scalar's
                // own encoded length: after substitution the two can differ.

                source      = source.Slice(sourceStep);
                destination = destination.Slice(destinationStep);
                result      = OperationStatus.Done; // a patched error counts as success
            }

            numBytesRead    = sourceLengthAtEntry - source.Length;
            numCharsWritten = destinationLengthAtEntry - destination.Length;

            Debug.Assert((result == OperationStatus.Done) == (numBytesRead == sourceLengthAtEntry),
                         "Should report OperationStatus.Done if and only if we've consumed the entire input buffer.");

            return(result);
        }