Exemple #1
0
        /// <summary>
        /// Validates <paramref name="utf8Bytes"/> when no partial sequence is pending from earlier input,
        /// and returns the state value to carry forward.
        /// </summary>
        /// <param name="utf8Bytes">The UTF-8 bytes to validate.</param>
        /// <returns>
        /// <c>DefaultSequence</c> when the whole buffer was consumed without error;
        /// <c>InvalidSequence</c> when the first unconsumable sequence is not merely incomplete;
        /// otherwise a packed value whose low byte is the number of trailing partial bytes (1..3)
        /// and whose remaining three bytes hold those partial bytes.
        /// </returns>
        private static uint ConsumeDataWithoutExistingPartialSequence(ReadOnlySpan<byte> utf8Bytes)
        {
            var firstBadIndex = Utf8Utility.GetIndexOfFirstInvalidUtf8Sequence(utf8Bytes);

            // A negative index means nothing in the buffer was rejected.
            if (firstBadIndex < 0)
            {
                return DefaultSequence;
            }

            // Something could not be consumed. Look at the offending sequence to find out
            // whether the buffer merely ended mid-sequence or the data is actually bad.
            ReadOnlySpan<byte> tail = utf8Bytes.Slice(firstBadIndex);
            var status = Utf8Utility.PeekFirstSequence(tail, out int pendingByteCount, out _);

            if (status != SequenceValidity.Incomplete)
            {
                // 'Empty' and 'WellFormed' are not expected here, which leaves genuinely invalid data.
                Debug.Assert(status == SequenceValidity.Invalid);

                return InvalidSequence;
            }

            // The buffer ended in the middle of a sequence; stash what we saw for the next call.
            Debug.Assert(pendingByteCount >= 1 && pendingByteCount <= 3);

            // Writing the count first zero-fills the scratch space and places the count in the
            // low byte of the eventual uint. The partial bytes then fill the other three bytes.
            Span<byte> packed = stackalloc byte[sizeof(uint)];
            BinaryPrimitives.WriteMachineEndian(packed, ref pendingByteCount);

            // The low byte of the uint is the first byte in memory on little-endian machines,
            // so the data must start one byte in; on big-endian the low byte is the last one.
            int dataOffset = BitConverter.IsLittleEndian ? 1 : 0;
            tail.Slice(0, pendingByteCount).CopyTo(packed.Slice(dataOffset));

            return BinaryPrimitives.ReadMachineEndian<uint>(packed);
        }
Exemple #2
0
        /// <summary>
        /// Continues validation when a partial sequence was carried over from earlier input: tries to
        /// complete that sequence using the start of <paramref name="utf8Bytes"/>, then validates the rest.
        /// </summary>
        /// <param name="partialSequence">
        /// The packed carry-over state. Its low byte is the number of partial bytes held and the
        /// remaining three bytes hold the partial data. Must be non-zero and not the invalid marker.
        /// </param>
        /// <param name="utf8Bytes">The next chunk of UTF-8 bytes.</param>
        /// <returns>
        /// The result of <c>ConsumeDataWithoutExistingPartialSequence</c> on the remaining input when
        /// the carried sequence completes; a re-packed partial state when it is still incomplete;
        /// <c>InvalidSequence</c> otherwise.
        /// </returns>
        private static uint ConsumeDataWithExistingPartialSequence(uint partialSequence, ReadOnlySpan<byte> utf8Bytes)
        {
            Debug.Assert(partialSequence != 0 && !IsInvalidPartialSequence(partialSequence));

            // The low byte of the packed state is the number of bytes carried over.
            int carriedByteCount = (byte)partialSequence;

            if (BitConverter.IsLittleEndian)
            {
                // Drop the count byte so the carried data begins at index 0 once the value
                // is laid out in memory below.
                partialSequence >>= 8;
            }

            Span<byte> scratch = stackalloc byte[sizeof(uint)];
            BinaryPrimitives.WriteMachineEndian(scratch, ref partialSequence);

            // Top up the scratch space with as many input bytes as will fit (or as are available).
            int borrowedByteCount = Math.Min(4 - carriedByteCount, utf8Bytes.Length);
            utf8Bytes.Slice(0, borrowedByteCount).CopyTo(scratch.Slice(carriedByteCount));
            int combinedByteCount = carriedByteCount + borrowedByteCount;

            // Re-examine the carried bytes together with whatever was borrowed from the input.
            var status = Utf8Utility.PeekFirstSequence(scratch.Slice(0, combinedByteCount), out int sequenceByteCount, out _);

            Debug.Assert(sequenceByteCount >= 1 && sequenceByteCount <= 4);

            if (status == SequenceValidity.WellFormed)
            {
                // The carried sequence is now complete. Only the bytes that were actually needed to
                // finish it are skipped; this can be fewer than the number of bytes borrowed above.
                int inputBytesUsed = sequenceByteCount - carriedByteCount;
                return ConsumeDataWithoutExistingPartialSequence(utf8Bytes.Slice(inputBytesUsed));
            }

            if (status != SequenceValidity.Incomplete)
            {
                // 'Empty' and 'WellFormed' are not expected here, which leaves genuinely invalid data.
                Debug.Assert(status == SequenceValidity.Invalid);

                return InvalidSequence;
            }

            // All available input was used and the sequence is still unfinished. Nothing seen so far
            // is invalid, so re-pack the longer partial sequence and hand it back.
            Debug.Assert(sequenceByteCount < 4);
            Debug.Assert(sequenceByteCount == combinedByteCount);

            // Restore the packed layout: data in the three non-low bytes, count in the low byte.
            uint rawData = BinaryPrimitives.ReadMachineEndian<uint>(scratch);
            uint dataBits = BitConverter.IsLittleEndian
                ? rawData << 8
                : rawData & 0xFFFFFF00u;

            return dataBits | (uint)sequenceByteCount;
        }