/// <summary>
/// Validates <paramref name="utf8Bytes"/> when no partial sequence is carried over from
/// an earlier call, and produces the state value to carry into the next call.
/// </summary>
/// <param name="utf8Bytes">The bytes to validate.</param>
/// <returns>
/// <c>DefaultSequence</c> when the whole buffer was consumed without error;
/// <c>InvalidSequence</c> when the first unconsumable sequence is genuinely invalid;
/// otherwise a packed value whose low byte is the number of buffered partial bytes (1-3)
/// and whose remaining three bytes hold those partial bytes.
/// </returns>
private static uint ConsumeDataWithoutExistingPartialSequence(ReadOnlySpan<byte> utf8Bytes)
{
    var firstBadIndex = Utf8Utility.GetIndexOfFirstInvalidUtf8Sequence(utf8Bytes);
    if (firstBadIndex < 0)
    {
        // Nothing was rejected: the entire buffer validated cleanly.
        return DefaultSequence;
    }

    // Something stopped validation. Inspect the sequence at that position to find out
    // whether the buffer merely ended mid-sequence or the data is actually bad.
    ReadOnlySpan<byte> tail = utf8Bytes.Slice(firstBadIndex);
    var validity = Utf8Utility.PeekFirstSequence(tail, out int numBytesConsumed, out _);

    if (validity != SequenceValidity.Incomplete)
    {
        // Genuinely invalid data ('Empty' and 'WellFormed' are not expected here).
        Debug.Assert(validity == SequenceValidity.Invalid);
        return InvalidSequence;
    }

    // A partial (but so far valid) sequence: stash it for the next call.
    Debug.Assert(1 <= numBytesConsumed && numBytesConsumed <= 3);

    // Writing the count as a machine-endian integer places it in the low byte of the
    // packed value and zeroes the other three bytes, which then receive the partial data.
    Span<byte> packed = stackalloc byte[sizeof(uint)];
    BinaryPrimitives.WriteMachineEndian(packed, ref numBytesConsumed);

    // The low byte is the first byte in memory on little-endian machines and the last
    // byte on big-endian machines, so the partial data starts at offset 1 or 0 respectively.
    int payloadOffset = BitConverter.IsLittleEndian ? 1 : 0;
    tail.Slice(0, numBytesConsumed).CopyTo(packed.Slice(payloadOffset));

    return BinaryPrimitives.ReadMachineEndian<uint>(packed);
}
/// <summary>
/// Validates <paramref name="utf8Bytes"/> when a partial sequence was left over from an
/// earlier call, first trying to complete that partial sequence with bytes from the
/// start of the new buffer.
/// </summary>
/// <param name="partialSequence">
/// The packed carry-over state: the low byte is the number of buffered bytes and the
/// remaining three bytes hold the buffered data. Must be non-zero and must not be an
/// invalid-sequence marker (checked by a debug assertion).
/// </param>
/// <param name="utf8Bytes">The newly supplied bytes.</param>
/// <returns>
/// The result of validating the rest of the buffer when the partial sequence completes;
/// an updated packed partial-sequence value when the sequence is still incomplete after
/// using all available input; or <c>InvalidSequence</c> when the combined bytes are invalid.
/// </returns>
private static uint ConsumeDataWithExistingPartialSequence(uint partialSequence, ReadOnlySpan<byte> utf8Bytes)
{
    Debug.Assert(partialSequence != 0 && !IsInvalidPartialSequence(partialSequence));

    // The low byte of the packed state is the count of bytes buffered so far.
    int bufferedCount = (byte)partialSequence;

    if (BitConverter.IsLittleEndian)
    {
        // Drop the count byte so that, once written out in machine order, the first
        // buffered byte lands at offset 0 of the scratch span.
        partialSequence >>= 8;
    }

    Span<byte> scratch = stackalloc byte[sizeof(uint)];
    BinaryPrimitives.WriteMachineEndian(scratch, ref partialSequence);

    // Top up the scratch buffer (4 bytes at most) with as much new input as fits.
    int borrowedCount = Math.Min(4 - bufferedCount, utf8Bytes.Length);
    utf8Bytes.Slice(0, borrowedCount).CopyTo(scratch.Slice(bufferedCount));
    int candidateCount = bufferedCount + borrowedCount;

    // Re-examine the (hopefully now complete) sequence.
    var validity = Utf8Utility.PeekFirstSequence(scratch.Slice(0, candidateCount), out int numBytesConsumed, out _);
    Debug.Assert(1 <= numBytesConsumed && numBytesConsumed <= 4);

    if (validity == SequenceValidity.WellFormed)
    {
        // Happy path: the borrowed bytes completed the sequence. Only the bytes actually
        // needed to finish it are skipped -- this may be fewer than were borrowed --
        // and the remainder of the input is validated from a clean state.
        int inputBytesUsed = numBytesConsumed - bufferedCount;
        return ConsumeDataWithoutExistingPartialSequence(utf8Bytes.Slice(inputBytesUsed));
    }

    if (validity == SequenceValidity.Incomplete)
    {
        // All available input was used and the sequence is still unfinished. It remains
        // valid until a bad byte shows up, so repack it and report success.
        Debug.Assert(numBytesConsumed < 4);
        Debug.Assert(numBytesConsumed == candidateCount);

        // Partial data goes back into the three non-low bytes and the updated count into
        // the low byte: shift the data up on little-endian, mask off the stale low byte
        // on big-endian.
        uint raw = BinaryPrimitives.ReadMachineEndian<uint>(scratch);
        uint payload = BitConverter.IsLittleEndian ? (raw << 8) : (raw & 0xFFFFFF00u);
        return payload | (uint)numBytesConsumed;
    }

    // Genuinely invalid data ('Empty' is not expected here).
    Debug.Assert(validity == SequenceValidity.Invalid);
    return InvalidSequence;
}