Exemplo n.º 1
0
        /// <summary>
        /// Transcodes UTF-8 bytes to UTF-16 chars via <c>Utf8Utility.TranscodeToUtf16</c> and reports
        /// how far the routine advanced through each buffer.
        /// </summary>
        /// <param name="pBytes">Start of the UTF-8 input.</param>
        /// <param name="bytesLength">Number of input bytes available.</param>
        /// <param name="pChars">Start of the UTF-16 output buffer.</param>
        /// <param name="charsLength">Capacity of the output buffer, in chars.</param>
        /// <param name="bytesConsumed">Distance (in bytes) from <paramref name="pBytes"/> to the input position where transcoding stopped.</param>
        /// <returns>Distance (in chars) from <paramref name="pChars"/> to the output position where transcoding stopped.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetCharsCommon
        private protected sealed override unsafe int GetCharsFast(byte *pBytes, int bytesLength, char *pChars, int charsLength, out int bytesConsumed)
        {
            // The status returned by the transcoder is intentionally discarded: all that matters here is
            // how much input was consumed. Anything left over is dealt with by the fallback routine.
            Utf8Utility.TranscodeToUtf16(pBytes, bytesLength, pChars, charsLength, out byte *pSourceEnd, out char *pDestinationEnd);

            int charsProduced = (int)(pDestinationEnd - pChars);

            bytesConsumed = (int)(pSourceEnd - pBytes);
            return charsProduced;
        }
        // Builds a Utf8String from a copy of the supplied bytes; the copy is then passed through
        // Utf8Utility.ValidateAndFixupUtf8String and its result is returned.
        private Utf8String Ctor(ReadOnlySpan <byte> value)
        {
            int byteCount = value.Length;

            if (!value.IsEmpty)
            {
                // Allocate the destination instance and copy the raw bytes into it before validating.
                Utf8String copy = FastAllocate(byteCount);
                Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(value), (uint)byteCount);

                return Utf8Utility.ValidateAndFixupUtf8String(copy)!; // TODO-NULLABLE: https://github.com/dotnet/roslyn/issues/26761
            }

            // Empty input maps to the shared Empty instance.
            return Empty;
        }
Exemplo n.º 3
0
        // Builds a Utf8String from a copy of the supplied bytes; the copy is then passed through
        // Utf8Utility.ValidateAndFixupUtf8String and its result is returned.
        private Utf8String Ctor(ReadOnlySpan <byte> value)
        {
            int byteCount = value.Length;

            if (!value.IsEmpty)
            {
                // Allocate the destination instance and copy the raw bytes into it before validating.
                Utf8String copy = FastAllocate(byteCount);
                Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(value), (uint)byteCount);

                return Utf8Utility.ValidateAndFixupUtf8String(copy)!; // TODO-NULLABLE: Remove ! when nullable attributes are respected
            }

            // Empty input maps to the shared Empty instance.
            return Empty;
        }
Exemplo n.º 4
0
        // Builds a Utf8String from a copy of the supplied bytes; the copy is then passed through
        // Utf8Utility.ValidateAndFixupUtf8String and its result is returned.
        private Utf8String Ctor(ReadOnlySpan <byte> value)
        {
            int byteCount = value.Length;

            if (!value.IsEmpty)
            {
                // Allocate the destination instance and copy the raw bytes into it before validating.
                Utf8String copy = FastAllocate(byteCount);
                Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(value), (uint)byteCount);

                return Utf8Utility.ValidateAndFixupUtf8String(copy);
            }

            // Empty input maps to the shared Empty instance.
            return Empty;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Computes the UTF-16 code unit count for the leading portion of a UTF-8 buffer, stopping at
        /// the position reported by <c>Utf8Utility.GetPointerToFirstInvalidByte</c>.
        /// </summary>
        /// <param name="pBytes">Start of the UTF-8 input.</param>
        /// <param name="bytesLength">Number of input bytes available.</param>
        /// <param name="fallback">Not used by this method.</param>
        /// <param name="bytesConsumed">Number of bytes preceding the first invalid byte.</param>
        /// <returns><paramref name="bytesConsumed"/> plus the UTF-16 code unit count adjustment reported by the utility.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetCharCountCommon
        private protected sealed override unsafe int GetCharCountFast(byte *pBytes, int bytesLength, DecoderFallback?fallback, out int bytesConsumed)
        {
            byte *pFirstInvalid = Utf8Utility.GetPointerToFirstInvalidByte(pBytes, bytesLength, out int utf16Adjustment, out _);

            // Keep the count in a local so the out parameter is written exactly once.
            int validByteCount = (int)(pFirstInvalid - pBytes);
            bytesConsumed = validByteCount;

            // UTF-16 never needs more code units than UTF-8 does, so this sum cannot overflow.
            return validByteCount + utf16Adjustment;
        }
Exemplo n.º 6
0
        public void PeekFirstSequence_WithIncompleteSequence_ReturnsIncomplete(byte[] sequence)
        {
            // Arrange: place the input in a protected read-only buffer.
            var protectedInput = NativeMemory.GetProtectedReadonlyBuffer(sequence);

            // Act
            var result = Utf8Utility.PeekFirstSequence(protectedInput, out var consumed, out var scalar);

            // Assert: the whole input is reported as consumed, classified Incomplete, and the
            // scalar out value is the replacement character.
            Assert.Equal(SequenceValidity.Incomplete, result);
            Assert.Equal(sequence.Length, consumed);
            Assert.Equal(UnicodeScalar.ReplacementChar, scalar);
        }
Exemplo n.º 7
0
        public void IsWellFormedUtf8String_WithStringOfAllPossibleScalarValues_ReturnsTrue()
        {
            // Arrange

            byte[] allScalarsAsUtf8 = _utf8EncodingWithoutReplacement.GetBytes(_stringWithAllScalars.Value);
            var    buffer           = NativeMemory.AllocateFromExistingData(allScalarsAsUtf8, PoisonPagePlacement.AfterSpan);

            // try/finally guarantees the buffer is released even when MakeReadonly or the
            // assertion throws; previously a failing assertion skipped Dispose.
            try
            {
                buffer.MakeReadonly();

                // Act & assert

                Assert.True(Utf8Utility.IsWellFormedUtf8String(buffer.Span));
            }
            finally
            {
                buffer.Dispose();
            }
        }
Exemplo n.º 8
0
        public void PeekFirstSequence_WithEmptyInput_ReturnsEmptyValidity()
        {
            // Arrange: a zero-length buffer.

            var buffer = NativeMemory.Allocate(0, PoisonPagePlacement.AfterSpan);

            // try/finally guarantees the buffer is released even when an assertion throws;
            // previously a failing assertion skipped Dispose.
            try
            {
                // Act

                var validity = Utf8Utility.PeekFirstSequence(buffer.Span, out var numBytesConsumed, out var scalarValue);

                // Assert

                Assert.Equal(SequenceValidity.Empty, validity);
                Assert.Equal(0, numBytesConsumed);
                Assert.Equal(UnicodeScalar.ReplacementChar, scalarValue);
            }
            finally
            {
                buffer.Dispose();
            }
        }
Exemplo n.º 9
0
        public void IsWellFormedUtf8String_WithCorruptedStringOfAllPossibleScalarValues_ReturnsFalse()
        {
            // Arrange

            byte[] allScalarsAsUtf8 = _utf8EncodingWithoutReplacement.GetBytes(_stringWithAllScalars.Value);
            allScalarsAsUtf8[0x1000] ^= 0x80; // modify the high bit of one of the characters, which will corrupt the header
            var buffer = NativeMemory.AllocateFromExistingData(allScalarsAsUtf8, PoisonPagePlacement.AfterSpan);

            // try/finally guarantees the buffer is released even when MakeReadonly or the
            // assertion throws; previously a failing assertion skipped Dispose.
            try
            {
                buffer.MakeReadonly();

                // Act & assert

                Assert.False(Utf8Utility.IsWellFormedUtf8String(buffer.Span));
            }
            finally
            {
                buffer.Dispose();
            }
        }
Exemplo n.º 10
0
        private void TryReadFirstRune_WithValidInput_ReturnsScalarValue_Core(int scalarValue)
        {
            // Arrange: encode the scalar as UTF-16, then as UTF-8, and protect the resulting bytes.
            string utf16Text      = char.ConvertFromUtf32(scalarValue);
            var    encodedBytes   = _utf8EncodingWithoutReplacement.GetBytes(utf16Text);
            var    protectedInput = NativeMemory.GetProtectedReadonlyBuffer(encodedBytes);

            // Act
            bool succeeded = Utf8Utility.TryReadFirstRune(protectedInput, out var decodedRune, out var consumed);

            // Assert: the read succeeds, yields the original scalar, and consumes the entire input.
            Assert.True(succeeded);
            Assert.Equal(scalarValue, decodedRune);
            Assert.Equal(protectedInput.Length, consumed);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Builds a <see cref="Utf8String"/> from a copy of pre-existing UTF-8 bytes.
        /// </summary>
        /// <param name="buffer">The bytes to copy into the new <see cref="Utf8String"/>.</param>
        /// <returns>
        /// <c>Empty</c> when <paramref name="buffer"/> is empty; otherwise the result of running
        /// validation and fixup over the copied data.
        /// </returns>
        /// <remarks>
        /// Ill-formed UTF-8 subsequences in <paramref name="buffer"/> are replaced with
        /// <see cref="Rune.ReplacementChar"/> in the returned instance. As a consequence the returned
        /// <see cref="Utf8String"/> may differ from <paramref name="buffer"/> in both contents and
        /// total byte length.
        /// </remarks>
        public static Utf8String CreateFromRelaxed(ReadOnlySpan <byte> buffer)
        {
            int byteCount = buffer.Length;

            if (!buffer.IsEmpty)
            {
                // Every byte of the new instance is overwritten by the copy below, so the
                // allocation can skip zero-initialization.
                Utf8String copy = FastAllocateSkipZeroInit(byteCount);
                Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), (uint)byteCount);

                // Validation and fixup run over the copy, not the caller's buffer.
                return Utf8Utility.ValidateAndFixupUtf8String(copy);
            }

            return Empty;
        }
        // Shared driver: runs Utf8Utility.GetPointerToFirstInvalidByte over a read-only copy of
        // 'input' and checks the derived index, rune count and surrogate pair count against the
        // expected values. An expectedRetVal of -1 means "no invalid byte found".
        private static unsafe void GetIndexOfFirstInvalidUtf8Sequence_Test_Core(byte[] input, int expectedRetVal, int expectedRuneCount, int expectedSurrogatePairCount)
        {
            // Arrange

            using BoundedMemory <byte> boundedMemory = BoundedMemory.AllocateFromExistingData(input);
            boundedMemory.MakeReadonly();

            // Act

            int observedIndex;
            int observedRuneCount;
            int observedSurrogatePairs;

            fixed(byte *pStart = &MemoryMarshal.GetReference(boundedMemory.Span))
            {
                byte *pInvalid = Utf8Utility.GetPointerToFirstInvalidByte(pStart, input.Length, out int utf16Adjustment, out int scalarAdjustment);

                long offset = pInvalid - pStart;

                // Sanity checks on the raw outputs before deriving anything from them.
                Assert.True((ulong)offset <= (uint)input.Length, "ptrDiff was outside expected range.");
                Assert.True(utf16Adjustment <= 0, "UTF-16 code unit count adjustment must be 0 or negative.");
                Assert.True(scalarAdjustment <= 0, "Scalar count adjustment must be 0 or negative.");

                // Reaching the end of the input means nothing invalid was found.
                if (offset == input.Length)
                {
                    observedIndex = -1;
                }
                else
                {
                    observedIndex = (int)offset;
                }

                // bytes processed + first adjustment  = UTF-16 code unit count
                // UTF-16 code units + second adjustment = scalar (rune) count
                int utf16Units = (int)offset + utf16Adjustment;
                observedRuneCount = utf16Units + scalarAdjustment;

                // Each surrogate pair contributes one UTF-16 code unit beyond its single scalar.
                observedSurrogatePairs = utf16Units - observedRuneCount;
            }

            // Assert

            Assert.Equal(expectedRetVal, observedIndex);
            Assert.Equal(expectedRuneCount, observedRuneCount);
            Assert.Equal(expectedSurrogatePairCount, observedSurrogatePairs);
        }
Exemplo n.º 13
0
        public void GetExpectedNumberOfContinuationBytes_ForAllInputs()
        {
            // Walks every possible byte value (00..FF) range by range and checks the number of
            // continuation bytes reported for it. Loop counters are uint so that the final
            // range's 'i <= 0xFF' condition terminates instead of wrapping.

            // For [ 00..7F ], ASCII characters
            for (uint i = 0x00; i <= 0x7F; i++)
            {
                Assert.Equal(0, Utf8Utility.GetExpectedNumberOfContinuationBytes((byte)i));
            }

            // For [ 80..BF ], continuation code units (never valid start bytes)
            for (uint i = 0x80; i <= 0xBF; i++)
            {
                Assert.Equal(0, Utf8Utility.GetExpectedNumberOfContinuationBytes((byte)i));
            }

            // For [ C0..C1 ], overlong 2-byte starting code units (never valid bytes)
            // (This loop previously re-iterated 80..BF, leaving C0 and C1 untested.)
            for (uint i = 0xC0; i <= 0xC1; i++)
            {
                Assert.Equal(0, Utf8Utility.GetExpectedNumberOfContinuationBytes((byte)i));
            }

            // For [ C2..DF ], 2-byte sequence starting markers
            for (uint i = 0xC2; i <= 0xDF; i++)
            {
                Assert.Equal(1, Utf8Utility.GetExpectedNumberOfContinuationBytes((byte)i));
            }

            // For [ E0..EF ], 3-byte sequence starting markers
            for (uint i = 0xE0; i <= 0xEF; i++)
            {
                Assert.Equal(2, Utf8Utility.GetExpectedNumberOfContinuationBytes((byte)i));
            }

            // For [ F0..F4 ], 4-byte sequence starting markers
            for (uint i = 0xF0; i <= 0xF4; i++)
            {
                Assert.Equal(3, Utf8Utility.GetExpectedNumberOfContinuationBytes((byte)i));
            }

            // For [ F5..FF ], never valid UTF-8 code units
            for (uint i = 0xF5; i <= 0xFF; i++)
            {
                Assert.Equal(0, Utf8Utility.GetExpectedNumberOfContinuationBytes((byte)i));
            }
        }
Exemplo n.º 14
0
        private void TryReadFirstRuneAsUtf16_WithValidInput_ReturnsScalarValue_Core(int scalarValue)
        {
            // Arrange: the expected UTF-16 text, its UTF-8 encoding in a protected read-only
            // buffer, and a protected two-char output buffer.
            string expectedText   = char.ConvertFromUtf32(scalarValue);
            var    encodedBytes   = _utf8EncodingWithoutReplacement.GetBytes(expectedText);
            var    protectedInput = NativeMemory.GetProtectedReadonlyBuffer(encodedBytes);
            var    output         = NativeMemory.GetProtectedWriteableCharBuffer(2);

            // Act
            bool succeeded = Utf8Utility.TryReadFirstRuneAsUtf16(protectedInput, output, out var consumed, out var written);

            // Assert: the read succeeds, the written chars round-trip to the expected text, and
            // the whole input is consumed.
            Assert.True(succeeded);
            Assert.Equal(expectedText, new string(output.ToArray(), 0, written));
            Assert.Equal(expectedText.Length, written);
            Assert.Equal(protectedInput.Length, consumed);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Produces a new <see cref="Utf8String"/> holding a copy of the supplied bytes without
        /// validating them. Read the remarks before using this method.
        /// </summary>
        /// <param name="utf8Contents">The bytes to copy into the new <see cref="Utf8String"/>.</param>
        /// <returns><c>Empty</c> for empty input; otherwise a new instance containing the copied bytes.</returns>
        /// <remarks>
        /// This factory exists as an optimization that bypasses the validation normally performed by
        /// the <see cref="Utf8String"/> constructors. Callers must guarantee that
        /// <paramref name="utf8Contents"/> is well-formed UTF-8: <see cref="Utf8String"/> contractually
        /// promises to contain only well-formed UTF-8, and breaking that promise could lead to
        /// runtime instability.
        /// </remarks>
        public static Utf8String UnsafeCreateWithoutValidation(ReadOnlySpan <byte> utf8Contents)
        {
            if (!utf8Contents.IsEmpty)
            {
                // Every byte of the new instance is overwritten by the copy below, so the
                // allocation can skip zero-initialization.
                Utf8String result = FastAllocateSkipZeroInit(utf8Contents.Length);
                utf8Contents.CopyTo(result.DangerousGetMutableSpan());

                // Debug-only sanity check; this call is compiled out of release builds.
                Debug.Assert(Utf8Utility.IsWellFormedUtf8(result.AsBytes()), "Buffer contained ill-formed UTF-8 data.");

                return result;
            }

            // Empty input is special-cased to the shared Empty instance.
            return Empty;
        }
Exemplo n.º 16
0
        [InlineData("25249-0.txt")] // Chinese, UTF-8 (primarily 3-byte sequences)
        public void GetUtf16CharCount_UsingNewEncoder(string resourceName)
        {
            // Benchmark: measures repeated calls to Utf8Utility.TryGetUtf16CharCount over the
            // contents of the named test resource.

            ReadOnlySpan <byte> utf8Text = ReadTestResource(resourceName);

            // Call Utf8Utility.TryGetUtf16CharCount once before measuring so it is already JITted,
            // and confirm that it succeeds on this input.

            bool succeeded = Utf8Utility.TryGetUtf16CharCount(utf8Text, out _);

            Assert.True(succeeded);

            // Perform perf test

            foreach (var iteration in Benchmark.Iterations)
            {
                using (iteration.StartMeasurement())
                {
                    // The call is repeated InnerIterationCount times inside each measured iteration;
                    // its result is discarded.
                    for (int i = 0; i < Benchmark.InnerIterationCount; i++)
                    {
                        Utf8Utility.TryGetUtf16CharCount(utf8Text, out _);
                    }
                }
            }
        }
Exemplo n.º 17
0
        // Copies 'value' into a newly allocated buffer and returns that buffer, throwing
        // ArgumentException if the copied data is not well-formed UTF-8. Empty input returns the
        // byte array backing Empty.
        private static byte[] InitializeBuffer(ReadOnlySpan <byte> value)
        {
            if (!value.IsEmpty)
            {
                byte[] copy = AllocateBuffer(value.Length);
                value.CopyTo(copy);

                // Validation deliberately runs over the copy rather than the caller's span.
                if (Utf8Utility.IsWellFormedUtf8(copy))
                {
                    return copy;
                }

                throw new ArgumentException(
                          message: SR.Utf8String_InputContainedMalformedUtf8,
                          paramName: nameof(value));
            }

            return Empty._bytes;
        }
Exemplo n.º 18
0
        // Reads the first UTF-8 sequence from 'value' and writes its backslash-escaped form into
        // 'destination' starting at index 'written', advancing 'written' past the bytes emitted.
        // A fixed set of characters gets a two-byte escape; everything else is written as \u
        // followed by WriteHex output, using two \u escapes (a surrogate pair) for scalars at or
        // above 0x10000. Returns the number of input bytes the sequence occupied.
        private static int EscapeNextBytes(ReadOnlySpan <byte> value, ref Span <byte> destination, ref int written)
        {
            SequenceValidity validity = Utf8Utility.PeekFirstSequence(value, out int bytesRead, out UnicodeScalar decoded);

            if (validity != SequenceValidity.WellFormed)
            {
                JsonThrowHelper.ThrowJsonWriterException("Invalid UTF-8 string.");
            }

            // Every escape begins with a backslash.
            destination[written++] = (byte)'\\';
            int codePoint = decoded.Value;

            // Pick the single-letter escape, if this code point has one; 0 means "none".
            byte shortEscape;
            switch (codePoint)
            {
            case '\n':
                shortEscape = (byte)'n';
                break;

            case '\r':
                shortEscape = (byte)'r';
                break;

            case '\t':
                shortEscape = (byte)'t';
                break;

            case '\\':
                shortEscape = (byte)'\\';
                break;

            case '/':
                shortEscape = (byte)'/';
                break;

            case '\b':
                shortEscape = (byte)'b';
                break;

            case '\f':
                shortEscape = (byte)'f';
                break;

            default:
                shortEscape = 0;
                break;
            }

            if (shortEscape != 0)
            {
                destination[written++] = shortEscape;
                return bytesRead;
            }

            // No short form: fall back to the \u form.
            destination[written++] = (byte)'u';

            if (codePoint >= 0x10000)
            {
                // Split the scalar into a high/low surrogate pair, each written as its own \u escape.
                int highOffset    = DivMod(codePoint - 0x10000, 0x400, out int lowOffset);
                int highSurrogate = highOffset + 0xD800;
                int lowSurrogate  = lowOffset + 0xDC00;

                WriteHex(highSurrogate, ref destination, ref written);
                destination[written++] = (byte)'\\';
                destination[written++] = (byte)'u';
                WriteHex(lowSurrogate, ref destination, ref written);
            }
            else
            {
                WriteHex(codePoint, ref destination, ref written);
            }

            return bytesRead;
        }
Exemplo n.º 19
0
 // Returns whether Utf8Utility.TryGetUtf16CharCount succeeds for utf8Text; the count itself is discarded.
 public bool UsingNewEncoder() => Utf8Utility.TryGetUtf16CharCount(utf8Text, out _);