Пример #1
0
        /// <summary>
        /// Returns <paramref name="value"/> if it is null, empty, or contains only well-formed UTF-8 data;
        /// otherwise allocates a new <see cref="Utf8String"/> instance containing the same data as
        /// <paramref name="value"/> but where all invalid UTF-8 sequences have been replaced
        /// with U+FFFD.
        /// </summary>
        /// <param name="value">The string to validate. May be null.</param>
        /// <returns>
        /// The original <paramref name="value"/> instance when no fixup is required; otherwise a newly
        /// created instance built from the repaired bytes.
        /// </returns>
        public static Utf8String ValidateAndFixupUtf8String(Utf8String value)
        {
            // Null is passed through unchanged, as promised by the summary. 'is null' is used
            // so that no user-defined equality operator is involved in the check.
            if (value is null || value.Length == 0)
            {
                return value;
            }

            ReadOnlySpan<byte> valueAsBytes = value.AsBytes();

            // A negative index means no invalid sequence was found, so the input can be returned as-is.
            int idxOfFirstInvalidData = GetIndexOfFirstInvalidUtf8Sequence(valueAsBytes, out _);
            if (idxOfFirstInvalidData < 0)
            {
                return value;
            }

            // TODO_UTF8STRING: Replace this with the faster implementation once it's available.
            // (The faster implementation is in the dev/utf8string_bak branch currently.)

            MemoryStream memStream = new MemoryStream();
#if !NETSTANDARD2_0
            // Everything before the first invalid sequence is already known to be valid - copy it in bulk.
            memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData));

            // The remainder is non-empty here (the invalid index lies inside the data), so a
            // do/while loop is safe. The loop relies on each decode attempt reporting at least
            // one consumed byte for non-empty input, including on failure.
            valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData);
            do
            {
                if (Rune.DecodeFromUtf8(valueAsBytes, out _, out int bytesConsumed) == OperationStatus.Done)
                {
                    // Valid scalar value - copy data as-is to MemoryStream
                    memStream.Write(valueAsBytes.Slice(0, bytesConsumed));
                }
                else
                {
                    // Invalid scalar value - copy U+FFFD to MemoryStream
                    memStream.Write(ReplacementCharSequence);
                }

                valueAsBytes = valueAsBytes.Slice(bytesConsumed);
            } while (!valueAsBytes.IsEmpty);
#else
            if (!MemoryMarshal.TryGetArray(value.AsMemoryBytes(), out ArraySegment<byte> valueArraySegment))
            {
                Debug.Fail("Utf8String on netstandard should always be backed by an array.");
            }

            // Everything before the first invalid sequence is already known to be valid - copy it in bulk.
            memStream.Write(valueArraySegment.Array, valueArraySegment.Offset, idxOfFirstInvalidData);

            // idxOfFirstInvalidData is relative to the start of the segment, not the start of the
            // backing array, so the segment's own offset must be added when re-slicing.
            valueArraySegment = new ArraySegment<byte>(
                valueArraySegment.Array,
                valueArraySegment.Offset + idxOfFirstInvalidData,
                valueArraySegment.Count - idxOfFirstInvalidData);
            do
            {
                if (Rune.DecodeFromUtf8(valueArraySegment, out _, out int bytesConsumed) == OperationStatus.Done)
                {
                    // Valid scalar value - copy data as-is to MemoryStream
                    memStream.Write(valueArraySegment.Array, valueArraySegment.Offset, bytesConsumed);
                }
                else
                {
                    // Invalid scalar value - copy U+FFFD to MemoryStream
                    memStream.Write(ReplacementCharSequence, 0, ReplacementCharSequence.Length);
                }

                // Advance past whatever the decoder consumed (valid or not).
                valueArraySegment = new ArraySegment<byte>(
                    valueArraySegment.Array,
                    valueArraySegment.Offset + bytesConsumed,
                    valueArraySegment.Count - bytesConsumed);
            } while (valueArraySegment.Count > 0);
#endif

            // The stream was created with the parameterless constructor, so its buffer is expected
            // to be retrievable; the segment covers only the bytes actually written.
            bool success = memStream.TryGetBuffer(out ArraySegment<byte> memStreamBuffer);
            Debug.Assert(success, "Couldn't get underlying MemoryStream buffer.");

            // Every sequence written above was either validated or replaced with U+FFFD,
            // so re-validation is skipped when creating the result.
            return Utf8String.UnsafeCreateWithoutValidation(memStreamBuffer);
        }
        /// <summary>
        /// Checks that <c>AsMemoryBytes</c> yields a default <see cref="ReadOnlyMemory{T}"/> for a null
        /// <see cref="Utf8String"/>, and that for a non-null instance the resulting span refers to the
        /// same memory as the string's pinnable reference.
        /// </summary>
        public void AsMemoryBytes_FromUtf8String()
        {
            // Null input: the result must compare equal to default(ReadOnlyMemory<byte>).
            Utf8String nullString = null;
            ReadOnlyMemory<byte> memoryFromNull = nullString.AsMemoryBytes();
            Assert.True(default(ReadOnlyMemory<byte>).Equals(memoryFromNull));

            Utf8String theString = u8("Hello");

            // Build the expected span directly over the string's first byte ("Hello" is 5 bytes).
            ref readonly byte firstByte = ref theString.GetPinnableReference();
            ReadOnlySpan<byte> expectedSpan = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in firstByte), 5);
            ReadOnlySpan<byte> actualSpan = theString.AsMemoryBytes().Span;

            // Span '==' compares the referenced location and length, not the contents.
            Assert.True(expectedSpan == actualSpan);
        }