/// <summary>
/// Returns <paramref name="value"/> if it is null or contains only well-formed UTF-8 data;
/// otherwise allocates a new <see cref="Utf8String"/> instance containing the same data as
/// <paramref name="value"/> but where all invalid UTF-8 sequences have been replaced
/// with U+FFFD.
/// </summary>
/// <param name="value">The instance to validate; may be null or empty.</param>
/// <returns>
/// <paramref name="value"/> itself when it is null, empty, or free of invalid sequences;
/// otherwise a newly created instance in which each invalid sequence is replaced.
/// </returns>
public static Utf8String ValidateAndFixupUtf8String(Utf8String value)
{
    // The contract above promises null is passed through, so guard before reading Length.
    if (value is null || value.Length == 0)
    {
        return value;
    }

    ReadOnlySpan<byte> valueAsBytes = value.AsBytes();

    // A negative index means no invalid sequence was found; hand back the input untouched.
    int idxOfFirstInvalidData = GetIndexOfFirstInvalidUtf8Sequence(valueAsBytes, out _);
    if (idxOfFirstInvalidData < 0)
    {
        return value;
    }

    // TODO_UTF8STRING: Replace this with the faster implementation once it's available.
    // (The faster implementation is in the dev/utf8string_bak branch currently.)

    using (MemoryStream memStream = new MemoryStream())
    {
#if !NETSTANDARD2_0
        // Everything before the first invalid sequence is known-good; copy it in one shot.
        memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData));

        valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData);
        do
        {
            if (Rune.DecodeFromUtf8(valueAsBytes, out _, out int bytesConsumed) == OperationStatus.Done)
            {
                // Valid scalar value - copy data as-is to MemoryStream
                memStream.Write(valueAsBytes.Slice(0, bytesConsumed));
            }
            else
            {
                // Invalid scalar value - copy U+FFFD to MemoryStream
                memStream.Write(ReplacementCharSequence);
            }

            // Advance past whatever was just consumed, valid or not.
            valueAsBytes = valueAsBytes.Slice(bytesConsumed);
        } while (!valueAsBytes.IsEmpty);
#else
        if (!MemoryMarshal.TryGetArray(value.AsMemoryBytes(), out ArraySegment<byte> valueArraySegment))
        {
            Debug.Fail("Utf8String on netstandard should always be backed by an array.");
        }

        // Everything before the first invalid sequence is known-good; copy it in one shot.
        memStream.Write(valueArraySegment.Array, valueArraySegment.Offset, idxOfFirstInvalidData);

        // The new offset is relative to the backing array, so it must include the
        // segment's existing offset (same arithmetic as the re-slice inside the loop).
        valueArraySegment = new ArraySegment<byte>(
            valueArraySegment.Array,
            valueArraySegment.Offset + idxOfFirstInvalidData,
            valueArraySegment.Count - idxOfFirstInvalidData);
        do
        {
            if (Rune.DecodeFromUtf8(valueArraySegment, out _, out int bytesConsumed) == OperationStatus.Done)
            {
                // Valid scalar value - copy data as-is to MemoryStream
                memStream.Write(valueArraySegment.Array, valueArraySegment.Offset, bytesConsumed);
            }
            else
            {
                // Invalid scalar value - copy U+FFFD to MemoryStream
                memStream.Write(ReplacementCharSequence, 0, ReplacementCharSequence.Length);
            }

            // Advance past whatever was just consumed, valid or not.
            valueArraySegment = new ArraySegment<byte>(
                valueArraySegment.Array,
                valueArraySegment.Offset + bytesConsumed,
                valueArraySegment.Count - bytesConsumed);
        } while (valueArraySegment.Count > 0);
#endif

        bool success = memStream.TryGetBuffer(out ArraySegment<byte> memStreamBuffer);
        Debug.Assert(success, "Couldn't get underlying MemoryStream buffer.");

        return Utf8String.UnsafeCreateWithoutValidation(memStreamBuffer);
    }
}
/// <summary>
/// Checks that <c>AsMemoryBytes</c> on a null <see cref="Utf8String"/> equals the default
/// <see cref="ReadOnlyMemory{T}"/>, and that for "Hello" the resulting span equals a
/// five-element span created over the string's pinnable reference.
/// </summary>
public void AsMemoryBytes_FromUtf8String()
{
    // Null receiver: the result must compare equal to default(ReadOnlyMemory<byte>).
    ReadOnlyMemory<byte> defaultMemory = default(ReadOnlyMemory<byte>);
    var memoryFromNull = ((Utf8String)null).AsMemoryBytes();
    Assert.True(defaultMemory.Equals(memoryFromNull));

    // Non-null receiver: compare against a span built directly over the first byte.
    Utf8String hello = u8("Hello");
    ReadOnlySpan<byte> expectedSpan = MemoryMarshal.CreateReadOnlySpan(
        ref Unsafe.AsRef(in hello.GetPinnableReference()),
        5);
    var actualSpan = hello.AsMemoryBytes().Span;

    Assert.True(expectedSpan == actualSpan);
}