/// <summary>
/// Checks that each of the three <c>Utf8String.AreEquivalent</c> overloads returns
/// <paramref name="expected"/> when comparing <paramref name="utf8Input"/> (converted via
/// <c>u8</c>) against <paramref name="utf16Input"/>.
/// </summary>
public static void AreEquivalent_Tests(string utf8Input, string utf16Input, bool expected)
{
    Utf8String utf8Value = u8(utf8Input);

    // Overload taking the Utf8String instance and the UTF-16 string directly.
    bool viaInstances = Utf8String.AreEquivalent(utf8Value, utf16Input);
    Assert.Equal(expected, viaInstances);

    // Overload taking the AsSpan() view of the UTF-8 value and a UTF-16 char span.
    bool viaSpans = Utf8String.AreEquivalent(utf8Value.AsSpan(), utf16Input.AsSpan());
    Assert.Equal(expected, viaSpans);

    // Overload taking the AsBytes() view of the UTF-8 value and a UTF-16 char span.
    bool viaBytes = Utf8String.AreEquivalent(utf8Value.AsBytes(), utf16Input.AsSpan());
    Assert.Equal(expected, viaBytes);
}
/// <summary>
/// Checks that two distinct <see cref="Utf8String"/> instances built from the same bytes
/// produce the same hash code, and that "Hello" and "Goodbye" produce different hash codes.
/// </summary>
public static void GetHashCode_ReturnsRandomized()
{
    // Pair 1: "Hello" and a separately constructed copy of its bytes.
    Utf8String a = u8("Hello");
    Utf8String b = new Utf8String(a.AsBytes());
    Assert.NotSame(a, b);

    int hashOfA = a.GetHashCode();
    int hashOfB = b.GetHashCode();
    Assert.Equal(hashOfA, hashOfB);

    // Pair 2: "Goodbye" and a separately constructed copy of its bytes.
    Utf8String c = u8("Goodbye");
    Utf8String d = new Utf8String(c.AsBytes());
    Assert.NotSame(c, d);

    int hashOfC = c.GetHashCode();
    int hashOfD = d.GetHashCode();
    Assert.Equal(hashOfC, hashOfD);

    // Different contents are expected to hash differently; the hashes are requested again
    // here rather than reusing the values captured above.
    Assert.NotEqual(a.GetHashCode(), c.GetHashCode());
}
/// <summary>
/// Returns <paramref name="value"/> if it is null or contains only well-formed UTF-8 data;
/// otherwise allocates a new <see cref="Utf8String"/> instance containing the same data as
/// <paramref name="value"/> but where all invalid UTF-8 sequences have been replaced
/// with U+FFFD.
/// </summary>
/// <param name="value">The string to validate. Null and empty inputs are returned unchanged.</param>
/// <returns>
/// <paramref name="value"/> itself when no invalid sequence is found; otherwise a new
/// instance built from the repaired bytes.
/// </returns>
public static Utf8String ValidateAndFixupUtf8String(Utf8String value)
{
    if (Utf8String.IsNullOrEmpty(value))
    {
        return value;
    }

    ReadOnlySpan<byte> valueAsBytes = value.AsBytes();

    // A negative index is treated as "no invalid sequence found", so the input can be
    // handed back without copying.
    int idxOfFirstInvalidData = GetIndexOfFirstInvalidUtf8Sequence(valueAsBytes, out _);
    if (idxOfFirstInvalidData < 0)
    {
        return value;
    }

    // TODO_UTF8STRING: Replace this with the faster implementation once it's available.
    // (The faster implementation is in the dev/utf8string_bak branch currently.)

    using (MemoryStream memStream = new MemoryStream())
    {
        // Everything before the first invalid sequence is copied verbatim.
        memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData));
        valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData);

        // valueAsBytes is non-empty on entry: it starts at the first invalid sequence.
        do
        {
            if (Rune.DecodeFromUtf8(valueAsBytes, out _, out int bytesConsumed) == OperationStatus.Done)
            {
                // Valid scalar value - copy data as-is to MemoryStream
                memStream.Write(valueAsBytes.Slice(0, bytesConsumed));
            }
            else
            {
                // Invalid scalar value - copy U+FFFD to MemoryStream
                memStream.Write(ReplacementCharSequence);
            }

            // In both cases skip the bytes the decoder reported as consumed.
            valueAsBytes = valueAsBytes.Slice(bytesConsumed);
        } while (!valueAsBytes.IsEmpty);

        bool success = memStream.TryGetBuffer(out ArraySegment<byte> memStreamBuffer);
        Debug.Assert(success, "Couldn't get underlying MemoryStream buffer.");

        // The new instance is created before the stream is disposed on scope exit.
        return Utf8String.DangerousCreateWithoutValidation(memStreamBuffer, assumeWellFormed: true);
    }
}
/// <summary>
/// Returns <paramref name="value"/> if it is null or contains only well-formed UTF-8 data;
/// otherwise allocates a new <see cref="Utf8String"/> instance containing the same data as
/// <paramref name="value"/> but where all invalid UTF-8 sequences have been replaced
/// with U+FFFD.
/// </summary>
/// <param name="value">The string to validate. Null and empty inputs are returned unchanged.</param>
/// <returns>
/// <paramref name="value"/> itself when no invalid sequence is found; otherwise a new
/// instance built from the repaired bytes.
/// </returns>
public static Utf8String ValidateAndFixupUtf8String(Utf8String value)
{
    // Honor the documented contract: a null input is returned as-is instead of faulting
    // on the Length access.
    if (value is null || value.Length == 0)
    {
        return value;
    }

    ReadOnlySpan<byte> valueAsBytes = value.AsBytes();

    // A negative index is treated as "no invalid sequence found", so the input can be
    // handed back without copying.
    int idxOfFirstInvalidData = GetIndexOfFirstInvalidUtf8Sequence(valueAsBytes, out _);
    if (idxOfFirstInvalidData < 0)
    {
        return value;
    }

    // TODO_UTF8STRING: Replace this with the faster implementation once it's available.
    // (The faster implementation is in the dev/utf8string_bak branch currently.)

    using (MemoryStream memStream = new MemoryStream())
    {
#if (!NETSTANDARD2_0 && !NETFRAMEWORK)
        // Everything before the first invalid sequence is copied verbatim.
        memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData));
        valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData);

        do
        {
            if (Rune.DecodeFromUtf8(valueAsBytes, out _, out int bytesConsumed) == OperationStatus.Done)
            {
                // Valid scalar value - copy data as-is to MemoryStream
                memStream.Write(valueAsBytes.Slice(0, bytesConsumed));
            }
            else
            {
                // Invalid scalar value - copy U+FFFD to MemoryStream
                memStream.Write(ReplacementCharSequence);
            }

            valueAsBytes = valueAsBytes.Slice(bytesConsumed);
        } while (!valueAsBytes.IsEmpty);
#else
        // This branch uses the array-based Stream.Write overload, so it needs the
        // backing array of the string.
        if (!MemoryMarshal.TryGetArray(value.AsMemoryBytes(), out ArraySegment<byte> valueArraySegment))
        {
            Debug.Fail("Utf8String on netstandard should always be backed by an array.");
        }

        // Everything before the first invalid sequence is copied verbatim.
        memStream.Write(valueArraySegment.Array, valueArraySegment.Offset, idxOfFirstInvalidData);

        // idxOfFirstInvalidData is relative to the start of the string's bytes, so it must be
        // added to the segment's own offset rather than used as an absolute array index.
        valueArraySegment = new ArraySegment<byte>(
            valueArraySegment.Array,
            valueArraySegment.Offset + idxOfFirstInvalidData,
            valueArraySegment.Count - idxOfFirstInvalidData);

        do
        {
            if (Rune.DecodeFromUtf8(valueArraySegment, out _, out int bytesConsumed) == OperationStatus.Done)
            {
                // Valid scalar value - copy data as-is to MemoryStream
                memStream.Write(valueArraySegment.Array, valueArraySegment.Offset, bytesConsumed);
            }
            else
            {
                // Invalid scalar value - copy U+FFFD to MemoryStream
                memStream.Write(ReplacementCharSequence, 0, ReplacementCharSequence.Length);
            }

            // Advance past the bytes the decoder reported as consumed.
            valueArraySegment = new ArraySegment<byte>(
                valueArraySegment.Array,
                valueArraySegment.Offset + bytesConsumed,
                valueArraySegment.Count - bytesConsumed);
        } while (valueArraySegment.Count > 0);
#endif

        bool success = memStream.TryGetBuffer(out ArraySegment<byte> memStreamBuffer);
        Debug.Assert(success, "Couldn't get underlying MemoryStream buffer.");

        // The new instance is created before the stream is disposed on scope exit.
        return Utf8String.UnsafeCreateWithoutValidation(memStreamBuffer);
    }
}
/// <summary>
/// Checks that <c>AsBytes</c> on a null <see cref="Utf8String"/> compares equal to the default
/// span, and that for "Hello" it compares equal to a 5-byte span created over the string's
/// pinnable reference.
/// </summary>
public void AsBytes_FromUtf8String()
{
    // Null input: the resulting span must compare equal to default(ReadOnlySpan<byte>).
    ReadOnlySpan<byte> bytesOfNull = ((Utf8String)null).AsBytes();
    Assert.True(default(ReadOnlySpan<byte>) == bytesOfNull);

    Utf8String theString = u8("Hello");

    // Build the expected span directly over the pinnable reference; "Hello" is 5 bytes long.
    ReadOnlySpan<byte> expectedBytes = MemoryMarshal.CreateReadOnlySpan(
        ref Unsafe.AsRef(in theString.GetPinnableReference()),
        5);
    ReadOnlySpan<byte> actualBytes = theString.AsBytes();

    Assert.True(expectedBytes == actualBytes);
}
/// <summary>
/// Checks that <c>AsBytes</c> on "Hello" compares equal to a 5-byte span created over the
/// string's pinnable reference.
/// </summary>
public void AsBytes_FromUtf8String_netcoreapp()
{
    Utf8String theString = u8("Hello");

    // Build the expected span directly over the pinnable reference; "Hello" is 5 bytes long.
    ReadOnlySpan<byte> expectedBytes = MemoryMarshal.CreateReadOnlySpan(
        ref Unsafe.AsRef(in theString.GetPinnableReference()),
        5);
    ReadOnlySpan<byte> actualBytes = theString.AsBytes();

    Assert.True(expectedBytes == actualBytes);
}
/// <summary>
/// Gets the length of the value returned by <c>AsBytes()</c> for <paramref name="value"/>.
/// </summary>
/// <param name="value">The string whose byte length is requested.</param>
/// <returns>The <c>Length</c> of <c>value.AsBytes()</c>.</returns>
public static int GetByteLength(this Utf8String value) => value.AsBytes().Length;
/// <summary>
/// Reports the content length as the length of <c>_content.AsBytes()</c>.
/// </summary>
/// <param name="length">Receives the byte length of <c>_content</c>.</param>
/// <returns>Always <see langword="true"/>.</returns>
protected override bool TryComputeLength(out long length)
{
    var byteCount = _content.AsBytes().Length;

    length = byteCount;
    return true;
}