/*
 * STATIC FACTORIES
 */

/// <summary>
/// Attempts to build a <see cref="Utf8String"/> from a span of existing UTF-8 bytes without throwing.
/// </summary>
/// <param name="buffer">The bytes to copy into the new <see cref="Utf8String"/>.</param>
/// <param name="value">
/// On success, receives a <see cref="Utf8String"/> holding a copy of <paramref name="buffer"/>
/// (the shared <c>Empty</c> instance when <paramref name="buffer"/> is empty).
/// On failure, receives <see langword="null"/>.
/// </param>
/// <returns>
/// <see langword="true"/> when <paramref name="buffer"/> is empty or holds well-formed UTF-8 data;
/// otherwise, <see langword="false"/>.
/// </returns>
/// <remarks>
/// This method is a non-throwing equivalent of the constructor <see cref="Utf8String(ReadOnlySpan{byte})"/>.
/// </remarks>
public static bool TryCreateFrom(ReadOnlySpan<byte> buffer, [NotNullWhen(true)] out Utf8String? value)
{
    if (buffer.IsEmpty)
    {
        // An empty buffer is acceptable input; hand back the Empty singleton instead of allocating.
        value = Empty;
        return true;
    }

    // Allocate the destination, then copy the caller's bytes into it.
    Utf8String candidate = FastAllocateSkipZeroInit(buffer.Length);

#if SYSTEM_PRIVATE_CORELIB
    Buffer.Memmove(ref candidate.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), (uint)buffer.Length);
#else
    buffer.CopyTo(candidate.DangerousGetMutableSpan());
#endif

    // Validation deliberately inspects the freshly made copy, never the caller-supplied source.
    if (!Utf8Utility.IsWellFormedUtf8(candidate.AsBytes()))
    {
        value = null;
        return false;
    }

    value = candidate;
    return true;
}
/// <summary>
/// Creates a <see cref="Utf8String"/> holding a copy of <paramref name="value"/>, rejecting ill-formed input.
/// </summary>
/// <param name="value">The UTF-8 bytes to copy.</param>
/// <returns>
/// The shared <c>Empty</c> instance when <paramref name="value"/> is empty; otherwise a newly
/// allocated instance containing the copied bytes.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="value"/> is not well-formed UTF-8.</exception>
private Utf8String Ctor(ReadOnlySpan<byte> value)
{
    if (value.IsEmpty)
    {
        return Empty;
    }

    // Allocate the destination and copy the source bytes across.
    Utf8String copy = FastAllocateSkipZeroInit(value.Length);
    Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(value), (uint)value.Length);

    // Validation deliberately inspects the copy, never the caller-supplied source.
    if (Utf8Utility.IsWellFormedUtf8(copy.AsBytes()))
    {
        return copy;
    }

    throw new ArgumentException(
        message: SR.Utf8String_InputContainedMalformedUtf8,
        paramName: nameof(value));
}
/// <summary>
/// Returns the <paramref name="length"/>-byte slice of this instance beginning at
/// <paramref name="startIndex"/>. Arguments and split points are verified only through
/// <see cref="Debug.Assert(bool, string)"/>.
/// </summary>
/// <param name="startIndex">Zero-based byte offset at which the slice begins.</param>
/// <param name="length">Number of bytes in the slice.</param>
/// <returns>
/// The shared <c>Empty</c> instance for a zero-length slice, this instance when the slice spans
/// the whole string, or a newly allocated copy of the requested bytes otherwise.
/// </returns>
private Utf8String InternalSubstringWithoutCorrectnessChecks(int startIndex, int length)
{
    Debug.Assert(startIndex >= 0, "StartIndex cannot be negative.");
    Debug.Assert(startIndex <= Length, "StartIndex cannot point beyond the end of the string (except to the null terminator).");
    Debug.Assert(length >= 0, "Length cannot be negative.");
    Debug.Assert(startIndex + length <= Length, "StartIndex and Length cannot point beyond the end of the string.");

    // Debug builds still verify the split points. Reading one element past the end of the
    // instance is acceptable because that element is the null terminator.
    Debug.Assert(!Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex)), "Somebody is trying to split this Utf8String improperly.");
    Debug.Assert(!Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex + length)), "Somebody is trying to split this Utf8String improperly.");

    if (length == 0)
    {
        return Empty;
    }

    if (length == Length)
    {
        // The slice covers the entire string, so no copy is required.
        return this;
    }

    Utf8String slice = FastAllocateSkipZeroInit(length);

#if SYSTEM_PRIVATE_CORELIB
    Buffer.Memmove(ref slice.DangerousGetMutableReference(), ref DangerousGetMutableReference(startIndex), (uint)length);
#else
    GetSpan().Slice(startIndex, length).CopyTo(slice.DangerousGetMutableSpan());
#endif

    return slice;
}
/// <summary>
/// Copies a slice of this <see cref="Utf8String"/> into a new instance. Bounds are not checked
/// (beyond debug assertions), but both slice boundaries are checked against splitting a
/// multi-byte UTF-8 sequence.
/// </summary>
/// <param name="startIndex">Zero-based byte offset at which the slice begins.</param>
/// <param name="length">
/// Number of bytes in the slice. The caller is expected to have handled the zero-length and
/// whole-string cases already.
/// </param>
/// <returns>A newly allocated instance containing the requested bytes.</returns>
private Utf8String InternalSubstring(int startIndex, int length)
{
    Debug.Assert(startIndex >= 0, "StartIndex cannot be negative.");
    Debug.Assert(startIndex <= Length, "StartIndex cannot point beyond the end of the string (except to the null terminator).");
    Debug.Assert(length >= 0, "Length cannot be negative.");
    Debug.Assert(startIndex + length <= Length, "StartIndex and Length cannot point beyond the end of the string.");
    Debug.Assert(length != 0 && length != Length, "Caller should handle Length boundary conditions.");

    // A Utf8String must always hold well-formed UTF-8, so neither edge of the slice may land in
    // the middle of a multi-byte sequence. Because the source is assumed to be well-formed, it is
    // enough to confirm that neither the first byte of the slice nor the first byte following the
    // slice is a continuation byte; a continuation byte at either spot would mean its lead byte
    // sits on the other side of the cut.
    //
    // Looking at the byte immediately past the end of the instance is fine: it is the null terminator.
    int endIndex = startIndex + length;
    if (Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex))
        || Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(endIndex)))
    {
        ThrowImproperStringSplit();
    }

    Utf8String slice = FastAllocateSkipZeroInit(length);

#if SYSTEM_PRIVATE_CORELIB
    Buffer.Memmove(ref slice.DangerousGetMutableReference(), ref DangerousGetMutableReference(startIndex), (uint)length);
#else
    GetSpan().Slice(startIndex, length).CopyTo(slice.DangerousGetMutableSpan());
#endif

    return slice;
}
/// <summary>
/// Creates a <see cref="Utf8String"/> whose contents are the UTF-8 encoding of a single <see cref="Rune"/>.
/// </summary>
/// <param name="value">The scalar value to encode.</param>
/// <returns>A newly allocated instance sized to <see cref="Rune.Utf8SequenceLength"/> bytes.</returns>
internal static Utf8String CreateFromRune(Rune value)
{
    Utf8String result = FastAllocate(value.Utf8SequenceLength);

    // Encode straight into the new instance's storage.
    Span<byte> destination = new Span<byte>(ref result.DangerousGetMutableReference(), result.Length);
    int bytesWritten = value.EncodeToUtf8(destination);
    Debug.Assert(bytesWritten == value.Utf8SequenceLength);

    return result;
}
/// <summary>
/// Creates a <see cref="Utf8String"/> from a copy of <paramref name="value"/>, passing the copy
/// through <c>Utf8Utility.ValidateAndFixupUtf8String</c> before returning it.
/// </summary>
/// <param name="value">The bytes to copy.</param>
/// <returns>
/// The shared <c>Empty</c> instance when <paramref name="value"/> is empty; otherwise whatever
/// <c>Utf8Utility.ValidateAndFixupUtf8String</c> returns for the copied data.
/// </returns>
private Utf8String Ctor(ReadOnlySpan<byte> value)
{
    if (value.IsEmpty)
    {
        return Empty;
    }

    int byteCount = value.Length;
    Utf8String copy = FastAllocate(byteCount);
    Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(value), (uint)byteCount);

    // Validation and fixup are applied to the copy, not to the caller's span.
    return Utf8Utility.ValidateAndFixupUtf8String(copy);
}
/*
 * HELPER METHODS
 */

/// <summary>
/// Creates a <see cref="Utf8String"/> instance from existing data, bypassing validation.
/// The signature also lets the caller pass flags describing attributes of the data.
/// </summary>
/// <param name="utf8Data">The bytes to copy verbatim into the new instance.</param>
/// <param name="assumeWellFormed">Caller-supplied hint; not read by this implementation.</param>
/// <param name="assumeAscii">Caller-supplied hint; not read by this implementation.</param>
/// <returns>
/// The shared <c>Empty</c> instance when <paramref name="utf8Data"/> is empty; otherwise a newly
/// allocated instance containing an unvalidated copy of the bytes.
/// </returns>
internal static Utf8String DangerousCreateWithoutValidation(ReadOnlySpan<byte> utf8Data, bool assumeWellFormed = false, bool assumeAscii = false)
{
    if (utf8Data.IsEmpty)
    {
        return Empty;
    }

    Utf8String result = FastAllocate(utf8Data.Length);

    // Wrap the new instance's storage in a span and copy the source into it; no validation occurs.
    Span<byte> destination = new Span<byte>(ref result.DangerousGetMutableReference(), result.Length);
    utf8Data.CopyTo(destination);

    return result;
}
/// <summary>
/// Creates a <see cref="Utf8String"/> from a copy of <paramref name="value"/>, passing the copy
/// through <c>Utf8Utility.ValidateAndFixupUtf8String</c> before returning it.
/// </summary>
/// <param name="value">The bytes to copy.</param>
/// <returns>
/// The shared <c>Empty</c> instance when <paramref name="value"/> is empty; otherwise the result
/// of <c>Utf8Utility.ValidateAndFixupUtf8String</c> for the copied data.
/// </returns>
private Utf8String Ctor(ReadOnlySpan<byte> value)
{
    if (value.IsEmpty)
    {
        return Empty;
    }

    Utf8String copy = FastAllocate(value.Length);
    Buffer.Memmove(
        ref copy.DangerousGetMutableReference(),
        ref MemoryMarshal.GetReference(value),
        (uint)value.Length);

    // TODO-NULLABLE: https://github.com/dotnet/roslyn/issues/26761
    Utf8String validated = Utf8Utility.ValidateAndFixupUtf8String(copy)!;
    return validated;
}
/// <summary>
/// Creates a <see cref="Utf8String"/> from a copy of <paramref name="value"/>, passing the copy
/// through <c>Utf8Utility.ValidateAndFixupUtf8String</c> before returning it.
/// </summary>
/// <param name="value">The bytes to copy.</param>
/// <returns>
/// The shared <c>Empty</c> instance when <paramref name="value"/> is empty; otherwise the result
/// of <c>Utf8Utility.ValidateAndFixupUtf8String</c> for the copied data.
/// </returns>
private Utf8String Ctor(ReadOnlySpan<byte> value)
{
    if (value.IsEmpty)
    {
        return Empty;
    }

    uint byteCount = (uint)value.Length;
    Utf8String copy = FastAllocate(value.Length);
    Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(value), byteCount);

    // TODO-NULLABLE: Remove ! when nullable attributes are respected
    return Utf8Utility.ValidateAndFixupUtf8String(copy)!;
}
/// <summary>
/// Creates a <see cref="Utf8String"/> by transcoding UTF-16 text to UTF-8 via <see cref="Encoding.UTF8"/>.
/// </summary>
/// <param name="value">The UTF-16 characters to transcode.</param>
/// <returns>
/// The shared <c>Empty</c> instance when <paramref name="value"/> is empty; otherwise a newly
/// allocated instance sized to the byte count reported by <see cref="Encoding.UTF8"/>.
/// </returns>
private Utf8String Ctor(ReadOnlySpan<char> value)
{
    if (value.IsEmpty)
    {
        return Empty;
    }

    // TODO_UTF8STRING: Call into optimized transcoding routine when it's available.

    // First pass measures the encoded size; second pass writes the bytes into the new instance.
    int byteCount = Encoding.UTF8.GetByteCount(value);
    Utf8String result = FastAllocate(byteCount);

    Span<byte> destination = new Span<byte>(ref result.DangerousGetMutableReference(), result.Length);
    Encoding.UTF8.GetBytes(value, destination);

    return result;
}
/// <summary>
/// Copies a slice of this <see cref="Utf8String"/> into a new instance without bounds checking;
/// the arguments are verified only by debug assertions.
/// </summary>
/// <param name="startIndex">Zero-based byte offset at which the slice begins.</param>
/// <param name="length">
/// Number of bytes in the slice. The caller is expected to have handled the zero-length and
/// whole-string cases already.
/// </param>
/// <returns>A newly allocated instance containing the requested bytes.</returns>
private Utf8String InternalSubstring(int startIndex, int length)
{
    Debug.Assert(startIndex >= 0, "StartIndex cannot be negative.");
    Debug.Assert(startIndex <= Length, "StartIndex cannot point beyond the end of the string (except to the null terminator).");
    Debug.Assert(length >= 0, "Length cannot be negative.");
    Debug.Assert(startIndex + length <= Length, "StartIndex and Length cannot point beyond the end of the string.");
    Debug.Assert(length != 0 && length != Length, "Caller should handle Length boundary conditions.");

    Utf8String slice = FastAllocate(length);
    Buffer.Memmove(
        ref slice.DangerousGetMutableReference(),
        ref DangerousGetMutableReference(startIndex),
        (uint)length);

    return slice;
}
/// <summary>
/// Builds a <see cref="Utf8String"/> from existing UTF-8 data, repairing ill-formed input rather than rejecting it.
/// </summary>
/// <param name="buffer">The bytes to copy into the new <see cref="Utf8String"/>.</param>
/// <returns>
/// The shared <c>Empty</c> instance when <paramref name="buffer"/> is empty; otherwise the
/// validated and fixed-up instance.
/// </returns>
/// <remarks>
/// Any ill-formed UTF-8 subsequences in <paramref name="buffer"/> are replaced with
/// <see cref="Rune.ReplacementChar"/> in the returned <see cref="Utf8String"/>. As a consequence the
/// result may differ from <paramref name="buffer"/> in both contents and total byte length.
/// </remarks>
public static Utf8String CreateFromRelaxed(ReadOnlySpan<byte> buffer)
{
    if (buffer.IsEmpty)
    {
        return Empty;
    }

    // Copy the caller's bytes into a freshly allocated instance.
    int byteCount = buffer.Length;
    Utf8String copy = FastAllocateSkipZeroInit(byteCount);
    Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), (uint)byteCount);

    // Validation and fixup operate on the copy.
    return Utf8Utility.ValidateAndFixupUtf8String(copy);
}
/// <summary>
/// Creates a <see cref="Utf8String"/> whose contents are the UTF-8 encoding of a single <see cref="Rune"/>.
/// </summary>
/// <param name="value">The scalar value to encode.</param>
/// <returns>A newly allocated instance sized to <see cref="Rune.Utf8SequenceLength"/> bytes.</returns>
internal static Utf8String CreateFromRune(Rune value)
{
    // Zero-initialization is skipped because every byte of the buffer is written below.
    Utf8String result = FastAllocateSkipZeroInit(value.Utf8SequenceLength);

    if (!value.IsAscii)
    {
        // Multi-byte case: let the Rune encode itself into the new instance's storage.
        int bytesWritten = value.EncodeToUtf8(result.DangerousGetMutableSpan());
        Debug.Assert(result.Length == bytesWritten);
    }
    else
    {
        // ASCII case: the encoding is the single byte equal to the scalar value.
        result.DangerousGetMutableReference() = (byte)value.Value;
    }

    return result;
}