/// <summary>
/// Allocates a <see cref="Utf8String"/> of the requested byte length, hands its buffer to the supplied
/// delegate so the delegate can write the contents, and then repairs any ill-formed UTF-8 it finds.
/// </summary>
/// <typeparam name="TState">The type of the state object forwarded to <paramref name="action"/>.</typeparam>
/// <param name="length">The number of bytes to allocate for the new <see cref="Utf8String"/>.</param>
/// <param name="state">The state object forwarded unchanged to <paramref name="action"/>.</param>
/// <param name="action">The callback that writes the contents of the new <see cref="Utf8String"/>.</param>
/// <returns>
/// The <see cref="Utf8String"/> produced after validation and fixup, or the empty instance when
/// <paramref name="length"/> is zero.
/// </returns>
/// <remarks>
/// The data written by <paramref name="action"/> is validated as UTF-8. Each invalid UTF-8 subsequence
/// is replaced with <see cref="Rune.ReplacementChar"/> in the returned instance, so the returned
/// <see cref="Utf8String"/> may have a byte length that differs from <paramref name="length"/>.
/// </remarks>
public static Utf8String CreateRelaxed<TState>(int length, TState state, SpanAction<byte, TState> action)
{
    // Argument checks, in order: negative length first, then a missing callback.
    if (length < 0)
    {
        ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum();
    }

    if (action is null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action);
    }

    // A zero-length request returns Empty without ever invoking the callback.
    if (length == 0)
    {
        return Empty;
    }

    // The raw buffer is about to be exposed to user code, so it is obtained from
    // FastAllocate rather than from FastAllocateSkipZeroInit.
    Utf8String instance = FastAllocate(length);
    Span<byte> writableBytes = instance.DangerousGetMutableSpan();
    action.Invoke(writableBytes, state);

    // Validate what the callback wrote and fix up anything that is ill-formed.
    Utf8String validated = Utf8Utility.ValidateAndFixupUtf8String(instance);
    return validated;
}
/// <summary>
/// Allocates a <see cref="Utf8String"/> of the requested byte length and hands its buffer to the supplied
/// delegate so the delegate can write the contents. No validation is performed on the result; see the
/// remarks for the safety contract this places on the caller.
/// </summary>
/// <typeparam name="TState">The type of the state object forwarded to <paramref name="action"/>.</typeparam>
/// <param name="length">The number of bytes to allocate for the new <see cref="Utf8String"/>.</param>
/// <param name="state">The state object forwarded unchanged to <paramref name="action"/>.</param>
/// <param name="action">The callback that writes the contents of the new <see cref="Utf8String"/>.</param>
/// <returns>
/// The newly populated <see cref="Utf8String"/>, or the empty instance when <paramref name="length"/> is zero.
/// </returns>
/// <remarks>
/// This factory exists as an optimization that skips the validation step performed by
/// <see cref="Create{TState}(int, TState, SpanAction{byte, TState})"/>. In exchange, the caller promises
/// that <paramref name="action"/> fills the buffer with well-formed UTF-8 data. <see cref="Utf8String"/>
/// contractually guarantees that it holds only well-formed UTF-8, and runtime instability could occur
/// if a caller breaks that promise.
/// </remarks>
public static Utf8String UnsafeCreateWithoutValidation<TState>(int length, TState state, SpanAction<byte, TState> action)
{
    // Argument checks, in order: negative length first, then a missing callback.
    if (length < 0)
    {
        ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum();
    }

    if (action is null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action);
    }

    // A zero-length request returns Empty without ever invoking the callback.
    if (length == 0)
    {
        return Empty;
    }

    // The raw buffer is about to be exposed to user code, so it is obtained from
    // FastAllocate rather than from FastAllocateSkipZeroInit.
    Utf8String instance = FastAllocate(length);
    Span<byte> writableBytes = instance.DangerousGetMutableSpan();
    action.Invoke(writableBytes, state);

    // Debug-only sanity check; this assertion is compiled out of release builds.
    Debug.Assert(Utf8Utility.IsWellFormedUtf8(instance.AsBytes()), "Callback populated the buffer with ill-formed UTF-8 data.");

    return instance;
}
/// <summary>
/// Allocates a <see cref="Utf8String"/> of the requested byte length, hands its buffer to the supplied
/// delegate so the delegate can write the contents, and rejects the result if it is not well-formed UTF-8.
/// </summary>
/// <typeparam name="TState">The type of the state object forwarded to <paramref name="action"/>.</typeparam>
/// <param name="length">The number of bytes to allocate for the new <see cref="Utf8String"/>.</param>
/// <param name="state">The state object forwarded unchanged to <paramref name="action"/>.</param>
/// <param name="action">The callback that writes the contents of the new <see cref="Utf8String"/>.</param>
/// <returns>
/// The newly populated <see cref="Utf8String"/>, or the empty instance when <paramref name="length"/> is zero.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="action"/> filled the buffer with ill-formed UTF-8 data.
/// </exception>
/// <remarks>
/// The data written by <paramref name="action"/> is validated as UTF-8 after the delegate returns.
/// An exception is thrown if an invalid UTF-8 subsequence is detected.
/// </remarks>
public static Utf8String Create<TState>(int length, TState state, SpanAction<byte, TState> action)
{
    // Argument checks, in order: negative length first, then a missing callback.
    if (length < 0)
    {
        ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum();
    }

    if (action is null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action);
    }

    // A zero-length request returns Empty without ever invoking the callback.
    if (length == 0)
    {
        return Empty;
    }

    // The raw buffer is about to be exposed to user code, so it is obtained from
    // FastAllocate rather than from FastAllocateSkipZeroInit.
    Utf8String instance = FastAllocate(length);
    Span<byte> writableBytes = instance.DangerousGetMutableSpan();
    action.Invoke(writableBytes, state);

    // Strict validation: hand the instance back only when the callback's output is well-formed.
    bool isWellFormed = Utf8Utility.IsWellFormedUtf8(instance.AsBytes());
    if (isWellFormed)
    {
        return instance;
    }

    throw new ArgumentException(
        message: SR.Utf8String_CallbackProvidedMalformedData,
        paramName: nameof(action));
}
/*
 * STATIC FACTORIES
 */

/// <summary>
/// Attempts to build a <see cref="Utf8String"/> from a copy of existing UTF-8 data.
/// </summary>
/// <param name="buffer">The source bytes to copy into the new <see cref="Utf8String"/>.</param>
/// <param name="value">
/// On return, a <see cref="Utf8String"/> holding the same contents as <paramref name="buffer"/> when
/// <paramref name="buffer"/> is well-formed UTF-8; otherwise <see langword="null"/>.
/// </param>
/// <returns>
/// <see langword="true"/> when <paramref name="buffer"/> is well-formed UTF-8 and <paramref name="value"/>
/// holds a <see cref="Utf8String"/> wrapping a copy of that data; otherwise <see langword="false"/>.
/// </returns>
/// <remarks>
/// This is the non-throwing counterpart of the constructor <see cref="Utf8String(ReadOnlySpan{byte})"/>.
/// </remarks>
public static bool TryCreateFrom(ReadOnlySpan<byte> buffer, [NotNullWhen(true)] out Utf8String? value)
{
    // An empty buffer is valid input and maps to the Empty singleton.
    if (buffer.IsEmpty)
    {
        value = Empty;
        return true;
    }

    // Copy the source bytes into a freshly allocated instance.
    Utf8String copy = FastAllocateSkipZeroInit(buffer.Length);
#if SYSTEM_PRIVATE_CORELIB
    Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), (uint)buffer.Length);
#else
    buffer.CopyTo(copy.DangerousGetMutableSpan());
#endif

    // Validation deliberately runs over the copy, never over the caller's source buffer.
    if (!Utf8Utility.IsWellFormedUtf8(copy.AsBytes()))
    {
        value = null;
        return false;
    }

    value = copy;
    return true;
}
/// <summary>
/// Returns the byte range [<paramref name="startIndex"/>, <paramref name="startIndex"/> + <paramref name="length"/>)
/// of this instance as a <see cref="Utf8String"/>. Range and split-point correctness are only asserted in
/// debug builds; release builds trust the caller.
/// </summary>
/// <param name="startIndex">The byte offset at which the substring begins.</param>
/// <param name="length">The number of bytes in the substring.</param>
/// <returns>
/// The empty instance when <paramref name="length"/> is zero, this same instance when the range covers
/// the whole string, and otherwise a newly allocated copy of the requested bytes.
/// </returns>
private Utf8String InternalSubstringWithoutCorrectnessChecks(int startIndex, int length)
{
    Debug.Assert(startIndex >= 0, "StartIndex cannot be negative.");
    Debug.Assert(startIndex <= this.Length, "StartIndex cannot point beyond the end of the string (except to the null terminator).");
    Debug.Assert(length >= 0, "Length cannot be negative.");
    Debug.Assert(startIndex + length <= this.Length, "StartIndex and Length cannot point beyond the end of the string.");

    // Debug builds still verify that neither boundary lands on a continuation byte. Reading the
    // element just past the end of the instance is fine: it is the null terminator.
    Debug.Assert(!Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex)), "Somebody is trying to split this Utf8String improperly.");
    Debug.Assert(!Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex + length)), "Somebody is trying to split this Utf8String improperly.");

    // Nothing requested: no allocation needed.
    if (length == 0)
    {
        return Empty;
    }

    // The whole string requested: hand back this instance instead of copying.
    if (length == this.Length)
    {
        return this;
    }

    // Proper sub-range: allocate and copy the selected bytes.
    Utf8String slice = FastAllocateSkipZeroInit(length);
#if SYSTEM_PRIVATE_CORELIB
    Buffer.Memmove(ref slice.DangerousGetMutableReference(), ref this.DangerousGetMutableReference(startIndex), (uint)length);
#else
    this.GetSpan().Slice(startIndex, length).CopyTo(slice.DangerousGetMutableSpan());
#endif

    return slice;
}
/// <summary>
/// Copies a byte range of this <see cref="Utf8String"/> into a new instance without bounds checking,
/// while still refusing to split a multi-byte UTF-8 sequence at either boundary.
/// </summary>
/// <param name="startIndex">The byte offset at which the substring begins.</param>
/// <param name="length">
/// The number of bytes in the substring. Callers are expected to handle the zero-length and
/// whole-string cases themselves.
/// </param>
/// <returns>A newly allocated <see cref="Utf8String"/> holding the requested bytes.</returns>
private Utf8String InternalSubstring(int startIndex, int length)
{
    Debug.Assert(startIndex >= 0, "StartIndex cannot be negative.");
    Debug.Assert(startIndex <= this.Length, "StartIndex cannot point beyond the end of the string (except to the null terminator).");
    Debug.Assert(length >= 0, "Length cannot be negative.");
    Debug.Assert(startIndex + length <= this.Length, "StartIndex and Length cannot point beyond the end of the string.");
    Debug.Assert(length != 0 && length != this.Length, "Caller should handle Length boundary conditions.");

    // A Utf8String must hold only well-formed UTF-8, so neither edge of the new substring may cut
    // through a multi-byte sequence. Because the original buffer is assumed to be well-formed, it is
    // enough to confirm that the byte at each boundary is not a continuation byte: a continuation
    // byte there would mean its lead byte sits on the other side of the cut.

    // Leading boundary: the first byte of the substring.
    if (Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex)))
    {
        ThrowImproperStringSplit();
    }

    // Trailing boundary: the byte immediately after the substring. Dereferencing the element just
    // past the end of the instance is fine, since that element is the null terminator.
    if (Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex + length)))
    {
        ThrowImproperStringSplit();
    }

    Utf8String slice = FastAllocateSkipZeroInit(length);
#if SYSTEM_PRIVATE_CORELIB
    Buffer.Memmove(ref slice.DangerousGetMutableReference(), ref this.DangerousGetMutableReference(startIndex), (uint)length);
#else
    this.GetSpan().Slice(startIndex, length).CopyTo(slice.DangerousGetMutableSpan());
#endif

    return slice;
}
/// <summary>
/// Builds a new <see cref="Utf8String"/> from a copy of the supplied bytes without validating them.
/// See the remarks for the safety contract this places on the caller.
/// </summary>
/// <param name="utf8Contents">The bytes to copy into the new <see cref="Utf8String"/>.</param>
/// <returns>
/// A <see cref="Utf8String"/> holding a copy of <paramref name="utf8Contents"/>, or the empty instance
/// when <paramref name="utf8Contents"/> is empty.
/// </returns>
/// <remarks>
/// This factory exists as an optimization that skips the validation step the <see cref="Utf8String"/>
/// constructors normally perform. In exchange, the caller promises that <paramref name="utf8Contents"/>
/// contains only well-formed UTF-8 data. <see cref="Utf8String"/> contractually guarantees that it holds
/// only well-formed UTF-8, and runtime instability could occur if a caller breaks that promise.
/// </remarks>
public static Utf8String UnsafeCreateWithoutValidation(ReadOnlySpan<byte> utf8Contents)
{
    // Empty input maps straight to Empty; nothing is allocated.
    if (utf8Contents.IsEmpty)
    {
        return Empty;
    }

    // Allocate an instance of matching length and copy the caller's bytes into it.
    Utf8String instance = FastAllocateSkipZeroInit(utf8Contents.Length);
    Span<byte> destination = instance.DangerousGetMutableSpan();
    utf8Contents.CopyTo(destination);

    // Debug-only sanity check; this assertion is compiled out of release builds.
    Debug.Assert(Utf8Utility.IsWellFormedUtf8(instance.AsBytes()), "Buffer contained ill-formed UTF-8 data.");

    return instance;
}
/// <summary>
/// Builds a <see cref="Utf8String"/> from a copy of existing UTF-8 data, repairing ill-formed input.
/// </summary>
/// <param name="buffer">The source bytes to copy into the new <see cref="Utf8String"/>.</param>
/// <returns>
/// The <see cref="Utf8String"/> produced after validation and fixup, or the empty instance when
/// <paramref name="buffer"/> is empty.
/// </returns>
/// <remarks>
/// Any ill-formed UTF-8 subsequences in <paramref name="buffer"/> are replaced with
/// <see cref="Rune.ReplacementChar"/> in the returned instance. As a result, the returned
/// <see cref="Utf8String"/> may have different contents, and therefore a different total byte length,
/// than <paramref name="buffer"/>.
/// </remarks>
public static Utf8String CreateFromRelaxed(ReadOnlySpan<byte> buffer)
{
    // Empty input maps straight to Empty; nothing is allocated.
    if (buffer.IsEmpty)
    {
        return Empty;
    }

    // Copy the source bytes into a freshly allocated instance.
    Utf8String copy = FastAllocateSkipZeroInit(buffer.Length);
#if SYSTEM_PRIVATE_CORELIB
    Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), (uint)buffer.Length);
#else
    buffer.CopyTo(copy.DangerousGetMutableSpan());
#endif

    // Validate the copy and fix up anything that is ill-formed.
    Utf8String validated = Utf8Utility.ValidateAndFixupUtf8String(copy);
    return validated;
}
/// <summary>
/// Builds a <see cref="Utf8String"/> whose contents are the UTF-8 encoding of a single <see cref="Rune"/>.
/// </summary>
/// <param name="value">The <see cref="Rune"/> to encode.</param>
/// <returns>A new <see cref="Utf8String"/> containing the encoded form of <paramref name="value"/>.</returns>
internal static Utf8String CreateFromRune(Rune value)
{
    // Every byte of the buffer gets written below, so zero-initialization can be skipped.
    Utf8String encoded = FastAllocateSkipZeroInit(value.Utf8SequenceLength);

    if (value.IsAscii)
    {
        // Fast path: an ASCII rune is stored by writing its value directly as a single byte.
        encoded.DangerousGetMutableReference() = (byte)value.Value;
    }
    else
    {
        // Slow path: let the rune encode its multi-byte form into the buffer.
        int bytesWritten = value.EncodeToUtf8(encoded.DangerousGetMutableSpan());
        Debug.Assert(encoded.Length == bytesWritten);
    }

    return encoded;
}