Exemplo n.º 1
0
        /// <summary>
        /// Allocates a <see cref="Utf8String"/> of the requested size and hands its buffer to the supplied
        /// delegate to be filled in. Ill-formed UTF-8 written by the delegate is repaired rather than rejected.
        /// </summary>
        /// <typeparam name="TState">The type of the state value forwarded to <paramref name="action"/>.</typeparam>
        /// <param name="length">The number of bytes to allocate for the new <see cref="Utf8String"/>.</param>
        /// <param name="state">A value passed through to <paramref name="action"/>.</param>
        /// <param name="action">The delegate that writes the contents of the new <see cref="Utf8String"/>.</param>
        /// <returns>
        /// The <c>Empty</c> instance when <paramref name="length"/> is zero; otherwise the populated
        /// instance after validation and fixup.
        /// </returns>
        /// <remarks>
        /// Once <paramref name="action"/> returns, the buffer is validated as UTF-8. Every invalid subsequence
        /// is replaced with <see cref="Rune.ReplacementChar"/>, which means the byte length of the returned
        /// <see cref="Utf8String"/> may differ from <paramref name="length"/>.
        /// </remarks>
        public static Utf8String CreateRelaxed <TState>(int length, TState state, SpanAction <byte, TState> action)
        {
            // Argument checks: a negative length is reported before a null callback.
            if (length < 0)
            {
                ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum();
            }

            if (action is null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action);
            }

            // A zero-length request never invokes the callback.
            if (length == 0)
            {
                return Empty;
            }

            // The raw buffer is about to be exposed to user code, so FastAllocateSkipZeroInit
            // is off the table; use FastAllocate instead.
            Utf8String result = FastAllocate(length);
            Span<byte> destination = result.DangerousGetMutableSpan();
            action(destination, state);

            // Validate whatever the callback wrote and repair any ill-formed sequences.
            return Utf8Utility.ValidateAndFixupUtf8String(result);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Allocates a <see cref="Utf8String"/> of the requested size and hands its buffer to the supplied
        /// delegate to be filled in, without validating the result. Read the remarks before using this method.
        /// </summary>
        /// <typeparam name="TState">The type of the state value forwarded to <paramref name="action"/>.</typeparam>
        /// <param name="length">The number of bytes to allocate for the new <see cref="Utf8String"/>.</param>
        /// <param name="state">A value passed through to <paramref name="action"/>.</param>
        /// <param name="action">The delegate that writes the contents of the new <see cref="Utf8String"/>.</param>
        /// <returns>
        /// The <c>Empty</c> instance when <paramref name="length"/> is zero; otherwise the instance
        /// populated by <paramref name="action"/>.
        /// </returns>
        /// <remarks>
        /// This is an optimization that omits the validation performed by
        /// <see cref="Create{TState}(int, TState, SpanAction{byte, TState})"/>. The caller promises that
        /// <paramref name="action"/> fills the buffer with well-formed UTF-8 only. <see cref="Utf8String"/>
        /// contractually guarantees well-formed contents, and breaking that guarantee could lead to
        /// runtime instability.
        /// </remarks>
        public static Utf8String UnsafeCreateWithoutValidation <TState>(int length, TState state, SpanAction <byte, TState> action)
        {
            // Argument checks: a negative length is reported before a null callback.
            if (length < 0)
            {
                ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum();
            }

            if (action is null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action);
            }

            // A zero-length request never invokes the callback.
            if (length == 0)
            {
                return Empty;
            }

            // User code gets to see the raw buffer, so FastAllocateSkipZeroInit must not be used here.
            Utf8String result = FastAllocate(length);
            action(result.DangerousGetMutableSpan(), state);

            // Debug-only sanity check; this statement is compiled out of release builds.
            Debug.Assert(Utf8Utility.IsWellFormedUtf8(result.AsBytes()), "Callback populated the buffer with ill-formed UTF-8 data.");

            return result;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Allocates a <see cref="Utf8String"/> of the requested size and hands its buffer to the supplied
        /// delegate to be filled in. The result is rejected if the delegate wrote ill-formed UTF-8.
        /// </summary>
        /// <typeparam name="TState">The type of the state value forwarded to <paramref name="action"/>.</typeparam>
        /// <param name="length">The number of bytes to allocate for the new <see cref="Utf8String"/>.</param>
        /// <param name="state">A value passed through to <paramref name="action"/>.</param>
        /// <param name="action">The delegate that writes the contents of the new <see cref="Utf8String"/>.</param>
        /// <returns>
        /// The <c>Empty</c> instance when <paramref name="length"/> is zero; otherwise the validated
        /// instance populated by <paramref name="action"/>.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="action"/> filled the buffer with ill-formed UTF-8 data.
        /// </exception>
        /// <remarks>
        /// Once <paramref name="action"/> returns, the buffer is validated as UTF-8.
        /// Any invalid UTF-8 subsequence causes an exception to be thrown.
        /// </remarks>
        public static Utf8String Create <TState>(int length, TState state, SpanAction <byte, TState> action)
        {
            // Argument checks: a negative length is reported before a null callback.
            if (length < 0)
            {
                ThrowHelper.ThrowLengthArgumentOutOfRange_ArgumentOutOfRange_NeedNonNegNum();
            }

            if (action is null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.action);
            }

            // A zero-length request never invokes the callback.
            if (length == 0)
            {
                return Empty;
            }

            // User code gets to see the raw buffer, so FastAllocateSkipZeroInit must not be used here.
            Utf8String result = FastAllocate(length);
            Span<byte> destination = result.DangerousGetMutableSpan();
            action(destination, state);

            // Accept the instance only when the callback produced well-formed UTF-8.
            if (Utf8Utility.IsWellFormedUtf8(result.AsBytes()))
            {
                return result;
            }

            throw new ArgumentException(
                      message: SR.Utf8String_CallbackProvidedMalformedData,
                      paramName: nameof(action));
        }
Exemplo n.º 4
0
        /*
         * STATIC FACTORIES
         */

        /// <summary>
        /// Attempts to build a <see cref="Utf8String"/> from a copy of existing UTF-8 bytes.
        /// </summary>
        /// <param name="buffer">The bytes to copy into the new <see cref="Utf8String"/>.</param>
        /// <param name="value">
        /// On return, a <see cref="Utf8String"/> holding the same contents as <paramref name="buffer"/> when
        /// <paramref name="buffer"/> is well-formed UTF-8; <see langword="null"/> otherwise.
        /// </param>
        /// <returns>
        /// <see langword="true"/> when <paramref name="buffer"/> is well-formed UTF-8 and <paramref name="value"/>
        /// holds a <see cref="Utf8String"/> wrapping a copy of it; <see langword="false"/> otherwise.
        /// </returns>
        /// <remarks>
        /// This is the non-throwing counterpart of the constructor <see cref="Utf8String(ReadOnlySpan{byte})"/>.
        /// </remarks>
        public static bool TryCreateFrom(ReadOnlySpan <byte> buffer, [NotNullWhen(true)] out Utf8String?value)
        {
            // An empty buffer is a valid input and maps to the shared Empty singleton.
            if (buffer.IsEmpty)
            {
                value = Empty;
                return true;
            }

            // Every byte is overwritten by the copy below, so zero-init can be skipped.
            Utf8String copy = FastAllocateSkipZeroInit(buffer.Length);

#if SYSTEM_PRIVATE_CORELIB
            Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), (uint)buffer.Length);
#else
            buffer.CopyTo(copy.DangerousGetMutableSpan());
#endif

            // Validate the private copy rather than the caller-owned source buffer.
            if (!Utf8Utility.IsWellFormedUtf8(copy.AsBytes()))
            {
                value = null;
                return false;
            }

            value = copy;
            return true;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Returns the portion of this instance that starts at <paramref name="startIndex"/> and spans
        /// <paramref name="length"/> bytes. Arguments are verified only through debug assertions.
        /// </summary>
        /// <param name="startIndex">The byte offset at which the substring begins.</param>
        /// <param name="length">The byte length of the substring.</param>
        /// <returns>
        /// The <c>Empty</c> instance when <paramref name="length"/> is zero, this instance when
        /// <paramref name="length"/> equals this instance's length, and a newly allocated copy otherwise.
        /// </returns>
        private Utf8String InternalSubstringWithoutCorrectnessChecks(int startIndex, int length)
        {
            Debug.Assert(startIndex >= 0, "StartIndex cannot be negative.");
            Debug.Assert(startIndex <= this.Length, "StartIndex cannot point beyond the end of the string (except to the null terminator).");
            Debug.Assert(length >= 0, "Length cannot be negative.");
            Debug.Assert(startIndex + length <= this.Length, "StartIndex and Length cannot point beyond the end of the string.");

            // Debug builds still verify that neither boundary splits a multi-byte sequence. Reading the
            // byte just past the end of the data is fine: that position holds the null terminator.

            Debug.Assert(!Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex)), "Somebody is trying to split this Utf8String improperly.");
            Debug.Assert(!Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex + length)), "Somebody is trying to split this Utf8String improperly.");

            // Trivial outcomes that need no allocation.
            if (length == 0)
            {
                return Empty;
            }

            if (length == this.Length)
            {
                return this;
            }

            // General case: copy the requested range into a new instance. Every byte of the
            // destination is overwritten, so zero-init can be skipped.
            Utf8String slice = FastAllocateSkipZeroInit(length);
#if SYSTEM_PRIVATE_CORELIB
            Buffer.Memmove(ref slice.DangerousGetMutableReference(), ref this.DangerousGetMutableReference(startIndex), (uint)length);
#else
            this.GetSpan().Slice(startIndex, length).CopyTo(slice.DangerousGetMutableSpan());
#endif
            return slice;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Copies a range of this <see cref="Utf8String"/> into a new instance without bounds checking
        /// (bounds are only asserted in debug builds). The split points themselves are always checked.
        /// </summary>
        /// <param name="startIndex">The byte offset at which the substring begins.</param>
        /// <param name="length">
        /// The byte length of the substring. Callers are expected to have handled the zero-length and
        /// whole-string cases already.
        /// </param>
        /// <returns>A newly allocated <see cref="Utf8String"/> containing the requested range.</returns>
        private Utf8String InternalSubstring(int startIndex, int length)
        {
            Debug.Assert(startIndex >= 0, "StartIndex cannot be negative.");
            Debug.Assert(startIndex <= this.Length, "StartIndex cannot point beyond the end of the string (except to the null terminator).");
            Debug.Assert(length >= 0, "Length cannot be negative.");
            Debug.Assert(startIndex + length <= this.Length, "StartIndex and Length cannot point beyond the end of the string.");

            Debug.Assert(length != 0 && length != this.Length, "Caller should handle Length boundary conditions.");

            // A Utf8String must hold well-formed UTF-8 only, so neither edge of the new substring may
            // fall inside a multi-byte sequence. Because the source is assumed to be well-formed, it is
            // enough to look at two bytes: the first byte of the substring and the first byte after it.
            // If either is a continuation byte, its lead byte lives in the preceding range and the
            // sequence would be split. Inspecting the byte right after the end of the data is safe,
            // since that position is the null terminator.

            if (Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex))
                || Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex + length)))
            {
                ThrowImproperStringSplit();
            }

            // Every byte of the destination is overwritten below, so zero-init can be skipped.
            Utf8String slice = FastAllocateSkipZeroInit(length);

#if SYSTEM_PRIVATE_CORELIB
            Buffer.Memmove(ref slice.DangerousGetMutableReference(), ref this.DangerousGetMutableReference(startIndex), (uint)length);
#else
            this.GetSpan().Slice(startIndex, length).CopyTo(slice.DangerousGetMutableSpan());
#endif

            return slice;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Builds a <see cref="Utf8String"/> holding a copy of the supplied bytes, without validating them.
        /// Read the remarks before using this method.
        /// </summary>
        /// <param name="utf8Contents">The bytes to copy into the new <see cref="Utf8String"/>.</param>
        /// <returns>
        /// The <c>Empty</c> instance when <paramref name="utf8Contents"/> is empty; otherwise a new
        /// instance containing a copy of <paramref name="utf8Contents"/>.
        /// </returns>
        /// <remarks>
        /// This is an optimization that omits the validation the <see cref="Utf8String"/> constructors
        /// normally perform. The caller promises that <paramref name="utf8Contents"/> is well-formed UTF-8.
        /// <see cref="Utf8String"/> contractually guarantees well-formed contents, and breaking that
        /// guarantee could lead to runtime instability.
        /// </remarks>
        public static Utf8String UnsafeCreateWithoutValidation(ReadOnlySpan <byte> utf8Contents)
        {
            // Empty input maps to the shared Empty instance.
            if (utf8Contents.IsEmpty)
            {
                return Empty;
            }

            // Every byte is overwritten by the copy below, so zero-init can be skipped.
            Utf8String copy = FastAllocateSkipZeroInit(utf8Contents.Length);
            Span<byte> destination = copy.DangerousGetMutableSpan();
            utf8Contents.CopyTo(destination);

            // Debug-only sanity check; this statement is compiled out of release builds.
            Debug.Assert(Utf8Utility.IsWellFormedUtf8(copy.AsBytes()), "Buffer contained ill-formed UTF-8 data.");

            return copy;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Builds a <see cref="Utf8String"/> from a copy of existing bytes, repairing ill-formed UTF-8.
        /// </summary>
        /// <param name="buffer">The bytes to copy into the new <see cref="Utf8String"/>.</param>
        /// <returns>
        /// The <c>Empty</c> instance when <paramref name="buffer"/> is empty; otherwise the copied
        /// instance after validation and fixup.
        /// </returns>
        /// <remarks>
        /// Any ill-formed UTF-8 subsequence found in <paramref name="buffer"/> is replaced with
        /// <see cref="Rune.ReplacementChar"/> in the returned <see cref="Utf8String"/>. The result may
        /// therefore have different contents, and a different total byte length, than
        /// <paramref name="buffer"/>.
        /// </remarks>
        public static Utf8String CreateFromRelaxed(ReadOnlySpan <byte> buffer)
        {
            // Empty input maps to the shared Empty instance.
            if (buffer.IsEmpty)
            {
                return Empty;
            }

            // Every byte is overwritten by the copy below, so zero-init can be skipped.
            Utf8String copy = FastAllocateSkipZeroInit(buffer.Length);

#if SYSTEM_PRIVATE_CORELIB
            Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), (uint)buffer.Length);
#else
            buffer.CopyTo(copy.DangerousGetMutableSpan());
#endif

            // Validate the copy and repair any ill-formed sequences.
            return Utf8Utility.ValidateAndFixupUtf8String(copy);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Builds a <see cref="Utf8String"/> whose contents are the UTF-8 encoding of a single <see cref="Rune"/>.
        /// </summary>
        /// <param name="value">The scalar value to encode.</param>
        /// <returns>A new <see cref="Utf8String"/> of <see cref="Rune.Utf8SequenceLength"/> bytes.</returns>
        internal static Utf8String CreateFromRune(Rune value)
        {
            // The whole buffer is written below, so zero-init can be skipped.
            Utf8String result = FastAllocateSkipZeroInit(value.Utf8SequenceLength);

            if (value.IsAscii)
            {
                // Fast path: store the single ASCII byte directly.
                result.DangerousGetMutableReference() = (byte)value.Value;
            }
            else
            {
                // Slow path: encode the multi-byte sequence into the buffer.
                int written = value.EncodeToUtf8(result.DangerousGetMutableSpan());
                Debug.Assert(result.Length == written);
            }

            return result;
        }