Esempio n. 1
0
        /*
         * STATIC FACTORIES
         */

        /// <summary>
        /// Attempts to create a <see cref="Utf8String"/> instance by copying existing UTF-8 data.
        /// </summary>
        /// <param name="buffer">The source bytes to copy into the new <see cref="Utf8String"/>.</param>
        /// <param name="value">
        /// On success, receives a <see cref="Utf8String"/> holding a copy of <paramref name="buffer"/>
        /// (the shared empty instance when <paramref name="buffer"/> is empty). On failure, receives
        /// <see langword="null"/>.
        /// </param>
        /// <returns>
        /// <see langword="true"/> when the copied data is well-formed UTF-8 and <paramref name="value"/>
        /// has been populated; otherwise <see langword="false"/>.
        /// </returns>
        /// <remarks>
        /// This method is a non-throwing equivalent of the constructor <see cref="Utf8String(ReadOnlySpan{byte})"/>.
        /// </remarks>
        public static bool TryCreateFrom(ReadOnlySpan <byte> buffer, [NotNullWhen(true)] out Utf8String?value)
        {
            // An empty buffer is a valid input; it maps onto the Empty singleton.
            if (buffer.IsEmpty)
            {
                value = Empty;
                return true;
            }

            // Allocate the destination instance and copy the caller's bytes into it.
            Utf8String candidate = FastAllocateSkipZeroInit(buffer.Length);

#if SYSTEM_PRIVATE_CORELIB
            Buffer.Memmove(ref candidate.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), (uint)buffer.Length);
#else
            buffer.CopyTo(candidate.DangerousGetMutableSpan());
#endif

            // Validation deliberately runs over the copy, never over the source buffer.
            bool isWellFormed = Utf8Utility.IsWellFormedUtf8(candidate.AsBytes());

            value = isWellFormed ? candidate : null;
            return isWellFormed;
        }
        /// <summary>
        /// Builds a <see cref="Utf8String"/> from a copy of <paramref name="value"/>, rejecting
        /// data that is not well-formed UTF-8.
        /// </summary>
        /// <param name="value">The bytes to copy into the new instance.</param>
        /// <returns>The shared empty instance for empty input; otherwise a newly allocated copy.</returns>
        /// <exception cref="ArgumentException">The copied data is not well-formed UTF-8.</exception>
        private Utf8String Ctor(ReadOnlySpan <byte> value)
        {
            if (value.IsEmpty)
            {
                return Empty;
            }

            // Allocate the destination instance and fill it with the caller's bytes.
            uint byteCount = (uint)value.Length;
            Utf8String copy = FastAllocateSkipZeroInit(value.Length);

            Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(value), byteCount);

            // Validation deliberately runs over the copy, never over the source span.
            if (Utf8Utility.IsWellFormedUtf8(copy.AsBytes()))
            {
                return copy;
            }

            throw new ArgumentException(
                      message: SR.Utf8String_InputContainedMalformedUtf8,
                      paramName: nameof(value));
        }
Esempio n. 3
0
        /// <summary>
        /// Returns the <paramref name="length"/> bytes beginning at <paramref name="startIndex"/> as a
        /// <see cref="Utf8String"/>. Arguments are verified only through debug assertions.
        /// </summary>
        /// <param name="startIndex">Byte offset at which the substring begins.</param>
        /// <param name="length">Number of bytes in the substring.</param>
        /// <returns>
        /// The shared empty instance when <paramref name="length"/> is zero, this instance when the
        /// substring spans the whole string, otherwise a newly allocated copy of the requested range.
        /// </returns>
        private Utf8String InternalSubstringWithoutCorrectnessChecks(int startIndex, int length)
        {
            Debug.Assert(startIndex >= 0, "StartIndex cannot be negative.");
            Debug.Assert(startIndex <= Length, "StartIndex cannot point beyond the end of the string (except to the null terminator).");
            Debug.Assert(length >= 0, "Length cannot be negative.");
            Debug.Assert(startIndex + length <= Length, "StartIndex and Length cannot point beyond the end of the string.");

            // Debug builds still verify that neither boundary splits a multi-byte sequence. Peeking at
            // the element just past the end is fine: that position holds the null terminator.

            Debug.Assert(!Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex)), "Somebody is trying to split this Utf8String improperly.");
            Debug.Assert(!Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex + length)), "Somebody is trying to split this Utf8String improperly.");

            // Boundary cases need no allocation.
            if (length == 0)
            {
                return Empty;
            }

            if (length == Length)
            {
                return this;
            }

            Utf8String slice = FastAllocateSkipZeroInit(length);
#if SYSTEM_PRIVATE_CORELIB
            Buffer.Memmove(ref slice.DangerousGetMutableReference(), ref DangerousGetMutableReference(startIndex), (uint)length);
#else
            GetSpan().Slice(startIndex, length).CopyTo(slice.DangerousGetMutableSpan());
#endif
            return slice;
        }
Esempio n. 4
0
        /// <summary>
        /// Copies the <paramref name="length"/> bytes beginning at <paramref name="startIndex"/> into a new
        /// <see cref="Utf8String"/>. Bounds are verified only through debug assertions, but both
        /// substring boundaries are always checked against splitting a multi-byte UTF-8 sequence.
        /// </summary>
        /// <param name="startIndex">Byte offset at which the substring begins.</param>
        /// <param name="length">Number of bytes in the substring; callers handle 0 and the full length themselves.</param>
        /// <returns>A newly allocated instance holding the requested range.</returns>
        private Utf8String InternalSubstring(int startIndex, int length)
        {
            Debug.Assert(startIndex >= 0, "StartIndex cannot be negative.");
            Debug.Assert(startIndex <= Length, "StartIndex cannot point beyond the end of the string (except to the null terminator).");
            Debug.Assert(length >= 0, "Length cannot be negative.");
            Debug.Assert(startIndex + length <= Length, "StartIndex and Length cannot point beyond the end of the string.");

            Debug.Assert(length != 0 && length != Length, "Caller should handle Length boundary conditions.");

            // This instance is assumed to hold well-formed UTF-8, so a boundary splits a multi-byte
            // sequence exactly when the byte sitting at that boundary is a continuation byte (its lead
            // byte would then live on the other side of the cut). Both the first byte of the substring
            // and the first byte after it are inspected. Reading the element immediately past the end
            // of the string is safe because that position holds the null terminator.

            int endIndex = startIndex + length;

            if (Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(startIndex)) ||
                Utf8Utility.IsUtf8ContinuationByte(DangerousGetMutableReference(endIndex)))
            {
                ThrowImproperStringSplit();
            }

            Utf8String slice = FastAllocateSkipZeroInit(length);

#if SYSTEM_PRIVATE_CORELIB
            Buffer.Memmove(ref slice.DangerousGetMutableReference(), ref DangerousGetMutableReference(startIndex), (uint)length);
#else
            GetSpan().Slice(startIndex, length).CopyTo(slice.DangerousGetMutableSpan());
#endif

            return slice;
        }
Esempio n. 5
0
        /// <summary>
        /// Creates a <see cref="Utf8String"/> containing the UTF-8 encoding of a single <see cref="Rune"/>.
        /// </summary>
        /// <param name="value">The scalar value to encode.</param>
        /// <returns>A new instance whose length equals <c>value.Utf8SequenceLength</c>.</returns>
        internal static Utf8String CreateFromRune(Rune value)
        {
            Utf8String result = FastAllocate(value.Utf8SequenceLength);

            // Encode directly into the freshly allocated instance's storage.
            Span<byte> destination = new Span<byte>(ref result.DangerousGetMutableReference(), result.Length);
            int bytesWritten = value.EncodeToUtf8(destination);

            Debug.Assert(bytesWritten == value.Utf8SequenceLength);

            return result;
        }
Esempio n. 6
0
        /// <summary>
        /// Builds a <see cref="Utf8String"/> from a copy of <paramref name="value"/>; the copy is then
        /// handed to <c>Utf8Utility.ValidateAndFixupUtf8String</c>, whose result is returned.
        /// </summary>
        /// <param name="value">The bytes to copy into the new instance.</param>
        /// <returns>The shared empty instance for empty input; otherwise the validated/fixed-up string.</returns>
        private Utf8String Ctor(ReadOnlySpan <byte> value)
        {
            if (!value.IsEmpty)
            {
                // Copy first, then let the utility validate (and repair) the private copy.
                Utf8String copy = FastAllocate(value.Length);

                Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(value), (uint)value.Length);
                return Utf8Utility.ValidateAndFixupUtf8String(copy);
            }

            return Empty;
        }
Esempio n. 7
0
        /*
         * HELPER METHODS
         */

        /// <summary>
        /// Creates a <see cref="Utf8String"/> instance from a copy of existing data without validating it.
        /// The signature also carries flags describing attributes of the data.
        /// </summary>
        /// <param name="utf8Data">The bytes to copy verbatim into the new instance.</param>
        /// <param name="assumeWellFormed">Caller's assertion that the data is well-formed UTF-8.</param>
        /// <param name="assumeAscii">Caller's assertion that the data is all-ASCII.</param>
        /// <returns>The shared empty instance for empty input; otherwise a newly allocated copy.</returns>
        /// <remarks>
        /// NOTE(review): neither <paramref name="assumeWellFormed"/> nor <paramref name="assumeAscii"/>
        /// is read by this method body at present.
        /// </remarks>
        internal static Utf8String DangerousCreateWithoutValidation(ReadOnlySpan <byte> utf8Data, bool assumeWellFormed = false, bool assumeAscii = false)
        {
            if (utf8Data.IsEmpty)
            {
                return Empty;
            }

            Utf8String result = FastAllocate(utf8Data.Length);

            // Copy the raw bytes straight into the new instance's storage.
            Span<byte> destination = new Span<byte>(ref result.DangerousGetMutableReference(), result.Length);
            utf8Data.CopyTo(destination);

            return result;
        }
        /// <summary>
        /// Builds a <see cref="Utf8String"/> from a copy of <paramref name="value"/>; the copy is then
        /// passed through <c>Utf8Utility.ValidateAndFixupUtf8String</c>.
        /// </summary>
        /// <param name="value">The bytes to copy into the new instance.</param>
        /// <returns>The shared empty instance for empty input; otherwise the validated/fixed-up string.</returns>
        private Utf8String Ctor(ReadOnlySpan <byte> value)
        {
            if (value.IsEmpty)
            {
                return Empty;
            }

            uint byteCount = (uint)value.Length;
            Utf8String copy = FastAllocate(value.Length);

            Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(value), byteCount);

            // TODO-NULLABLE: https://github.com/dotnet/roslyn/issues/26761
            Utf8String result = Utf8Utility.ValidateAndFixupUtf8String(copy)!;
            return result;
        }
Esempio n. 9
0
        /// <summary>
        /// Builds a <see cref="Utf8String"/> from a copy of <paramref name="value"/> and returns the
        /// outcome of running <c>Utf8Utility.ValidateAndFixupUtf8String</c> over that copy.
        /// </summary>
        /// <param name="value">The bytes to copy into the new instance.</param>
        /// <returns>The shared empty instance for empty input; otherwise the validated/fixed-up string.</returns>
        private Utf8String Ctor(ReadOnlySpan <byte> value)
        {
            if (value.IsEmpty)
            {
                // Nothing to copy or validate.
                return Empty;
            }
            else
            {
                Utf8String populated = FastAllocate(value.Length);

                Buffer.Memmove(
                    ref populated.DangerousGetMutableReference(),
                    ref MemoryMarshal.GetReference(value),
                    (uint)value.Length);

                // TODO-NULLABLE: Remove ! when nullable attributes are respected
                return Utf8Utility.ValidateAndFixupUtf8String(populated)!;
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Builds a <see cref="Utf8String"/> by transcoding UTF-16 text to UTF-8 via <see cref="Encoding.UTF8"/>.
        /// </summary>
        /// <param name="value">The UTF-16 text to transcode.</param>
        /// <returns>The shared empty instance for empty input; otherwise a newly allocated instance holding the transcoded bytes.</returns>
        private Utf8String Ctor(ReadOnlySpan <char> value)
        {
            if (value.IsEmpty)
            {
                return Empty;
            }

            // TODO_UTF8STRING: Call into optimized transcoding routine when it's available.

            // First pass sizes the allocation; second pass writes the bytes into it.
            int utf8ByteCount = Encoding.UTF8.GetByteCount(value);
            Utf8String result = FastAllocate(utf8ByteCount);

            Span<byte> destination = new Span<byte>(ref result.DangerousGetMutableReference(), result.Length);
            Encoding.UTF8.GetBytes(value, destination);

            return result;
        }
Esempio n. 11
0
        /// <summary>
        /// Copies the <paramref name="length"/> bytes beginning at <paramref name="startIndex"/> into a new
        /// <see cref="Utf8String"/>. Arguments are verified only through debug assertions.
        /// </summary>
        /// <param name="startIndex">Byte offset at which the substring begins.</param>
        /// <param name="length">Number of bytes in the substring; callers handle 0 and the full length themselves.</param>
        /// <returns>A newly allocated instance holding the requested range.</returns>
        private Utf8String InternalSubstring(int startIndex, int length)
        {
            Debug.Assert(startIndex >= 0, "StartIndex cannot be negative.");
            Debug.Assert(startIndex <= Length, "StartIndex cannot point beyond the end of the string (except to the null terminator).");
            Debug.Assert(length >= 0, "Length cannot be negative.");
            Debug.Assert(startIndex + length <= Length, "StartIndex and Length cannot point beyond the end of the string.");

            Debug.Assert(length != 0 && length != Length, "Caller should handle Length boundary conditions.");

            Utf8String slice = FastAllocate(length);
            uint byteCount = (uint)length;

            // Copy the requested range out of this instance into the new one.
            Buffer.Memmove(ref slice.DangerousGetMutableReference(), ref DangerousGetMutableReference(startIndex), byteCount);

            return slice;
        }
Esempio n. 12
0
        /// <summary>
        /// Creates a <see cref="Utf8String"/> instance from existing UTF-8 data, repairing ill-formed input.
        /// </summary>
        /// <param name="buffer">The source bytes from which the new <see cref="Utf8String"/> is built.</param>
        /// <returns>
        /// The shared empty instance for empty input; otherwise the validated/fixed-up string.
        /// </returns>
        /// <remarks>
        /// Any ill-formed UTF-8 subsequences in <paramref name="buffer"/> are replaced with
        /// <see cref="Rune.ReplacementChar"/> in the returned instance. As a consequence the result
        /// may differ from <paramref name="buffer"/> in both contents and total byte length.
        /// </remarks>
        public static Utf8String CreateFromRelaxed(ReadOnlySpan <byte> buffer)
        {
            if (buffer.IsEmpty)
            {
                return Empty;
            }

            // Take a private copy of the caller's bytes.
            uint byteCount = (uint)buffer.Length;
            Utf8String copy = FastAllocateSkipZeroInit(buffer.Length);

            Buffer.Memmove(ref copy.DangerousGetMutableReference(), ref MemoryMarshal.GetReference(buffer), byteCount);

            // Validation and fix-up operate on the copy.
            Utf8String result = Utf8Utility.ValidateAndFixupUtf8String(copy);
            return result;
        }
Esempio n. 13
0
        /// <summary>
        /// Creates a <see cref="Utf8String"/> containing the UTF-8 encoding of a single <see cref="Rune"/>.
        /// </summary>
        /// <param name="value">The scalar value to encode.</param>
        /// <returns>A new instance allocated with length <c>value.Utf8SequenceLength</c>.</returns>
        internal static Utf8String CreateFromRune(Rune value)
        {
            // Zero-initialization is skipped because every byte of the buffer gets written below.

            Utf8String result = FastAllocateSkipZeroInit(value.Utf8SequenceLength);

            if (value.IsAscii)
            {
                // ASCII: the encoding is the single byte equal to the scalar value itself.

                result.DangerousGetMutableReference() = (byte)value.Value;
            }
            else
            {
                // Non-ASCII: let the Rune write its multi-byte encoding into the buffer.

                int bytesWritten = value.EncodeToUtf8(result.DangerousGetMutableSpan());
                Debug.Assert(result.Length == bytesWritten);
            }

            return result;
        }