Example #1
0
        public SplitResult Split(char separator, Utf8StringSplitOptions options = Utf8StringSplitOptions.None)
        {
            if (!Rune.TryCreate(separator, out Rune rune))
            {
                throw new ArgumentOutOfRangeException(
                          paramName: nameof(separator),
                          message: SR.ArgumentOutOfRange_Utf16SurrogatesDisallowed);
            }

            CheckSplitOptions(options);

            return(new SplitResult(this, rune, options));
        }
Example #2
0
        //
        // Check if highSurr and lowSurr are a surrogate pair then
        // it checks if the combined char is in the range
        // Takes in isQuery because iri restrictions for query are different
        // This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2
        //
        internal static bool CheckIriUnicodeRange(char highSurr, char lowSurr, out bool isSurrogatePair, bool isQuery)
        {
            Debug.Assert(char.IsHighSurrogate(highSurr));

            if (Rune.TryCreate(highSurr, lowSurr, out Rune rune))
            {
                isSurrogatePair = true;

                // U+xxFFFE..U+xxFFFF is always private use for all planes, so we exclude it.
                // U+E0000..U+E0FFF is disallowed per the 'ucschar' definition in the ABNF.
                // U+F0000 and above are only allowed for 'iprivate' per the ABNF (isQuery = true).

                return(((rune.Value & 0xFFFF) < 0xFFFE) &&
                       ((uint)(rune.Value - 0xE0000) >= (0xE1000 - 0xE0000)) &&
                       (isQuery || rune.Value < 0xF0000));
            }

            isSurrogatePair = false;
            return(false);
        }
Example #3
0
        //
        // IRI normalization for strings containing characters that are not allowed or
        // escaped characters that should be unescaped in the context of the specified Uri component.
        //
        internal static unsafe string EscapeUnescapeIri(char *pInput, int start, int end, UriComponents component)
        {
            int size = end - start;
            ValueStringBuilder dest = new ValueStringBuilder(size);

            byte[]? bytes = null;

            int  next = start;
            char ch;

            Span <byte> maxUtf8EncodedSpan = stackalloc byte[4];

            for (; next < end; ++next)
            {
                if ((ch = pInput[next]) == '%')
                {
                    if (next + 2 < end)
                    {
                        ch = UriHelper.DecodeHexChars(pInput[next + 1], pInput[next + 2]);

                        // Do not unescape a reserved char
                        if (ch == Uri.c_DummyChar || ch == '%' || CheckIsReserved(ch, component) || UriHelper.IsNotSafeForUnescape(ch))
                        {
                            // keep as is
                            dest.Append(pInput[next++]);
                            dest.Append(pInput[next++]);
                            dest.Append(pInput[next]);
                            continue;
                        }
                        else if (ch <= '\x7F')
                        {
                            Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                            //ASCII
                            dest.Append(ch);
                            next += 2;
                            continue;
                        }
                        else
                        {
                            // possibly utf8 encoded sequence of unicode

                            // check if safe to unescape according to Iri rules

                            Debug.Assert(ch < 0xFF, "Expecting ASCII character.");

                            int startSeq  = next;
                            int byteCount = 1;
                            // lazy initialization of max size, will reuse the array for next sequences
                            if (bytes is null)
                            {
                                bytes = new byte[end - next];
                            }

                            bytes[0] = (byte)ch;
                            next    += 3;
                            while (next < end)
                            {
                                // Check on exit criterion
                                if ((ch = pInput[next]) != '%' || next + 2 >= end)
                                {
                                    break;
                                }

                                // already made sure we have 3 characters in str
                                ch = UriHelper.DecodeHexChars(pInput[next + 1], pInput[next + 2]);

                                //invalid hex sequence ?
                                if (ch == Uri.c_DummyChar)
                                {
                                    break;
                                }
                                // character is not part of a UTF-8 sequence ?
                                else if (ch < '\x80')
                                {
                                    break;
                                }
                                else
                                {
                                    //a UTF-8 sequence
                                    bytes[byteCount++] = (byte)ch;
                                    next += 3;
                                }

                                Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                            }
                            next--; // for loop will increment


                            // Using encoder with no replacement fall-back will skip all invalid UTF-8 sequences.
                            Encoding noFallbackCharUTF8 = Encoding.GetEncoding(
                                Encoding.UTF8.CodePage,
                                new EncoderReplacementFallback(""),
                                new DecoderReplacementFallback(""));

                            char[] unescapedChars = new char[bytes.Length];
                            int    charCount      = noFallbackCharUTF8.GetChars(bytes, 0, byteCount, unescapedChars, 0);


                            if (charCount != 0)
                            {
                                // If invalid sequences were present in the original escaped string, we need to
                                // copy the escaped versions of those sequences.
                                // Decoded Unicode values will be kept only when they are allowed by the URI/IRI RFC
                                // rules.
                                UriHelper.MatchUTF8Sequence(ref dest, unescapedChars, charCount, bytes,
                                                            byteCount, component == UriComponents.Query, true);
                            }
                            else
                            {
                                // copy escaped sequence as is
                                for (int i = startSeq; i <= next; ++i)
                                {
                                    dest.Append(pInput[i]);
                                }
                            }
                        }
                    }
                    else
                    {
                        dest.Append(pInput[next]);
                    }
                }
                else if (ch > '\x7f')
                {
                    // unicode

                    bool isInIriUnicodeRange;
                    bool surrogatePair = false;

                    char ch2 = '\0';

                    if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
                    {
                        ch2 = pInput[next + 1];
                        isInIriUnicodeRange = CheckIriUnicodeRange(ch, ch2, out surrogatePair, component == UriComponents.Query);
                    }
                    else
                    {
                        isInIriUnicodeRange = CheckIriUnicodeRange(ch, component == UriComponents.Query);
                    }

                    if (isInIriUnicodeRange)
                    {
                        dest.Append(ch);
                        if (surrogatePair)
                        {
                            dest.Append(ch2);
                        }
                    }
                    else
                    {
                        Rune rune;
                        if (surrogatePair)
                        {
                            rune = new Rune(ch, ch2);
                        }
                        else if (!Rune.TryCreate(ch, out rune))
                        {
                            rune = Rune.ReplacementChar;
                        }

                        int         bytesWritten = rune.EncodeToUtf8(maxUtf8EncodedSpan);
                        Span <byte> encodedBytes = maxUtf8EncodedSpan.Slice(0, bytesWritten);

                        foreach (byte b in encodedBytes)
                        {
                            UriHelper.EscapeAsciiChar(b, ref dest);
                        }
                    }

                    if (surrogatePair)
                    {
                        next++;
                    }
                }
                else
                {
                    // just copy the character
                    dest.Append(pInput[next]);
                }
            }

            string result = dest.ToString();

            return(result);
        }
 // Ordinal search
 public int IndexOf(char value)
 {
     return(Rune.TryCreate(value, out Rune result) ? IndexOf(result) : -1);
 }
 // Ordinal search
 public bool StartsWith(char value)
 {
     return(Rune.TryCreate(value, out Rune result) && StartsWith(result));
 }
 // Ordinal search
 public bool Contains(char value)
 {
     return(Rune.TryCreate(value, out Rune result) && Contains(result));
 }
Example #7
0
 /// <summary>
 /// Returns a value stating whether the current <see cref="Utf8String"/> instance begins with
 /// <paramref name="value"/>. The specified comparison is used.
 /// </summary>
 public bool StartsWith(char value, StringComparison comparison)
 {
     return(Rune.TryCreate(value, out Rune rune) && StartsWith(rune, comparison));
 }
Example #8
0
 /// <summary>
 /// Returns a value stating whether the current <see cref="Utf8String"/> instance ends with
 /// <paramref name="value"/>. An ordinal comparison is used.
 /// </summary>
 public bool EndsWith(char value)
 {
     return(Rune.TryCreate(value, out Rune rune) && EndsWith(rune));
 }
Example #9
0
 /// <summary>
 /// Returns a value stating whether the current <see cref="Utf8String"/> instance contains
 /// <paramref name="value"/>. The specified comparison is used.
 /// </summary>
 public bool Contains(char value, StringComparison comparison)
 {
     return(Rune.TryCreate(value, out Rune rune) && Contains(rune, comparison));
 }
Example #10
0
        //
        // IRI normalization for strings containing characters that are not allowed or
        // escaped characters that should be unescaped in the context of the specified Uri component.
        //
        internal static unsafe string EscapeUnescapeIri(char *pInput, int start, int end, UriComponents component)
        {
            int size = end - start;
            ValueStringBuilder dest = size <= 256
                ? new ValueStringBuilder(stackalloc char[256])
                : new ValueStringBuilder(size);

            Span <byte> maxUtf8EncodedSpan = stackalloc byte[4];

            for (int i = start; i < end; ++i)
            {
                char ch = pInput[i];
                if (ch == '%')
                {
                    if (end - i > 2)
                    {
                        ch = UriHelper.DecodeHexChars(pInput[i + 1], pInput[i + 2]);

                        // Do not unescape a reserved char
                        if (ch == Uri.c_DummyChar || ch == '%' || CheckIsReserved(ch, component) || UriHelper.IsNotSafeForUnescape(ch))
                        {
                            // keep as is
                            dest.Append(pInput[i++]);
                            dest.Append(pInput[i++]);
                            dest.Append(pInput[i]);
                            continue;
                        }
                        else if (ch <= '\x7F')
                        {
                            Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                            //ASCII
                            dest.Append(ch);
                            i += 2;
                            continue;
                        }
                        else
                        {
                            // possibly utf8 encoded sequence of unicode
                            int charactersRead = PercentEncodingHelper.UnescapePercentEncodedUTF8Sequence(
                                pInput + i,
                                end - i,
                                ref dest,
                                component == UriComponents.Query,
                                iriParsing: true);

                            Debug.Assert(charactersRead > 0);
                            i += charactersRead - 1; // -1 as i will be incremented in the loop
                        }
                    }
                    else
                    {
                        dest.Append(pInput[i]);
                    }
                }
                else if (ch > '\x7f')
                {
                    // unicode

                    bool isInIriUnicodeRange;
                    bool surrogatePair = false;

                    char ch2 = '\0';

                    if ((char.IsHighSurrogate(ch)) && (i + 1 < end))
                    {
                        ch2 = pInput[i + 1];
                        isInIriUnicodeRange = CheckIriUnicodeRange(ch, ch2, out surrogatePair, component == UriComponents.Query);
                    }
                    else
                    {
                        isInIriUnicodeRange = CheckIriUnicodeRange(ch, component == UriComponents.Query);
                    }

                    if (isInIriUnicodeRange)
                    {
                        dest.Append(ch);
                        if (surrogatePair)
                        {
                            dest.Append(ch2);
                        }
                    }
                    else
                    {
                        Rune rune;
                        if (surrogatePair)
                        {
                            rune = new Rune(ch, ch2);
                        }
                        else if (!Rune.TryCreate(ch, out rune))
                        {
                            rune = Rune.ReplacementChar;
                        }

                        int         bytesWritten = rune.EncodeToUtf8(maxUtf8EncodedSpan);
                        Span <byte> encodedBytes = maxUtf8EncodedSpan.Slice(0, bytesWritten);

                        foreach (byte b in encodedBytes)
                        {
                            UriHelper.EscapeAsciiChar(b, ref dest);
                        }
                    }

                    if (surrogatePair)
                    {
                        i++;
                    }
                }
                else
                {
                    // just copy the character
                    dest.Append(pInput[i]);
                }
            }

            return(dest.ToString());
        }