예제 #1
0
        // Reports whether the string needs IRI processing. That is the case when it contains
        // a char above 0x7F, or a "%XX" triplet whose decoded value is flagged in
        // UriHelper.UnreservedTable (per the original note: RFC 3986 unreserved characters,
        // which should be un-escaped) or lies outside that table altogether.
        private static bool CheckForUnicodeOrEscapedUnreserved(string data)
        {
            int length = data.Length;
            int pos = 0;

            while (pos < length)
            {
                char current = data[pos];

                // Any non-ASCII char settles the question immediately.
                if (current > 0x7F)
                {
                    return true;
                }

                // A '%' is only inspected when two more chars follow it; a trailing
                // "%" or "%X" is stepped over like any ordinary character.
                if (current == '%' && pos + 2 < length)
                {
                    char decoded = UriHelper.DecodeHexChars(data[pos + 1], data[pos + 2]);

                    // Values past the end of the table are reported as well
                    // (presumably this covers the decoder's "invalid hex" result - confirm in UriHelper).
                    bool outsideTable = decoded >= UriHelper.UnreservedTable.Length;
                    if (outsideTable || UriHelper.UnreservedTable[decoded])
                    {
                        return true;
                    }

                    // Skip the two hex digits; the increment below moves past the triplet.
                    pos += 2;
                }

                pos++;
            }

            return false;
        }
예제 #2
0
        //
        // Normalizes the chars in [start, end) of pInput for IRI use within the given Uri component:
        //   - "%XX" triplets are decoded when the decoded char may appear literally, and kept otherwise;
        //   - raw non-ASCII chars are kept when CheckIriUnicodeRange accepts them, and are otherwise
        //     emitted as escaped UTF-8 bytes;
        //   - all remaining chars are copied through.
        // Returns the normalized text as a new string.
        //
        internal static unsafe string EscapeUnescapeIri(char *pInput, int start, int end, UriComponents component)
        {
            int length = end - start;

            // Inputs of up to 256 chars start on a stack buffer; longer ones are given the input length as capacity.
            ValueStringBuilder output = length > 256
                ? new ValueStringBuilder(length)
                : new ValueStringBuilder(stackalloc char[256]);

            // Scratch space for the UTF-8 form of one Rune (at most 4 bytes).
            Span<byte> utf8Scratch = stackalloc byte[4];

            bool isQuery = component == UriComponents.Query;

            int pos = start;
            while (pos < end)
            {
                char current = pInput[pos];

                // Plain ASCII other than '%': copy through.
                if (current <= '\x7f' && current != '%')
                {
                    output.Append(current);
                    pos++;
                    continue;
                }

                if (current == '%')
                {
                    // A '%' without two following chars cannot be an escape triplet; copy it verbatim.
                    if (end - pos <= 2)
                    {
                        output.Append(current);
                        pos++;
                        continue;
                    }

                    char decoded = UriHelper.DecodeHexChars(pInput[pos + 1], pInput[pos + 2]);

                    // The triplet stays escaped when decoding yields Uri.c_DummyChar (presumably the
                    // "not valid hex" marker), a literal '%', a char reserved for this component,
                    // or a char UriHelper considers unsafe to unescape.
                    bool keepEscaped = decoded == Uri.c_DummyChar
                        || decoded == '%'
                        || CheckIsReserved(decoded, component)
                        || UriHelper.IsNotSafeForUnescape(decoded);

                    if (keepEscaped)
                    {
                        output.Append(pInput[pos]);
                        output.Append(pInput[pos + 1]);
                        output.Append(pInput[pos + 2]);
                        pos += 3;
                    }
                    else if (decoded <= '\x7F')
                    {
                        // Safe ASCII: emit the decoded char in place of the triplet.
                        Debug.Assert(decoded < 0xFF, "Expecting ASCII character.");
                        output.Append(decoded);
                        pos += 3;
                    }
                    else
                    {
                        // Possibly the start of a percent-encoded UTF-8 sequence; the helper
                        // appends its result to output and reports how many input chars it consumed.
                        int consumed = PercentEncodingHelper.UnescapePercentEncodedUTF8Sequence(
                            pInput + pos,
                            end - pos,
                            ref output,
                            isQuery,
                            iriParsing: true);

                        Debug.Assert(consumed > 0);
                        pos += consumed;
                    }

                    continue;
                }

                // Raw non-ASCII UTF-16 code unit, possibly the first half of a surrogate pair.
                char low = '\0';
                bool isPair = false;
                bool allowed;

                if (char.IsHighSurrogate(current) && pos + 1 < end)
                {
                    low = pInput[pos + 1];
                    allowed = CheckIriUnicodeRange(current, low, out isPair, isQuery);
                }
                else
                {
                    allowed = CheckIriUnicodeRange(current, isQuery);
                }

                if (allowed)
                {
                    output.Append(current);
                    if (isPair)
                    {
                        output.Append(low);
                    }
                }
                else
                {
                    // Not allowed literally: encode the scalar as UTF-8 and hand each byte to
                    // UriHelper.EscapeAsciiChar. A code unit that cannot form a Rune on its own
                    // (e.g. an unpaired surrogate) is replaced by Rune.ReplacementChar.
                    Rune scalar = isPair
                        ? new Rune(current, low)
                        : (Rune.TryCreate(current, out Rune single) ? single : Rune.ReplacementChar);

                    int written = scalar.EncodeToUtf8(utf8Scratch);
                    for (int k = 0; k < written; k++)
                    {
                        UriHelper.EscapeAsciiChar(utf8Scratch[k], ref output);
                    }
                }

                // A recognized surrogate pair occupies two input chars.
                pos += isPair ? 2 : 1;
            }

            return output.ToString();
        }
예제 #3
0
        //
        // IRI normalization for strings containing characters that are not allowed or
        // escaped characters that should be unescaped in the context of the specified Uri component.
        //
        // Processes the chars in [start, end) of pInput and returns the normalized text:
        //   - "%XX" triplets are decoded when the decoded char may appear literally, kept otherwise;
        //   - runs of "%XX" triplets with values >= 0x80 are decoded as UTF-8 and passed to
        //     UriHelper.MatchUTF8Sequence, or copied verbatim when nothing decodes;
        //   - raw non-ASCII chars are kept when CheckIriUnicodeRange accepts them, and are otherwise
        //     emitted as escaped UTF-8 bytes;
        //   - all remaining chars are copied through.
        //
        internal static unsafe string EscapeUnescapeIri(char *pInput, int start, int end, UriComponents component)
        {
            int size = end - start;
            ValueStringBuilder dest = new ValueStringBuilder(size);

            // Both are created lazily on the first escaped UTF-8 run and reused for later runs.
            byte[]? bytes = null;
            Encoding? noFallbackCharUTF8 = null;

            int  next = start;
            char ch;

            // Scratch space for the UTF-8 form of one Rune (at most 4 bytes).
            Span<byte> maxUtf8EncodedSpan = stackalloc byte[4];

            for (; next < end; ++next)
            {
                if ((ch = pInput[next]) == '%')
                {
                    if (next + 2 < end)
                    {
                        ch = UriHelper.DecodeHexChars(pInput[next + 1], pInput[next + 2]);

                        // Do not unescape a reserved char
                        if (ch == Uri.c_DummyChar || ch == '%' || CheckIsReserved(ch, component) || UriHelper.IsNotSafeForUnescape(ch))
                        {
                            // keep as is
                            dest.Append(pInput[next++]);
                            dest.Append(pInput[next++]);
                            dest.Append(pInput[next]);
                            continue;
                        }
                        else if (ch <= '\x7F')
                        {
                            Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                            //ASCII
                            dest.Append(ch);
                            next += 2;
                            continue;
                        }
                        else
                        {
                            // possibly utf8 encoded sequence of unicode

                            // check if safe to unescape according to Iri rules

                            // A decoded "%FF" is a legitimate value here, so the bound is inclusive.
                            Debug.Assert(ch <= 0xFF, "Expecting a single decoded byte value.");

                            int startSeq  = next;
                            int byteCount = 1;
                            // lazy initialization of max size, will reuse the array for next sequences
                            if (bytes is null)
                            {
                                bytes = new byte[end - next];
                            }

                            bytes[0] = (byte)ch;
                            next    += 3;

                            // Collect every directly following "%XX" triplet whose value is >= 0x80.
                            while (next < end)
                            {
                                // Check on exit criterion
                                if ((ch = pInput[next]) != '%' || next + 2 >= end)
                                {
                                    break;
                                }

                                // already made sure we have 3 characters in str
                                ch = UriHelper.DecodeHexChars(pInput[next + 1], pInput[next + 2]);

                                //invalid hex sequence ?
                                if (ch == Uri.c_DummyChar)
                                {
                                    break;
                                }
                                // character is not part of a UTF-8 sequence ?
                                else if (ch < '\x80')
                                {
                                    break;
                                }

                                //a UTF-8 sequence
                                Debug.Assert(ch <= 0xFF, "Expecting a single decoded byte value.");
                                bytes[byteCount++] = (byte)ch;
                                next += 3;
                            }
                            next--; // for loop will increment

                            // Using encoder with no replacement fall-back will skip all invalid UTF-8 sequences.
                            // The encoding does not depend on the input, so it is built once per call
                            // instead of once per escaped run.
                            noFallbackCharUTF8 ??= Encoding.GetEncoding(
                                Encoding.UTF8.CodePage,
                                new EncoderReplacementFallback(""),
                                new DecoderReplacementFallback(""));

                            char[] unescapedChars = new char[bytes.Length];
                            int    charCount      = noFallbackCharUTF8.GetChars(bytes, 0, byteCount, unescapedChars, 0);

                            if (charCount != 0)
                            {
                                // If invalid sequences were present in the original escaped string, we need to
                                // copy the escaped versions of those sequences.
                                // Decoded Unicode values will be kept only when they are allowed by the URI/IRI RFC
                                // rules.
                                UriHelper.MatchUTF8Sequence(ref dest, unescapedChars, charCount, bytes,
                                                            byteCount, component == UriComponents.Query, true);
                            }
                            else
                            {
                                // copy escaped sequence as is
                                for (int i = startSeq; i <= next; ++i)
                                {
                                    dest.Append(pInput[i]);
                                }
                            }
                        }
                    }
                    else
                    {
                        // A '%' without two following chars cannot be an escape triplet; copy it verbatim.
                        dest.Append(pInput[next]);
                    }
                }
                else if (ch > '\x7f')
                {
                    // unicode

                    bool isInIriUnicodeRange;
                    bool surrogatePair = false;

                    char ch2 = '\0';

                    if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
                    {
                        ch2 = pInput[next + 1];
                        isInIriUnicodeRange = CheckIriUnicodeRange(ch, ch2, out surrogatePair, component == UriComponents.Query);
                    }
                    else
                    {
                        isInIriUnicodeRange = CheckIriUnicodeRange(ch, component == UriComponents.Query);
                    }

                    if (isInIriUnicodeRange)
                    {
                        dest.Append(ch);
                        if (surrogatePair)
                        {
                            dest.Append(ch2);
                        }
                    }
                    else
                    {
                        // Not allowed literally: encode the scalar as UTF-8 and hand each byte to
                        // UriHelper.EscapeAsciiChar. A code unit that cannot form a Rune on its own
                        // (e.g. an unpaired surrogate) is replaced by Rune.ReplacementChar.
                        Rune rune;
                        if (surrogatePair)
                        {
                            rune = new Rune(ch, ch2);
                        }
                        else if (!Rune.TryCreate(ch, out rune))
                        {
                            rune = Rune.ReplacementChar;
                        }

                        int        bytesWritten = rune.EncodeToUtf8(maxUtf8EncodedSpan);
                        Span<byte> encodedBytes = maxUtf8EncodedSpan.Slice(0, bytesWritten);

                        foreach (byte b in encodedBytes)
                        {
                            UriHelper.EscapeAsciiChar(b, ref dest);
                        }
                    }

                    // A recognized surrogate pair occupies two input chars.
                    if (surrogatePair)
                    {
                        next++;
                    }
                }
                else
                {
                    // just copy the character
                    dest.Append(pInput[next]);
                }
            }

            return dest.ToString();
        }
예제 #4
0
        public static unsafe int UnescapePercentEncodedUTF8Sequence(char *input, int length, ref ValueStringBuilder dest, bool isQuery, bool iriParsing)
        {
            // The following assertions rely on the input not mutating mid-operation, as is the case currently since callers are working with strings
            // If we start accepting input such as spans, this method must be audited to ensure no buffer overruns/infinite loops could occur

            // As an optimization, this method should only be called after the first character is known to be a part of a non-ascii UTF8 sequence
            Debug.Assert(length >= 3);
            Debug.Assert(input[0] == '%');
            Debug.Assert(UriHelper.DecodeHexChars(input[1], input[2]) != Uri.c_DummyChar);
            Debug.Assert(UriHelper.DecodeHexChars(input[1], input[2]) >= 128);

            uint fourByteBuffer    = 0;
            int  bytesLeftInBuffer = 0;

            int totalCharsConsumed = 0;
            int charsToCopy        = 0;
            int bytesConsumed      = 0;

RefillBuffer:
            int i = totalCharsConsumed + (bytesLeftInBuffer * 3);

ReadByteFromInput:
            if ((uint)(length - i) <= 2 || input[i] != '%')
            {
                goto NoMoreOrInvalidInput;
            }

            uint value = input[i + 1];

            if ((uint)((value - 'A') & ~0x20) <= ('F' - 'A'))
            {
                value = (value | 0x20) - 'a' + 10;
            }
            else if ((value - '8') <= ('9' - '8'))
            {
                value -= '0';
            }
            else
            {
                goto NoMoreOrInvalidInput;  // First character wasn't hex or was <= 7F (Ascii)
            }
            uint second = (uint)input[i + 2] - '0';

            if (second <= 9)
            {
                // second is already [0, 9]
            }
            else if ((uint)((second - ('A' - '0')) & ~0x20) <= ('F' - 'A'))
            {
                second = ((second + '0') | 0x20) - 'a' + 10;
            }
            else
            {
                goto NoMoreOrInvalidInput;  // Second character wasn't Hex
            }
            value = (value << 4) | second;

            Debug.Assert(value >= 128);

            // Rotate the buffer and overwrite the last byte
            if (BitConverter.IsLittleEndian)
            {
                fourByteBuffer = (fourByteBuffer >> 8) | (value << 24);
            }
            else
            {
                fourByteBuffer = (fourByteBuffer << 8) | value;
            }

            if (++bytesLeftInBuffer != 4)
            {
                i += 3;
                goto ReadByteFromInput;
            }

DecodeRune:
            Debug.Assert(totalCharsConsumed % 3 == 0);
            Debug.Assert(bytesLeftInBuffer == 2 || bytesLeftInBuffer == 3 || bytesLeftInBuffer == 4);
            Debug.Assert((fourByteBuffer & (BitConverter.IsLittleEndian ? 0x00000080 : 0x80000000)) != 0);
            Debug.Assert((fourByteBuffer & (BitConverter.IsLittleEndian ? 0x00008000 : 0x00800000)) != 0);
            Debug.Assert(bytesLeftInBuffer < 3 || (fourByteBuffer & (BitConverter.IsLittleEndian ? 0x00800000 : 0x00008000)) != 0);
            Debug.Assert(bytesLeftInBuffer < 4 || (fourByteBuffer & (BitConverter.IsLittleEndian ? 0x80000000 : 0x00000080)) != 0);

            uint temp = fourByteBuffer; // make a copy so that the *copy* (not the original) is marked address-taken

            if (Rune.DecodeFromUtf8(new ReadOnlySpan <byte>(&temp, bytesLeftInBuffer), out Rune rune, out bytesConsumed) == OperationStatus.Done)
            {
                Debug.Assert(bytesConsumed >= 2, $"Rune.DecodeFromUtf8 consumed {bytesConsumed} bytes, likely indicating input was modified concurrently during UnescapePercentEncodedUTF8Sequence's execution");

                if (!iriParsing || IriHelper.CheckIriUnicodeRange((uint)rune.Value, isQuery))
                {
                    if (charsToCopy != 0)
                    {
                        dest.Append(input + totalCharsConsumed - charsToCopy, charsToCopy);
                        charsToCopy = 0;
                    }

                    dest.Append(rune);
                    goto AfterDecodeRune;
                }
            }