Пример #1
0
        //
        // Need to check for invalid utf sequences that may not have given any chars.
        // We got the unescaped chars, we then reencode them and match off the bytes
        // to get the invalid sequence bytes that we just copy off
        //
        internal static unsafe void MatchUTF8Sequence(char *pDest, char[] dest, ref int destOffset, char[] unescapedChars,
                                                      int charCount, byte[] bytes, int byteCount, bool isQuery, bool iriParsing)
        {
            int count = 0;

            fixed(char *unescapedCharsPtr = unescapedChars)
            {
                for (int j = 0; j < charCount; ++j)
                {
                    bool isHighSurr = Char.IsHighSurrogate(unescapedCharsPtr[j]);

                    byte[] encodedBytes       = Encoding.UTF8.GetBytes(unescapedChars, j, isHighSurr ? 2 : 1);
                    int    encodedBytesLength = encodedBytes.Length;

                    // we have to keep unicode chars outside Iri range escaped
                    bool inIriRange = false;
                    if (iriParsing)
                    {
                        if (!isHighSurr)
                        {
                            inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], isQuery);
                        }
                        else
                        {
                            bool surrPair = false;
                            inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], unescapedChars[j + 1],
                                                                        ref surrPair, isQuery);
                        }
                    }

                    while (true)
                    {
                        // Escape any invalid bytes that were before this character
                        while (bytes[count] != encodedBytes[0])
                        {
                            Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                            EscapeAsciiChar((char)bytes[count++], dest, ref destOffset);
                        }

                        // check if all bytes match
                        bool allBytesMatch = true;
                        int  k             = 0;
                        for (; k < encodedBytesLength; ++k)
                        {
                            if (bytes[count + k] != encodedBytes[k])
                            {
                                allBytesMatch = false;
                                break;
                            }
                        }

                        if (allBytesMatch)
                        {
                            count += encodedBytesLength;
                            if (iriParsing)
                            {
                                if (!inIriRange)
                                {
                                    // need to keep chars not allowed as escaped
                                    for (int l = 0; l < encodedBytes.Length; ++l)
                                    {
                                        Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                        EscapeAsciiChar((char)encodedBytes[l], dest, ref destOffset);
                                    }
                                }
                                else if (!Uri.IsBidiControlCharacter(unescapedCharsPtr[j]))
                                {
                                    //copy chars
                                    Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                    pDest[destOffset++] = unescapedCharsPtr[j];
                                    if (isHighSurr)
                                    {
                                        Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                        pDest[destOffset++] = unescapedCharsPtr[j + 1];
                                    }
                                }
                            }
                            else
                            {
                                //copy chars
                                Debug.Assert(dest.Length > destOffset);
                                pDest[destOffset++] = unescapedCharsPtr[j];

                                if (isHighSurr)
                                {
                                    Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                    pDest[destOffset++] = unescapedCharsPtr[j + 1];
                                }
                            }

                            break; // break out of while (true) since we've matched this char bytes
                        }
                        else
                        {
                            // copy bytes till place where bytes dont match
                            for (int l = 0; l < k; ++l)
                            {
                                Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                                EscapeAsciiChar((char)bytes[count++], dest, ref destOffset);
                            }
                        }
                    }

                    if (isHighSurr)
                    {
                        j++;
                    }
                }
            }

            // Include any trailing invalid sequences
            while (count < byteCount)
            {
                Debug.Assert(dest.Length > destOffset, "Buffer overrun detected");
                EscapeAsciiChar((char)bytes[count++], dest, ref destOffset);
            }
        }
Пример #2
0
        internal static unsafe void UnescapeString(char *pStr, int start, int end, ref ValueStringBuilder dest,
                                                   char rsvd1, char rsvd2, char rsvd3, UnescapeMode unescapeMode, UriParser?syntax, bool isQuery)
        {
            if ((unescapeMode & UnescapeMode.EscapeUnescape) == UnescapeMode.CopyOnly)
            {
                dest.Append(pStr + start, end - start);
                return;
            }

            bool escapeReserved = false;
            bool iriParsing     = Uri.IriParsingStatic(syntax) &&
                                  ((unescapeMode & UnescapeMode.EscapeUnescape) == UnescapeMode.EscapeUnescape);

            for (int next = start; next < end;)
            {
                char ch = (char)0;

                for (; next < end; ++next)
                {
                    if ((ch = pStr[next]) == '%')
                    {
                        if ((unescapeMode & UnescapeMode.Unescape) == 0)
                        {
                            // re-escape, don't check anything else
                            escapeReserved = true;
                        }
                        else if (next + 2 < end)
                        {
                            ch = DecodeHexChars(pStr[next + 1], pStr[next + 2]);
                            // Unescape a good sequence if full unescape is requested
                            if (unescapeMode >= UnescapeMode.UnescapeAll)
                            {
                                if (ch == Uri.c_DummyChar)
                                {
                                    if (unescapeMode >= UnescapeMode.UnescapeAllOrThrow)
                                    {
                                        // Should be a rare case where the app tries to feed an invalid escaped sequence
                                        throw new UriFormatException(SR.net_uri_BadString);
                                    }
                                    continue;
                                }
                            }
                            // re-escape % from an invalid sequence
                            else if (ch == Uri.c_DummyChar)
                            {
                                if ((unescapeMode & UnescapeMode.Escape) != 0)
                                {
                                    escapeReserved = true;
                                }
                                else
                                {
                                    continue;   // we should throw instead but since v1.0 would just print '%'
                                }
                            }
                            // Do not unescape '%' itself unless full unescape is requested
                            else if (ch == '%')
                            {
                                next += 2;
                                continue;
                            }
                            // Do not unescape a reserved char unless full unescape is requested
                            else if (ch == rsvd1 || ch == rsvd2 || ch == rsvd3)
                            {
                                next += 2;
                                continue;
                            }
                            // Do not unescape a dangerous char unless it's V1ToStringFlags mode
                            else if ((unescapeMode & UnescapeMode.V1ToStringFlag) == 0 && IsNotSafeForUnescape(ch))
                            {
                                next += 2;
                                continue;
                            }
                            else if (iriParsing && ((ch <= '\x9F' && IsNotSafeForUnescape(ch)) ||
                                                    (ch > '\x9F' && !IriHelper.CheckIriUnicodeRange(ch, isQuery))))
                            {
                                // check if unenscaping gives a char outside iri range
                                // if it does then keep it escaped
                                next += 2;
                                continue;
                            }
                            // unescape escaped char or escape %
                            break;
                        }
                        else if (unescapeMode >= UnescapeMode.UnescapeAll)
                        {
                            if (unescapeMode >= UnescapeMode.UnescapeAllOrThrow)
                            {
                                // Should be a rare case where the app tries to feed an invalid escaped sequence
                                throw new UriFormatException(SR.net_uri_BadString);
                            }
                            // keep a '%' as part of a bogus sequence
                            continue;
                        }
                        else
                        {
                            escapeReserved = true;
                        }
                        // escape (escapeReserved==true) or otherwise unescape the sequence
                        break;
                    }
                    else if ((unescapeMode & (UnescapeMode.Unescape | UnescapeMode.UnescapeAll))
                             == (UnescapeMode.Unescape | UnescapeMode.UnescapeAll))
                    {
                        continue;
                    }
                    else if ((unescapeMode & UnescapeMode.Escape) != 0)
                    {
                        // Could actually escape some of the characters
                        if (ch == rsvd1 || ch == rsvd2 || ch == rsvd3)
                        {
                            // found an unescaped reserved character -> escape it
                            escapeReserved = true;
                            break;
                        }
                        else if ((unescapeMode & UnescapeMode.V1ToStringFlag) == 0 &&
                                 (ch <= '\x1F' || (ch >= '\x7F' && ch <= '\x9F')))
                        {
                            // found an unescaped reserved character -> escape it
                            escapeReserved = true;
                            break;
                        }
                    }
                }

                //copy off previous characters from input
                while (start < next)
                {
                    dest.Append(pStr[start++]);
                }

                if (next != end)
                {
                    if (escapeReserved)
                    {
                        EscapeAsciiChar((byte)pStr[next], ref dest);
                        escapeReserved = false;
                        next++;
                    }
                    else if (ch <= 127)
                    {
                        dest.Append(ch);
                        next += 3;
                    }
                    else
                    {
                        // Unicode
                        int charactersRead = PercentEncodingHelper.UnescapePercentEncodedUTF8Sequence(
                            pStr + next,
                            end - next,
                            ref dest,
                            isQuery,
                            iriParsing);

                        Debug.Assert(charactersRead > 0);
                        next += charactersRead;
                    }

                    start = next;
                }
            }
        }
Пример #3
0
        internal unsafe static char[] UnescapeString(char *pStr, int start, int end, char[] dest, ref int destPosition,
                                                     char rsvd1, char rsvd2, char rsvd3, UnescapeMode unescapeMode, UriParser syntax, bool isQuery)
        {
            byte [] bytes = null;
            byte    escapedReallocations = 0;
            bool    escapeReserved       = false;
            int     next       = start;
            bool    iriParsing = Uri.IriParsingStatic(syntax) &&
                                 ((unescapeMode & UnescapeMode.EscapeUnescape) == UnescapeMode.EscapeUnescape);

            while (true)
            {
                // we may need to re-pin dest[]
                fixed(char *pDest = dest)
                {
                    if ((unescapeMode & UnescapeMode.EscapeUnescape) == UnescapeMode.CopyOnly)
                    {
                        while (start < end)
                        {
                            pDest[destPosition++] = pStr[start++];
                        }
                        return(dest);
                    }

                    while (true)
                    {
                        char ch = (char)0;

                        for (; next < end; ++next)
                        {
                            if ((ch = pStr[next]) == '%')
                            {
                                if ((unescapeMode & UnescapeMode.Unescape) == 0)
                                {
                                    // re-escape, don't check anything else
                                    escapeReserved = true;
                                }
                                else if (next + 2 < end)
                                {
                                    ch = EscapedAscii(pStr[next + 1], pStr[next + 2]);
                                    // Unescape a good sequence if full unescape is requested
                                    if (unescapeMode >= UnescapeMode.UnescapeAll)
                                    {
                                        if (ch == Uri.c_DummyChar)
                                        {
                                            if (unescapeMode >= UnescapeMode.UnescapeAllOrThrow)
                                            {
                                                // Should be a rare case where the app tries to feed an invalid escaped sequence
                                                throw new UriFormatException(SR.GetString(SR.net_uri_BadString));
                                            }
                                            continue;
                                        }
                                    }
                                    // re-escape % from an invalid sequence
                                    else if (ch == Uri.c_DummyChar)
                                    {
                                        if ((unescapeMode & UnescapeMode.Escape) != 0)
                                        {
                                            escapeReserved = true;
                                        }
                                        else
                                        {
                                            continue;   // we should throw instead but since v1.0 woudl just print '%'
                                        }
                                    }
                                    // Do not unescape '%' itself unless full unescape is requested
                                    else if (ch == '%')
                                    {
                                        next += 2;
                                        continue;
                                    }
                                    // Do not unescape a reserved char unless full unescape is requested
                                    else if (ch == rsvd1 || ch == rsvd2 || ch == rsvd3)
                                    {
                                        next += 2;
                                        continue;
                                    }
                                    // Do not unescape a dangerous char unless it's V1ToStringFlags mode
                                    else if ((unescapeMode & UnescapeMode.V1ToStringFlag) == 0 && IsNotSafeForUnescape(ch))
                                    {
                                        next += 2;
                                        continue;
                                    }
                                    else if (iriParsing && ((ch <= '\x9F' && IsNotSafeForUnescape(ch)) ||
                                                            (ch > '\x9F' && !IriHelper.CheckIriUnicodeRange(ch, isQuery))))
                                    {
                                        // check if unenscaping gives a char ouside iri range
                                        // if it does then keep it escaped
                                        next += 2;
                                        continue;
                                    }
                                    // unescape escaped char or escape %
                                    break;
                                }
                                else if (unescapeMode >= UnescapeMode.UnescapeAll)
                                {
                                    if (unescapeMode >= UnescapeMode.UnescapeAllOrThrow)
                                    {
                                        // Should be a rare case where the app tries to feed an invalid escaped sequence
                                        throw new UriFormatException(SR.GetString(SR.net_uri_BadString));
                                    }
                                    // keep a '%' as part of a bogus sequence
                                    continue;
                                }
                                else
                                {
                                    escapeReserved = true;
                                }
                                // escape (escapeReserved==ture) or otheriwse unescape the sequence
                                break;
                            }
                            else if ((unescapeMode & (UnescapeMode.Unescape | UnescapeMode.UnescapeAll))
                                     == (UnescapeMode.Unescape | UnescapeMode.UnescapeAll))
                            {
                                continue;
                            }
                            else if ((unescapeMode & UnescapeMode.Escape) != 0)
                            {
                                // Could actually escape some of the characters
                                if (ch == rsvd1 || ch == rsvd2 || ch == rsvd3)
                                {
                                    // found an unescaped reserved character -> escape it
                                    escapeReserved = true;
                                    break;
                                }
                                else if ((unescapeMode & UnescapeMode.V1ToStringFlag) == 0 &&
                                         (ch <= '\x1F' || (ch >= '\x7F' && ch <= '\x9F')))
                                {
                                    // found an unescaped reserved character -> escape it
                                    escapeReserved = true;
                                    break;
                                }
                            }
                        }

                        //copy off previous characters from input
                        while (start < next)
                        {
                            pDest[destPosition++] = pStr[start++];
                        }

                        if (next != end)
                        {
                            //VsWhidbey#87423
                            if (escapeReserved)
                            {
                                //escape that char
                                // Since this should be _really_ rare case, reallocate with constant size increase of 30 rsvd-type characters.
                                if (escapedReallocations == 0)
                                {
                                    escapedReallocations = 30;
                                    char[] newDest = new char[dest.Length + escapedReallocations * 3];
                                    fixed(char *pNewDest = newDest)
                                    {
                                        for (int i = 0; i < destPosition; ++i)
                                        {
                                            pNewDest[i] = pDest[i];
                                        }
                                    }

                                    dest = newDest;
                                    // re-pin new dest[] array
                                    goto dest_fixed_loop_break;
                                }
                                else
                                {
                                    --escapedReallocations;
                                    EscapeAsciiChar(pStr[next], dest, ref destPosition);
                                    escapeReserved = false;
                                    start          = ++next;
                                    continue;
                                }
                            }

                            // unescaping either one Ascii or possibly multiple Unicode

                            if (ch <= '\x7F')
                            {
                                //ASCII
                                dest[destPosition++] = ch;
                                next += 3;
                                start = next;
                                continue;
                            }

                            // Unicode

                            int byteCount = 1;
                            // lazy initialization of max size, will reuse the array for next sequences
                            if ((object)bytes == null)
                            {
                                bytes = new byte[end - next];
                            }

                            bytes[0] = (byte)ch;
                            next    += 3;
                            while (next < end)
                            {
                                // Check on exit criterion
                                if ((ch = pStr[next]) != '%' || next + 2 >= end)
                                {
                                    break;
                                }

                                // already made sure we have 3 characters in str
                                ch = EscapedAscii(pStr[next + 1], pStr[next + 2]);

                                //invalid hex sequence ?
                                if (ch == Uri.c_DummyChar)
                                {
                                    break;
                                }
                                // character is not part of a UTF-8 sequence ?
                                else if (ch < '\x80')
                                {
                                    break;
                                }
                                else
                                {
                                    //a UTF-8 sequence
                                    bytes[byteCount++] = (byte)ch;
                                    next += 3;
                                }
                            }
                            Encoding noFallbackCharUTF8 = (Encoding)Encoding.UTF8.Clone();
                            noFallbackCharUTF8.EncoderFallback = new EncoderReplacementFallback("");
                            noFallbackCharUTF8.DecoderFallback = new DecoderReplacementFallback("");

                            char[] unescapedChars = new char[bytes.Length];
                            int    charCount      = noFallbackCharUTF8.GetChars(bytes, 0, byteCount, unescapedChars, 0);

                            start = next;

                            // match exact bytes
                            // Do not unescape chars not allowed by Iri
                            // need to check for invalid utf sequences that may not have given any chars

                            MatchUTF8Sequence(pDest, dest, ref destPosition, unescapedChars, charCount, bytes,
                                              byteCount, isQuery, iriParsing);
                        }

                        if (next == end)
                        {
                            goto done;
                        }
                    }
                    dest_fixed_loop_break :;
                }
            }

            done :       return(dest);
        }
Пример #4
0
        public static unsafe int UnescapePercentEncodedUTF8Sequence(char *input, int length, ref ValueStringBuilder dest, bool isQuery, bool iriParsing)
        {
            // The following assertions rely on the input not mutating mid-operation, as is the case currently since callers are working with strings
            // If we start accepting input such as spans, this method must be audited to ensure no buffer overruns/infinite loops could occur

            // As an optimization, this method should only be called after the first character is known to be a part of a non-ascii UTF8 sequence
            Debug.Assert(length >= 3);
            Debug.Assert(input[0] == '%');
            Debug.Assert(UriHelper.DecodeHexChars(input[1], input[2]) != Uri.c_DummyChar);
            Debug.Assert(UriHelper.DecodeHexChars(input[1], input[2]) >= 128);

            uint fourByteBuffer    = 0;
            int  bytesLeftInBuffer = 0;

            int totalCharsConsumed = 0;
            int charsToCopy        = 0;
            int bytesConsumed      = 0;

RefillBuffer:
            int i = totalCharsConsumed + (bytesLeftInBuffer * 3);

ReadByteFromInput:
            if ((uint)(length - i) <= 2 || input[i] != '%')
            {
                goto NoMoreOrInvalidInput;
            }

            uint value = input[i + 1];

            if ((uint)((value - 'A') & ~0x20) <= ('F' - 'A'))
            {
                value = (value | 0x20) - 'a' + 10;
            }
            else if ((value - '8') <= ('9' - '8'))
            {
                value -= '0';
            }
            else
            {
                goto NoMoreOrInvalidInput;  // First character wasn't hex or was <= 7F (Ascii)
            }
            uint second = (uint)input[i + 2] - '0';

            if (second <= 9)
            {
                // second is already [0, 9]
            }
            else if ((uint)((second - ('A' - '0')) & ~0x20) <= ('F' - 'A'))
            {
                second = ((second + '0') | 0x20) - 'a' + 10;
            }
            else
            {
                goto NoMoreOrInvalidInput;  // Second character wasn't Hex
            }
            value = (value << 4) | second;

            Debug.Assert(value >= 128);

            // Rotate the buffer and overwrite the last byte
            if (BitConverter.IsLittleEndian)
            {
                fourByteBuffer = (fourByteBuffer >> 8) | (value << 24);
            }
            else
            {
                fourByteBuffer = (fourByteBuffer << 8) | value;
            }

            if (++bytesLeftInBuffer != 4)
            {
                i += 3;
                goto ReadByteFromInput;
            }

DecodeRune:
            Debug.Assert(totalCharsConsumed % 3 == 0);
            Debug.Assert(bytesLeftInBuffer == 2 || bytesLeftInBuffer == 3 || bytesLeftInBuffer == 4);
            Debug.Assert((fourByteBuffer & (BitConverter.IsLittleEndian ? 0x00000080 : 0x80000000)) != 0);
            Debug.Assert((fourByteBuffer & (BitConverter.IsLittleEndian ? 0x00008000 : 0x00800000)) != 0);
            Debug.Assert(bytesLeftInBuffer < 3 || (fourByteBuffer & (BitConverter.IsLittleEndian ? 0x00800000 : 0x00008000)) != 0);
            Debug.Assert(bytesLeftInBuffer < 4 || (fourByteBuffer & (BitConverter.IsLittleEndian ? 0x80000000 : 0x00000080)) != 0);

            uint temp = fourByteBuffer; // make a copy so that the *copy* (not the original) is marked address-taken

            if (Rune.DecodeFromUtf8(new ReadOnlySpan <byte>(&temp, bytesLeftInBuffer), out Rune rune, out bytesConsumed) == OperationStatus.Done)
            {
                Debug.Assert(bytesConsumed >= 2, $"Rune.DecodeFromUtf8 consumed {bytesConsumed} bytes, likely indicating input was modified concurrently during UnescapePercentEncodedUTF8Sequence's execution");

                if (!iriParsing || IriHelper.CheckIriUnicodeRange((uint)rune.Value, isQuery))
                {
                    if (charsToCopy != 0)
                    {
                        dest.Append(input + totalCharsConsumed - charsToCopy, charsToCopy);
                        charsToCopy = 0;
                    }

                    dest.Append(rune);
                    goto AfterDecodeRune;
                }
            }