Esempio n. 1
0
 // Escapes a string that is meant to be URI data (a piece of a URI, not a complete URI).
 // Every character that is not an unreserved character gets escaped, percent signs included
 // (the helper is told not to look for already-escaped sequences).
 public static string EscapeDataString(string stringToEscape)
 {
     return UriHelper.EscapeString(stringToEscape, checkExistingEscaped: false, UriHelper.UnreservedTable);
 }
Esempio n. 2
0
        //
        // Resolves relativeUri against baseUri.
        //
        // The outcome is reported in one of two ways:
        //  - a Uri instance is returned: relativeUri itself when it is already absolute, or baseUri when
        //    the combined string turns out to be the very same string instance as baseUri's own string;
        //  - null is returned and the text of the resolved Uri is handed back through newUriString.
        //
        // userEscaped is overwritten with relativeUri.UserEscaped whenever a non-absolute relativeUri is supplied.
        //
        internal static Uri? ResolveHelper(Uri baseUri, Uri? relativeUri, ref string? newUriString, ref bool userEscaped)
        {
            Debug.Assert(!baseUri.IsNotAbsoluteUri && !baseUri.UserDrivenParsing, "Uri::ResolveHelper()|baseUri is not Absolute or is controlled by User Parser.");

            // A missing relative Uri is handled exactly like an empty relative string.
            string relative = string.Empty;

            // 'is null' is a plain reference test, so no user-defined equality operator gets involved.
            if (!(relativeUri is null))
            {
                if (relativeUri.IsAbsoluteUri)
                {
                    return relativeUri;
                }

                relative = relativeUri.OriginalString;
                userEscaped = relativeUri.UserEscaped;
            }

            // From here on the input is known to be a relative reference.

            // Strip surrounding whitespace, but only when the first or the last char actually is whitespace.
            if (relative.Length > 0)
            {
                char first = relative[0];
                char last = relative[relative.Length - 1];

                if (UriHelper.IsLWS(first) || UriHelper.IsLWS(last))
                {
                    relative = relative.Trim(UriHelper.s_WSchars);
                }
            }

            // Nothing to combine: the result is the base Uri rendered as a string.
            if (relative.Length == 0)
            {
                UriFormat baseFormat = baseUri.UserEscaped ? UriFormat.UriEscaped : UriFormat.SafeUnescaped;
                newUriString = baseUri.GetParts(UriComponents.AbsoluteUri, baseFormat);
                return null;
            }

            char lead = relative[0];

            // Fragment-only reference: base without its fragment, followed by the new fragment.
            if (lead == '#' && !baseUri.IsImplicitFile && baseUri.Syntax!.InFact(UriSyntaxFlags.MayHaveFragment))
            {
                string baseWithoutFragment = baseUri.GetParts(UriComponents.AbsoluteUri & ~UriComponents.Fragment,
                                                              UriFormat.UriEscaped);
                newUriString = baseWithoutFragment + relative;
                return null;
            }

            // Query-only reference: base without its query and fragment, followed by the new query.
            if (lead == '?' && !baseUri.IsImplicitFile && baseUri.Syntax!.InFact(UriSyntaxFlags.MayHaveQuery))
            {
                string baseWithoutQuery = baseUri.GetParts(UriComponents.AbsoluteUri & ~UriComponents.Query & ~UriComponents.Fragment,
                                                           UriFormat.UriEscaped);
                newUriString = baseWithoutQuery + relative;
                return null;
            }

            // Special case: the relative part looks like a DOS path ("C:\x", "C:/x", "C|\x", "C|/x").
            bool looksLikeDosPath =
                relative.Length >= 3 &&
                (relative[1] == ':' || relative[1] == '|') &&
                UriHelper.IsAsciiLetter(relative[0]) &&
                (relative[2] == '\\' || relative[2] == '/');

            if (looksLikeDosPath)
            {
                if (baseUri.IsImplicitFile)
                {
                    // file:/// could be prepended here, but the result has to stay an *implicit* file Uri.
                    newUriString = relative;
                    return null;
                }

                if (baseUri.Syntax!.InFact(UriSyntaxFlags.AllowDOSPath))
                {
                    // The scheme stays, only the path is replaced.
                    bool hasAuthority = baseUri.InFact(Flags.AuthorityFound);
                    bool rootedPath = baseUri.Syntax.InFact(UriSyntaxFlags.PathIsRooted);

                    string separator;
                    if (hasAuthority)
                    {
                        separator = rootedPath ? ":///" : "://";
                    }
                    else
                    {
                        separator = rootedPath ? ":/" : ":";
                    }

                    newUriString = baseUri.Scheme + separator + relative;
                    return null;
                }

                // Falling through: input such as "http://host/path/" + "C:\x" ends up as http://host/path/c:/x
            }

            GetCombinedString(baseUri, relative, userEscaped, ref newUriString);

            // When the combined string is the base Uri's own string instance, the base Uri is the answer.
            return ReferenceEquals(newUriString, baseUri._string) ? baseUri : null;
        }
Esempio n. 3
0
        //
        // Copies the decoded chars of a percent-encoded UTF-8 run into the destination buffer and
        // re-escapes every source byte that did not end up in one of those chars.
        //
        // Need to check for invalid utf sequences that may not have given any chars.
        // We got the unescaped chars, we then re-encode them and match off the bytes
        // to get the invalid sequence bytes that we just copy off
        //
        // pDest / dest   - destination buffer; chars are stored through pDest, escaped bytes are written via
        //                  EscapeAsciiChar(dest, ...). Presumably pDest points at dest[0] - confirm with callers.
        // destOffset     - next write position in the destination, advanced as output is produced
        // unescapedChars - the chars obtained by decoding 'bytes'; only the first charCount are processed
        // bytes          - the raw bytes of the escaped run; only the first byteCount are considered at the end
        // isQuery        - forwarded to IriHelper.CheckIriUnicodeRange
        // iriParsing     - when true, chars outside the IRI range stay escaped, and Bidi control chars are
        //                  dropped if UriParser.DontKeepUnicodeBidiFormattingCharacters is set
        //
        internal static unsafe void MatchUTF8Sequence(char *pDest, char[] dest, ref int destOffset, Span <char> unescapedChars,
                                                      int charCount, byte[] bytes, int byteCount, bool isQuery, bool iriParsing)
        {
            // Scratch buffer for the UTF-8 form of one char / surrogate pair (never more than 4 bytes);
            // allocated once and re-sliced on every iteration.
            Span <byte> maxUtf8EncodedSpan = stackalloc byte[4];

            // Index of the next byte in 'bytes' that has not been accounted for yet
            int count = 0;

            fixed(char *unescapedCharsPtr = unescapedChars)
            {
                for (int j = 0; j < charCount; ++j)
                {
                    // A high surrogate is encoded together with the char that follows it
                    bool        isHighSurr   = char.IsHighSurrogate(unescapedCharsPtr[j]);
                    Span <byte> encodedBytes = maxUtf8EncodedSpan;
                    int         bytesWritten = Encoding.UTF8.GetBytes(unescapedChars.Slice(j, isHighSurr ? 2 : 1), encodedBytes);
                    encodedBytes = encodedBytes.Slice(0, bytesWritten);

                    // we have to keep unicode chars outside Iri range escaped
                    bool inIriRange = false;
                    if (iriParsing)
                    {
                        if (!isHighSurr)
                        {
                            inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], isQuery);
                        }
                        else
                        {
                            bool surrPair = false;
                            inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], unescapedChars[j + 1],
                                                                        ref surrPair, isQuery);
                        }
                    }

                    // Walk 'bytes' until the re-encoded form of the current char is found, escaping
                    // everything that is skipped on the way.
                    // NOTE(review): neither scan below is bounded by byteCount; this appears to rely on the
                    // re-encoded bytes always being present in 'bytes' - confirm.
                    while (true)
                    {
                        // Escape any invalid bytes that were before this character
                        while (bytes[count] != encodedBytes[0])
                        {
                            Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                            EscapeAsciiChar((char)bytes[count++], dest, ref destOffset);
                        }

                        // check if all bytes match
                        bool allBytesMatch = true;
                        int  k             = 0;
                        for (; k < encodedBytes.Length; ++k)
                        {
                            if (bytes[count + k] != encodedBytes[k])
                            {
                                allBytesMatch = false;
                                break;
                            }
                        }

                        if (allBytesMatch)
                        {
                            // The whole encoded char was found: consume its bytes
                            count += encodedBytes.Length;
                            if (iriParsing)
                            {
                                if (!inIriRange)
                                {
                                    // need to keep chars not allowed as escaped
                                    for (int l = 0; l < encodedBytes.Length; ++l)
                                    {
                                        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                        EscapeAsciiChar((char)encodedBytes[l], dest, ref destOffset);
                                    }
                                }
                                // A Bidi control char is written only when such chars are to be kept;
                                // otherwise nothing is emitted for it at all.
                                else if (!UriHelper.IsBidiControlCharacter(unescapedCharsPtr[j]) || !UriParser.DontKeepUnicodeBidiFormattingCharacters)
                                {
                                    //copy chars
                                    Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                    pDest[destOffset++] = unescapedCharsPtr[j];
                                    if (isHighSurr)
                                    {
                                        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                        pDest[destOffset++] = unescapedCharsPtr[j + 1];
                                    }
                                }
                            }
                            else
                            {
                                //copy chars
                                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                pDest[destOffset++] = unescapedCharsPtr[j];

                                if (isHighSurr)
                                {
                                    Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                    pDest[destOffset++] = unescapedCharsPtr[j + 1];
                                }
                            }

                            break; // break out of while (true) since we've matched this char bytes
                        }
                        else
                        {
                            // Only a prefix of k bytes matched (k >= 1, because the first byte matched above):
                            // those bytes belong to an invalid sequence, so escape them and search again.
                            // copy bytes till place where bytes don't match
                            for (int l = 0; l < k; ++l)
                            {
                                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                EscapeAsciiChar((char)bytes[count++], dest, ref destOffset);
                            }
                        }
                    }

                    // The low surrogate was handled together with its high surrogate: skip it
                    if (isHighSurr)
                    {
                        j++;
                    }
                }
            }

            // Include any trailing invalid sequences
            while (count < byteCount)
            {
                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                EscapeAsciiChar((char)bytes[count++], dest, ref destOffset);
            }
        }
Esempio n. 4
0
        //
        // Builds the Unicode form of the host name found in hostname[start..end), one label at a time.
        //
        // Returns null when the range is empty (end <= start); otherwise the converted host name, in which
        // every label separator that was found is written as an ASCII '.'.
        //
        // allAscii is set to false as soon as a char above 0x7F is seen.
        // atLeastOneValidIdn is set to true as soon as one ACE label was successfully converted to Unicode.
        //
        // Throws UriFormatException when a non-ASCII label is rejected by IdnMapping.GetAscii.
        //
        internal static unsafe string UnicodeEquivalent(char* hostname, int start, int end, ref bool allAscii, ref bool atLeastOneValidIdn)
        {
            // hostname already validated
            allAscii = true;
            atLeastOneValidIdn = false;

            if (end <= start)
            {
                return null;
            }

            string unescapedHostname = UriHelper.StripBidiControlCharacter(hostname, start, end - start);
            int length = unescapedHostname.Length;

            IdnMapping map = new IdnMapping();

            // The result is accumulated in a builder so that every label is copied only once
            // (repeated string concatenation re-copies everything built so far on each append).
            System.Text.StringBuilder unicodeEqvlHost = new System.Text.StringBuilder(length);

            int curPos = 0;

            // We run a loop where for every label
            // a) if label is ascii and no ace then we lowercase it
            // b) if label is ascii and ace and not valid idn then just lowercase it
            // c) if label is ascii and ace and is valid idn then get its unicode eqvl
            // d) if label is unicode then clean it by running it through idnmapping
            do
            {
                bool asciiLabel = true;
                bool foundDot = false;

                // The ACE prefix is only looked for at the very first char of a label
                bool foundAce = (curPos + 3 < length) &&
                                (unescapedHostname[curPos] == 'x') &&
                                IsIdnAce(unescapedHostname, curPos);

                //find the dot or hit the end
                int newPos = curPos;
                while (newPos < length)
                {
                    char c = unescapedHostname[newPos];

                    if (asciiLabel && (c > '\x7F'))
                    {
                        asciiLabel = false;
                        allAscii = false;
                    }

                    if ((c == '.') || (c == '\u3002') ||    //IDEOGRAPHIC FULL STOP
                        (c == '\uFF0E') ||                  //FULLWIDTH FULL STOP
                        (c == '\uFF61'))                    //HALFWIDTH IDEOGRAPHIC FULL STOP
                    {
                        foundDot = true;
                        break;
                    }

                    ++newPos;
                }

                // The label itself, without its trailing separator
                string label = unescapedHostname.Substring(curPos, newPos - curPos);

                if (!asciiLabel)
                {
                    string asciiForm;
                    try
                    {
                        asciiForm = map.GetAscii(label);
                    }
                    catch (ArgumentException)
                    {
                        throw new UriFormatException(SR.net_uri_BadUnicodeHostForIdn);
                    }

                    unicodeEqvlHost.Append(map.GetUnicode(asciiForm));
                }
                else
                {
                    bool aceValid = false;
                    if (foundAce)
                    {
                        // check ace validity
                        try
                        {
                            // GetUnicode throws before anything is appended, so a rejected label leaves no trace
                            unicodeEqvlHost.Append(map.GetUnicode(label));
                            aceValid = true;
                            atLeastOneValidIdn = true;
                        }
                        catch (ArgumentException)
                        {
                            // not valid ace so treat it as a normal ascii label
                        }
                    }

                    if (!aceValid)
                    {
                        // for invalid aces we just lowercase the label
                        unicodeEqvlHost.Append(label.ToLowerInvariant());
                    }
                }

                // Whatever separator was found is written back as a plain '.'
                if (foundDot)
                {
                    unicodeEqvlHost.Append('.');
                }

                curPos = newPos + (foundDot ? 1 : 0);
            } while (curPos < length);

            return unicodeEqvlHost.ToString();
        }
        public static unsafe int UnescapePercentEncodedUTF8Sequence(char *input, int length, ref ValueStringBuilder dest, bool isQuery, bool iriParsing)
        {
            // The following assertions rely on the input not mutating mid-operation, as is the case currently since callers are working with strings
            // If we start accepting input such as spans, this method must be audited to ensure no buffer overruns/infinite loops could occur

            // As an optimization, this method should only be called after the first character is known to be a part of a non-ascii UTF8 sequence
            Debug.Assert(length >= 3);
            Debug.Assert(input[0] == '%');
            Debug.Assert(UriHelper.DecodeHexChars(input[1], input[2]) != Uri.c_DummyChar);
            Debug.Assert(UriHelper.DecodeHexChars(input[1], input[2]) >= 128);

            uint fourByteBuffer    = 0;
            int  bytesLeftInBuffer = 0;

            int totalCharsConsumed = 0;
            int charsToCopy        = 0;
            int bytesConsumed      = 0;

RefillBuffer:
            int i = totalCharsConsumed + (bytesLeftInBuffer * 3);

ReadByteFromInput:
            if ((uint)(length - i) <= 2 || input[i] != '%')
            {
                goto NoMoreOrInvalidInput;
            }

            uint value = input[i + 1];

            if ((uint)((value - 'A') & ~0x20) <= ('F' - 'A'))
            {
                value = (value | 0x20) - 'a' + 10;
            }
            else if ((value - '8') <= ('9' - '8'))
            {
                value -= '0';
            }
            else
            {
                goto NoMoreOrInvalidInput;  // First character wasn't hex or was <= 7F (Ascii)
            }
            uint second = (uint)input[i + 2] - '0';

            if (second <= 9)
            {
                // second is already [0, 9]
            }
            else if ((uint)((second - ('A' - '0')) & ~0x20) <= ('F' - 'A'))
            {
                second = ((second + '0') | 0x20) - 'a' + 10;
            }
            else
            {
                goto NoMoreOrInvalidInput;  // Second character wasn't Hex
            }
            value = (value << 4) | second;

            Debug.Assert(value >= 128);

            // Rotate the buffer and overwrite the last byte
            if (BitConverter.IsLittleEndian)
            {
                fourByteBuffer = (fourByteBuffer >> 8) | (value << 24);
            }
            else
            {
                fourByteBuffer = (fourByteBuffer << 8) | value;
            }

            if (++bytesLeftInBuffer != 4)
            {
                i += 3;
                goto ReadByteFromInput;
            }

DecodeRune:
            Debug.Assert(totalCharsConsumed % 3 == 0);
            Debug.Assert(bytesLeftInBuffer == 2 || bytesLeftInBuffer == 3 || bytesLeftInBuffer == 4);
            Debug.Assert((fourByteBuffer & (BitConverter.IsLittleEndian ? 0x00000080 : 0x80000000)) != 0);
            Debug.Assert((fourByteBuffer & (BitConverter.IsLittleEndian ? 0x00008000 : 0x00800000)) != 0);
            Debug.Assert(bytesLeftInBuffer < 3 || (fourByteBuffer & (BitConverter.IsLittleEndian ? 0x00800000 : 0x00008000)) != 0);
            Debug.Assert(bytesLeftInBuffer < 4 || (fourByteBuffer & (BitConverter.IsLittleEndian ? 0x80000000 : 0x00000080)) != 0);

            uint temp = fourByteBuffer; // make a copy so that the *copy* (not the original) is marked address-taken

            if (Rune.DecodeFromUtf8(new ReadOnlySpan <byte>(&temp, bytesLeftInBuffer), out Rune rune, out bytesConsumed) == OperationStatus.Done)
            {
                Debug.Assert(bytesConsumed >= 2, $"Rune.DecodeFromUtf8 consumed {bytesConsumed} bytes, likely indicating input was modified concurrently during UnescapePercentEncodedUTF8Sequence's execution");

                if (!iriParsing || IriHelper.CheckIriUnicodeRange((uint)rune.Value, isQuery))
                {
                    if (charsToCopy != 0)
                    {
                        dest.Append(input + totalCharsConsumed - charsToCopy, charsToCopy);
                        charsToCopy = 0;
                    }

                    dest.Append(rune);
                    goto AfterDecodeRune;
                }
            }
Esempio n. 6
0
        //
        // IRI normalization for strings containing characters that are not allowed or
        // escaped characters that should be unescaped in the context of the specified Uri component.
        //
        // Processes pInput[start..end) and returns the normalized text:
        //  - "%XX" that is malformed, decodes to '%', to a reserved char for 'component', or to a char that is
        //    not safe to unescape is copied unchanged;
        //  - "%XX" that decodes to any other ASCII char is replaced by that char;
        //  - a run of "%XX" with values >= 0x80 is decoded as UTF-8 and handed to UriHelper.MatchUTF8Sequence,
        //    or copied unchanged when decoding produces no chars at all;
        //  - a literal non-ASCII char is percent-encoded (as UTF-8) when it fails the IRI range check.
        //
        internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end, UriComponents component)
        {
            int size = end - start;
            ValueStringBuilder dest = new ValueStringBuilder(size);

            // Scratch space for the UTF-8 form of one scalar value (at most 4 bytes). It is allocated once,
            // up front: stackalloc memory is released only when the method returns, so allocating it inside
            // the loop would grow the stack with every escaped character of the input.
            Span<byte> maxUtf8EncodedSpan = stackalloc byte[4];

            // Both are created on first use and then reused for every following escaped UTF-8 run
            byte[]? bytes = null;
            Encoding? noFallbackCharUTF8 = null;

            bool isQuery = component == UriComponents.Query;

            for (int next = start; next < end; ++next)
            {
                char ch = pInput[next];

                if (ch == '%')
                {
                    if (next + 2 >= end)
                    {
                        // too short to be an escape sequence, copy the percent sign
                        dest.Append(pInput[next]);
                        continue;
                    }

                    ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                    // Do not unescape a reserved char
                    if (ch == Uri.c_DummyChar || ch == '%' || CheckIsReserved(ch, component) || UriHelper.IsNotSafeForUnescape(ch))
                    {
                        // keep as is
                        dest.Append(pInput[next++]);
                        dest.Append(pInput[next++]);
                        dest.Append(pInput[next]);
                        continue;
                    }

                    if (ch <= '\x7F')
                    {
                        Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                        //ASCII
                        dest.Append(ch);
                        next += 2;
                        continue;
                    }

                    // possibly utf8 encoded sequence of unicode

                    // check if safe to unescape according to Iri rules

                    Debug.Assert(ch < 0xFF, "Expecting ASCII character.");

                    int startSeq = next;
                    int byteCount = 1;

                    // lazy initialization of max size, will reuse the array for next sequences
                    bytes ??= new byte[end - next];

                    bytes[0] = (byte)ch;
                    next += 3;
                    while (next < end)
                    {
                        // Check on exit criterion
                        if ((ch = pInput[next]) != '%' || next + 2 >= end)
                        {
                            break;
                        }

                        // already made sure we have 3 characters in str
                        ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                        //invalid hex sequence ?
                        if (ch == Uri.c_DummyChar)
                        {
                            break;
                        }
                        // character is not part of a UTF-8 sequence ?
                        else if (ch < '\x80')
                        {
                            break;
                        }
                        else
                        {
                            //a UTF-8 sequence
                            bytes[byteCount++] = (byte)ch;
                            next += 3;
                        }

                        Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                    }
                    next--; // for loop will increment

                    // Using encoder with no replacement fall-back will skip all invalid UTF-8 sequences.
                    // The encoding does not depend on the input, so one instance serves the whole call.
                    noFallbackCharUTF8 ??= Encoding.GetEncoding(
                        Encoding.UTF8.CodePage,
                        new EncoderReplacementFallback(""),
                        new DecoderReplacementFallback(""));

                    char[] unescapedChars = new char[bytes.Length];
                    int charCount = noFallbackCharUTF8.GetChars(bytes, 0, byteCount, unescapedChars, 0);

                    if (charCount != 0)
                    {
                        // If invalid sequences were present in the original escaped string, we need to
                        // copy the escaped versions of those sequences.
                        // Decoded Unicode values will be kept only when they are allowed by the URI/IRI RFC
                        // rules.
                        UriHelper.MatchUTF8Sequence(ref dest, unescapedChars, charCount, bytes,
                                                    byteCount, isQuery, true);
                    }
                    else
                    {
                        // copy escaped sequence as is
                        for (int i = startSeq; i <= next; ++i)
                        {
                            dest.Append(pInput[i]);
                        }
                    }
                }
                else if (ch > '\x7f')
                {
                    // unicode

                    bool escape;
                    bool surrogatePair = false;
                    char ch2 = '\0';

                    if (char.IsHighSurrogate(ch) && (next + 1 < end))
                    {
                        ch2 = pInput[next + 1];
                        escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, isQuery);
                    }
                    else
                    {
                        escape = !CheckIriUnicodeRange(ch, isQuery);
                    }

                    if (escape)
                    {
                        Rune rune;
                        if (surrogatePair)
                        {
                            rune = new Rune(ch, ch2);
                        }
                        else if (!Rune.TryCreate(ch, out rune))
                        {
                            // a lone surrogate cannot be encoded, U+FFFD is escaped in its place
                            rune = Rune.ReplacementChar;
                        }

                        int bytesWritten = rune.EncodeToUtf8(maxUtf8EncodedSpan);
                        Span<byte> encodedBytes = maxUtf8EncodedSpan.Slice(0, bytesWritten);

                        foreach (byte b in encodedBytes)
                        {
                            UriHelper.EscapeAsciiChar(b, ref dest);
                        }
                    }
                    else
                    {
                        dest.Append(ch);
                        if (surrogatePair)
                        {
                            dest.Append(ch2);
                        }
                    }

                    // the low surrogate has been handled together with the high one
                    if (surrogatePair)
                    {
                        next++;
                    }
                }
                else
                {
                    // just copy the character
                    dest.Append(pInput[next]);
                }
            }

            return dest.ToString();
        }
Esempio n. 7
0
        //
        // Tells whether ch counts as a reserved character (RFC 3987) for the given Uri component.
        //
        // Only a single component value selects a per-component rule. For the value 0 the general
        // delimiters are checked; any other value (e.g. a combination of components) reserves nothing.
        //
        internal static bool CheckIsReserved(char ch, UriComponents component)
        {
            switch (component)
            {
                case UriComponents.Scheme:
                case UriComponents.UserInfo:
                case UriComponents.Host:
                case UriComponents.Port:
                case UriComponents.Path:
                case UriComponents.Query:
                case UriComponents.Fragment:
                    // a single component, handled below
                    break;

                default:
                    return component == (UriComponents)0 && UriHelper.IsGenDelim(ch);
            }

            if (!UriParser.DontEnableStrictRFC3986ReservedCharacterSets)
            {
                // Strict mode: the same RFC 3986 reserved set applies to every component
                return UriHelper.RFC3986ReservedMarks.IndexOf(ch) >= 0;
            }

            // Strict RFC 3986 reserved sets are not enabled, so the old behavior is kept (for app-compat),
            // which was a broken mix of RFCs 2396 and 3986.
            switch (component)
            {
                case UriComponents.UserInfo:
                    return ch == '/' || ch == '?' || ch == '#' || ch == '[' || ch == ']' || ch == '@';

                case UriComponents.Host:
                    return ch == ':' || ch == '/' || ch == '?' || ch == '#' || ch == '[' || ch == ']' || ch == '@';

                case UriComponents.Path:
                    return ch == '/' || ch == '?' || ch == '#' || ch == '[' || ch == ']';

                case UriComponents.Query:
                case UriComponents.Fragment:
                    return ch == '#' || ch == '[' || ch == ']';

                default:
                    // Scheme and Port reserve nothing in this mode
                    return false;
            }
        }
Esempio n. 8
0
        //
        // IRI normalization for strings containing characters that are not allowed or
        // escaped characters that should be unescaped in the context of the specified Uri component.
        //
        // Inputs:
        //  pInput      Pointer to the characters to normalize; only pInput[start..end) is read.
        //  start, end  Half-open range of the input to process.
        //  component   Uri component the text belongs to; selects which characters count as reserved
        //              and whether the query-specific Unicode range check is used.
        //
        // Behavior, per input character:
        //  * "%XX" that does not decode (Uri.c_DummyChar), decodes to '%', is reserved for 'component',
        //    or is flagged by UriHelper.IsNotSafeForUnescape is copied verbatim.
        //  * Any other "%XX" decoding to a value <= 0x7F is replaced by the decoded character.
        //  * A run of "%XX" escapes with values >= 0x80 is decoded as UTF-8 and handed to
        //    UriHelper.MatchUTF8Sequence; if nothing decodes, the run is copied verbatim.
        //  * A '%' with fewer than two characters after it is copied verbatim.
        //  * A raw character above 0x7F is copied when CheckIriUnicodeRange accepts it (accepted bidi
        //    control characters are dropped when UriParser.DontKeepUnicodeBidiFormattingCharacters is
        //    set); otherwise it is percent-encoded as UTF-8.
        //  * Everything else is copied unchanged.
        //
        // Returns:
        //  A new string holding the normalized text.
        //
        internal static unsafe string EscapeUnescapeIri(char *pInput, int start, int end, UriComponents component)
        {
            char[] dest  = new char[end - start];
            byte[] bytes = null;

            // Decoder with empty replacement fall-backs; created on first use and reused for every
            // escaped UTF-8 run in this call instead of being rebuilt per run.
            Encoding noFallbackCharUTF8 = null;

            // Pin the array to do pointer accesses
            GCHandle destHandle = GCHandle.Alloc(dest, GCHandleType.Pinned);

            // The pinned handle must be released on every exit path, including exceptions.
            try
            {
                char *pDest = (char *)destHandle.AddrOfPinnedObject();

                const int percentEncodingLen     = 3; // Escaped UTF-8 will take 3 chars: %AB.
                const int bufferCapacityIncrease = 30 * percentEncodingLen;
                int       bufferRemaining        = 0;

                int  next       = start;
                int  destOffset = 0;
                char ch;
                bool escape        = false;
                bool surrogatePair = false;

                for (; next < end; ++next)
                {
                    escape        = false;
                    surrogatePair = false;

                    if ((ch = pInput[next]) == '%')
                    {
                        if (next + 2 < end)
                        {
                            ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                            // Do not unescape a reserved char
                            if (ch == Uri.c_DummyChar || ch == '%' || CheckIsReserved(ch, component) || UriHelper.IsNotSafeForUnescape(ch))
                            {
                                // keep as is
                                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                pDest[destOffset++] = pInput[next++];
                                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                pDest[destOffset++] = pInput[next++];
                                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                pDest[destOffset++] = pInput[next];
                                continue;
                            }
                            else if (ch <= '\x7F')
                            {
                                Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                //ASCII
                                pDest[destOffset++] = ch;
                                next += 2;
                                continue;
                            }
                            else
                            {
                                // possibly utf8 encoded sequence of unicode

                                // check if safe to unescape according to Iri rules

                                Debug.Assert(ch < 0xFF, "Expecting ASCII character.");

                                int startSeq  = next;
                                int byteCount = 1;
                                // lazy initialization of max size, will reuse the array for next sequences
                                if ((object)bytes == null)
                                {
                                    bytes = new byte[end - next];
                                }

                                bytes[0] = (byte)ch;
                                next    += 3;
                                while (next < end)
                                {
                                    // Check on exit criterion
                                    if ((ch = pInput[next]) != '%' || next + 2 >= end)
                                    {
                                        break;
                                    }

                                    // already made sure we have 3 characters in str
                                    ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                                    //invalid hex sequence ?
                                    if (ch == Uri.c_DummyChar)
                                    {
                                        break;
                                    }
                                    // character is not part of a UTF-8 sequence ?
                                    else if (ch < '\x80')
                                    {
                                        break;
                                    }
                                    else
                                    {
                                        //a UTF-8 sequence
                                        bytes[byteCount++] = (byte)ch;
                                        next += 3;
                                    }

                                    Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                                }
                                next--; // for loop will increment

                                // Using encoder with no replacement fall-back will skip all invalid UTF-8 sequences.
                                if (noFallbackCharUTF8 == null)
                                {
                                    noFallbackCharUTF8 = Encoding.GetEncoding(
                                        Encoding.UTF8.CodePage,
                                        new EncoderReplacementFallback(""),
                                        new DecoderReplacementFallback(""));
                                }

                                char[] unescapedChars = new char[bytes.Length];
                                int    charCount      = noFallbackCharUTF8.GetChars(bytes, 0, byteCount, unescapedChars, 0);

                                if (charCount != 0)
                                {
                                    // If invalid sequences were present in the original escaped string, we need to
                                    // copy the escaped versions of those sequences.
                                    // Decoded Unicode values will be kept only when they are allowed by the URI/IRI RFC
                                    // rules.
                                    UriHelper.MatchUTF8Sequence(pDest, dest, ref destOffset, unescapedChars, charCount, bytes,
                                                                byteCount, component == UriComponents.Query, true);
                                }
                                else
                                {
                                    // copy escaped sequence as is
                                    for (int i = startSeq; i <= next; ++i)
                                    {
                                        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                        pDest[destOffset++] = pInput[i];
                                    }
                                }
                            }
                        }
                        else
                        {
                            // '%' too close to the end to be a full escape: copy it literally
                            Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                            pDest[destOffset++] = pInput[next];
                        }
                    }
                    else if (ch > '\x7f')
                    {
                        // unicode

                        char ch2;

                        if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
                        {
                            ch2    = pInput[next + 1];
                            escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query);
                            if (!escape)
                            {
                                // copy the two chars
                                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                pDest[destOffset++] = pInput[next++];
                                Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                pDest[destOffset++] = pInput[next];
                            }
                        }
                        else
                        {
                            if (CheckIriUnicodeRange(ch, component == UriComponents.Query))
                            {
                                if (!UriHelper.IsBidiControlCharacter(ch) || !UriParser.DontKeepUnicodeBidiFormattingCharacters)
                                {
                                    // copy it
                                    Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                                    pDest[destOffset++] = pInput[next];
                                }
                            }
                            else
                            {
                                // escape it
                                escape = true;
                            }
                        }
                    }
                    else
                    {
                        // just copy the character
                        Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset.");
                        pDest[destOffset++] = pInput[next];
                    }

                    if (escape)
                    {
                        const int MaxNumberOfBytesEncoded = 4;

                        if (bufferRemaining < MaxNumberOfBytesEncoded * percentEncodingLen)
                        {
                            int newBufferLength = 0;

                            checked
                            {
                                // may need more memory since we didn't anticipate escaping
                                newBufferLength  = dest.Length + bufferCapacityIncrease;
                                bufferRemaining += bufferCapacityIncrease;
                            }

                            char[] newDest = new char[newBufferLength];

                            fixed(char *pNewDest = newDest)
                            {
                                Buffer.MemoryCopy((byte *)pDest, (byte *)pNewDest, newBufferLength * sizeof(char), destOffset * sizeof(char));
                            }

                            if (destHandle.IsAllocated)
                            {
                                destHandle.Free();
                            }

                            dest = newDest;

                            // re-pin new dest[] array
                            destHandle = GCHandle.Alloc(dest, GCHandleType.Pinned);
                            pDest      = (char *)destHandle.AddrOfPinnedObject();
                        }

                        byte[] encodedBytes = new byte[MaxNumberOfBytesEncoded];
                        fixed(byte *pEncodedBytes = &encodedBytes[0])
                        {
                            int encodedBytesCount = Encoding.UTF8.GetBytes(pInput + next, surrogatePair ? 2 : 1, pEncodedBytes, MaxNumberOfBytesEncoded);

                            Debug.Assert(encodedBytesCount <= MaxNumberOfBytesEncoded, "UTF8 encoder should not exceed specified byteCount");

                            bufferRemaining -= encodedBytesCount * percentEncodingLen;

                            for (int count = 0; count < encodedBytesCount; ++count)
                            {
                                UriHelper.EscapeAsciiChar((char)encodedBytes[count], dest, ref destOffset);
                            }
                        }

                        if (surrogatePair)
                        {
                            // Both halves of the pair were just encoded together; step over the low
                            // surrogate so the next iteration does not process it a second time.
                            ++next;
                        }
                    }
                }

                Debug.Assert(destOffset <= dest.Length, "Destination length met or exceeded destination offset.");
                return new string(dest, 0, destOffset);
            }
            finally
            {
                if (destHandle.IsAllocated)
                {
                    destHandle.Free();
                }
            }
        }
Esempio n. 9
0
        //
        // IRI normalization for strings containing characters that are not allowed or
        // escaped characters that should be unescaped in the context of the specified Uri component.
        //
        // Inputs:
        //  pInput      Pointer to the characters to normalize; only pInput[start..end) is read.
        //  start, end  Half-open range of the input to process.
        //  component   Uri component the text belongs to; selects which characters count as reserved
        //              and whether the query-specific Unicode range check is used.
        //
        // Behavior, per input character:
        //  * "%XX" that does not decode (Uri.c_DummyChar), decodes to '%', is reserved for 'component',
        //    or is flagged by UriHelper.IsNotSafeForUnescape is copied verbatim.
        //  * Any other "%XX" decoding to a value <= 0x7F is replaced by the decoded character.
        //  * A run of "%XX" escapes with values >= 0x80 is decoded as UTF-8 and handed to
        //    UriHelper.MatchUTF8Sequence; if nothing decodes, the run is copied verbatim.
        //  * A '%' with fewer than two characters after it is copied verbatim.
        //  * A raw character above 0x7F is copied when CheckIriUnicodeRange accepts it, otherwise it is
        //    percent-encoded as UTF-8.
        //  * Everything else is copied unchanged.
        //
        // Returns:
        //  A new string holding the normalized text.
        //
        internal static unsafe string EscapeUnescapeIri(char *pInput, int start, int end, UriComponents component)
        {
            int size = end - start;
            ValueStringBuilder dest = new ValueStringBuilder(size);

            byte[]? bytes = null;

            // Decoder with empty replacement fall-backs; created on first use and reused for every
            // escaped UTF-8 run in this call instead of being rebuilt per run.
            Encoding? noFallbackCharUTF8 = null;

            int  next = start;
            char ch;
            bool escape        = false;
            bool surrogatePair = false;

            for (; next < end; ++next)
            {
                escape        = false;
                surrogatePair = false;

                if ((ch = pInput[next]) == '%')
                {
                    if (next + 2 < end)
                    {
                        ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                        // Do not unescape a reserved char
                        if (ch == Uri.c_DummyChar || ch == '%' || CheckIsReserved(ch, component) || UriHelper.IsNotSafeForUnescape(ch))
                        {
                            // keep as is
                            dest.Append(pInput[next++]);
                            dest.Append(pInput[next++]);
                            dest.Append(pInput[next]);
                            continue;
                        }
                        else if (ch <= '\x7F')
                        {
                            Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                            //ASCII
                            dest.Append(ch);
                            next += 2;
                            continue;
                        }
                        else
                        {
                            // possibly utf8 encoded sequence of unicode

                            // check if safe to unescape according to Iri rules

                            Debug.Assert(ch < 0xFF, "Expecting ASCII character.");

                            int startSeq  = next;
                            int byteCount = 1;
                            // lazy initialization of max size, will reuse the array for next sequences
                            if ((object?)bytes == null)
                            {
                                bytes = new byte[end - next];
                            }

                            bytes[0] = (byte)ch;
                            next    += 3;
                            while (next < end)
                            {
                                // Check on exit criterion
                                if ((ch = pInput[next]) != '%' || next + 2 >= end)
                                {
                                    break;
                                }

                                // already made sure we have 3 characters in str
                                ch = UriHelper.EscapedAscii(pInput[next + 1], pInput[next + 2]);

                                //invalid hex sequence ?
                                if (ch == Uri.c_DummyChar)
                                {
                                    break;
                                }
                                // character is not part of a UTF-8 sequence ?
                                else if (ch < '\x80')
                                {
                                    break;
                                }
                                else
                                {
                                    //a UTF-8 sequence
                                    bytes[byteCount++] = (byte)ch;
                                    next += 3;
                                }

                                Debug.Assert(ch < 0xFF, "Expecting ASCII character.");
                            }
                            next--; // for loop will increment

                            // Using encoder with no replacement fall-back will skip all invalid UTF-8 sequences.
                            noFallbackCharUTF8 ??= Encoding.GetEncoding(
                                Encoding.UTF8.CodePage,
                                new EncoderReplacementFallback(""),
                                new DecoderReplacementFallback(""));

                            char[] unescapedChars = new char[bytes.Length];
                            int    charCount      = noFallbackCharUTF8.GetChars(bytes, 0, byteCount, unescapedChars, 0);

                            if (charCount != 0)
                            {
                                // If invalid sequences were present in the original escaped string, we need to
                                // copy the escaped versions of those sequences.
                                // Decoded Unicode values will be kept only when they are allowed by the URI/IRI RFC
                                // rules.
                                UriHelper.MatchUTF8Sequence(ref dest, unescapedChars, charCount, bytes,
                                                            byteCount, component == UriComponents.Query, true);
                            }
                            else
                            {
                                // copy escaped sequence as is
                                for (int i = startSeq; i <= next; ++i)
                                {
                                    dest.Append(pInput[i]);
                                }
                            }
                        }
                    }
                    else
                    {
                        // '%' too close to the end to be a full escape: copy it literally
                        dest.Append(pInput[next]);
                    }
                }
                else if (ch > '\x7f')
                {
                    // unicode

                    char ch2;

                    if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
                    {
                        ch2    = pInput[next + 1];
                        escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query);
                        if (!escape)
                        {
                            // copy the two chars
                            dest.Append(pInput[next++]);
                            dest.Append(pInput[next]);
                        }
                    }
                    else
                    {
                        if (CheckIriUnicodeRange(ch, component == UriComponents.Query))
                        {
                            // copy it
                            dest.Append(pInput[next]);
                        }
                        else
                        {
                            // escape it
                            escape = true;
                        }
                    }
                }
                else
                {
                    // just copy the character
                    dest.Append(pInput[next]);
                }

                if (escape)
                {
                    const int MaxNumberOfBytesEncoded = 4;

                    byte[] encodedBytes = new byte[MaxNumberOfBytesEncoded];
                    fixed(byte *pEncodedBytes = &encodedBytes[0])
                    {
                        int encodedBytesCount = Encoding.UTF8.GetBytes(pInput + next, surrogatePair ? 2 : 1, pEncodedBytes, MaxNumberOfBytesEncoded);

                        Debug.Assert(encodedBytesCount <= MaxNumberOfBytesEncoded, "UTF8 encoder should not exceed specified byteCount");

                        for (int count = 0; count < encodedBytesCount; ++count)
                        {
                            UriHelper.EscapeAsciiChar((char)encodedBytes[count], ref dest);
                        }
                    }

                    if (surrogatePair)
                    {
                        // Both halves of the pair were just encoded together; step over the low
                        // surrogate so the next iteration does not process it a second time.
                        ++next;
                    }
                }
            }

            return dest.ToString();
        }
Esempio n. 10
0
        //
        // Parse
        //
        //  Convert this IPv6 address into a sequence of 8 16-bit numbers
        //
        // Inputs:
        //  <argument>  address
        //      The validated IPv6 address text. A leading '[' at 'start' is skipped; parsing stops at
        //      the closing ']' or at the end of the string, whichever comes first.
        //
        //  <argument>  numbers
        //      Caller-owned buffer that receives the groups. Entries 0..7 are read when computing the
        //      return value, so it must hold at least 8 entries. Slots that are not written here keep
        //      whatever the caller put there, so the caller presumably passes a zeroed buffer
        //      (TODO confirm against callers).
        //
        //  <argument>  start
        //      Index in 'address' at which the IPv6 text (or its opening '[') begins.
        //
        // Outputs:
        //  <argument>  numbers
        //      Filled in with the numbers in the IPv6 groups, with any "::" expanded.
        //
        //  <argument>  scopeId
        //      Set to the scope id text, including the leading '%', when one is present; otherwise
        //      left untouched.
        //
        //  A prefix length ("/nn"), if present, is skipped and not reported.
        //
        // Assumes:
        //  <address> has been validated and contains only hex digits in groups of
        //  16-bit numbers, the characters ':' and '/', and a possible IPv4
        //  address
        //
        // Returns:
        //  true if this is a loopback, false otherwise. There is no failure indication as the string must be a valid one.
        //
        // Throws:
        //  Nothing for validated input.
        //

        unsafe internal static bool Parse(string address, ushort *numbers, int start, ref string scopeId)
        {
            int  number          = 0;
            int  index           = 0;
            int  compressorIndex = -1;
            bool numberIsValid   = true;

            if (address[start] == '[')
            {
                ++start;
            }

            // Every scan below is bounded by the string length as well as by ']' so that an address
            // without a closing bracket ends parsing cleanly instead of indexing past the end.
            int length = address.Length;

            for (int i = start; i < length && address[i] != ']';)
            {
                switch (address[i])
                {
                case '%':
                    if (numberIsValid)
                    {
                        numbers[index++] = (ushort)number;
                        numberIsValid    = false;
                    }

                    // the scope id runs from '%' up to the prefix delimiter or the end of the address
                    start = i;
                    for (++i; i < length && address[i] != ']' && address[i] != '/'; ++i)
                    {
                    }
                    scopeId = address.Substring(start, i - start);
                    // ignore prefix if any
                    for (; i < length && address[i] != ']'; ++i)
                    {
                    }
                    break;

                case ':':
                    numbers[index++] = (ushort)number;
                    number           = 0;
                    ++i;
                    if (i < length && address[i] == ':')
                    {
                        compressorIndex = index;
                        ++i;
                    }
                    else if ((compressorIndex < 0) && (index < 6))
                    {
                        //
                        // no point checking for IPv4 address if we don't
                        // have a compressor or we haven't seen 6 16-bit
                        // numbers yet
                        //

                        break;
                    }

                    //
                    // check to see if the upcoming number is really an IPv4
                    // address. If it is, convert it to 2 ushort numbers
                    //

                    for (int j = i; (j < length) &&
                         (address[j] != ']') &&
                         (address[j] != ':') &&
                         (address[j] != '%') &&
                         (address[j] != '/') &&
                         (j < i + 4); ++j)
                    {
                        if (address[j] == '.')
                        {
                            //
                            // we have an IPv4 address. Find the end of it:
                            // since we have a valid IPv6 address, the only
                            // things that will terminate the IPv4 address are
                            // the scope delimiter '%', the prefix delimiter
                            // '/', the closing ']' or the end of the string
                            //

                            while ((j < length) && (address[j] != ']') && (address[j] != '/') && (address[j] != '%'))
                            {
                                ++j;
                            }
                            number           = IPv4AddressHelper.ParseHostNumber(address, i, j);
                            numbers[index++] = (ushort)(number >> 16);
                            numbers[index++] = (ushort)number;
                            i = j;

                            //
                            // set this to avoid adding another number to
                            // the array if there's a prefix
                            //

                            number        = 0;
                            numberIsValid = false;
                            break;
                        }
                    }
                    break;

                case '/':
                    if (numberIsValid)
                    {
                        numbers[index++] = (ushort)number;
                        numberIsValid    = false;
                    }

                    //
                    // since we have a valid IPv6 address string, the prefix
                    // length is the last token in the string; its value is
                    // not used, so just step over it
                    //

                    for (++i; i < length && address[i] != ']'; ++i)
                    {
                    }
                    break;

                default:
                    number = number * 16 + UriHelper.FromHex(address[i++]);
                    break;
                }
            }

            //
            // add number to the array if its not the prefix length or part of
            // an IPv4 address that's already been handled
            //

            if (numberIsValid)
            {
                numbers[index++] = (ushort)number;
            }

            //
            // if we had a compressor sequence ("::") then we need to expand the
            // numbers array: move the groups that followed the "::" to the end
            // of the array and zero the slots they came from
            //

            if (compressorIndex > 0)
            {
                int toIndex   = NumberOfLabels - 1;
                int fromIndex = index - 1;

                for (int i = index - compressorIndex; i > 0; --i)
                {
                    numbers[toIndex--]   = numbers[fromIndex];
                    numbers[fromIndex--] = 0;
                }
            }

            //
            // is the address loopback? Loopback is defined as one of:
            //
            //  0:0:0:0:0:0:0:1
            //  0:0:0:0:0:0:127.0.0.1       == 0:0:0:0:0:0:7F00:0001
            //  0:0:0:0:0:FFFF:127.0.0.1    == 0:0:0:0:0:FFFF:7F00:0001
            //

            return(((numbers[0] == 0) &&
                    (numbers[1] == 0) &&
                    (numbers[2] == 0) &&
                    (numbers[3] == 0) &&
                    (numbers[4] == 0)) &&
                   (((numbers[5] == 0) &&
                     (numbers[6] == 0) &&
                     (numbers[7] == 1)) ||
                    (((numbers[6] == 0x7F00) &&
                      (numbers[7] == 0x0001)) &&
                     ((numbers[5] == 0) ||
                      (numbers[5] == 0xFFFF)))));
        }
Esempio n. 11
0
        //
        // InternalIsValid
        //
        //  Decides whether name[start..end) is laid out like an IPv6 address:
        //
        //   *  8 groups of up to four hex digits, separated by ':'
        //   *  at most one "::" standing in for a run of omitted groups
        //   *  an optional scope id introduced by '%' (its content is not inspected)
        //   *  an optional trailing prefix length of 1 or 2 decimal digits introduced by '/'
        //      (rejected outright when validateStrictAddress is true)
        //   *  an optional embedded IPv4 address, which counts as two groups
        //
        // Inputs:
        //  name                   Pointer to the characters to examine.
        //  start                  Index of the first character after '['.
        //  end                    Exclusive upper bound of the scan.
        //  validateStrictAddress  When true, a '/' prefix makes the address invalid.
        //
        // Outputs:
        //  end   On success only, set to the index just past the closing ']'.
        //
        // Returns:
        //  true if the text matches the rules above and a closing ']' was reached, else false
        //
        // Throws:
        //  Nothing
        //

        //  Remarks: MUST NOT be used unless all input indexes are verified and trusted.
        //           start must be next to '[' position, or error is reported
        unsafe private static bool InternalIsValid(char *name, int start, ref int end, bool validateStrictAddress)
        {
            int  groupCount     = 0;     // digit groups completed so far (a prefix length counts as one)
            int  runLength      = 0;     // digits seen in the group currently being read
            int  lastGroupStart = 1;     // index at which the most recently completed group began
            bool sawCompressor  = false;
            bool sawIPv4        = false;
            bool sawPrefix      = false;
            bool needDigit      = true;  // true while the last token still has to be followed by a digit

            int pos;

            for (pos = start; pos < end; ++pos)
            {
                char current = name[pos];

                // After '/', only decimal digits extend the current run; before it, any hex digit does.
                bool extendsRun;
                if (sawPrefix)
                {
                    extendsRun = current >= '0' && current <= '9';
                }
                else
                {
                    extendsRun = UriHelper.IsHexDigit(current);
                }

                if (extendsRun)
                {
                    ++runLength;
                    needDigit = false;
                    continue;
                }

                // A delimiter closes the current run of digits.
                if (runLength > 4)
                {
                    return false;
                }

                if (runLength != 0)
                {
                    ++groupCount;
                    lastGroupStart = pos - runLength;
                }

                switch (current)
                {
                    case '%':
                        // Scope id: skip everything up to the prefix delimiter or the closing bracket.
                        while (true)
                        {
                            ++pos;
                            if (pos == end)
                            {
                                // ran out of input before a closing ']'
                                return false;
                            }

                            char scopeChar = name[pos];
                            if (scopeChar == ']')
                            {
                                goto case ']';
                            }

                            if (scopeChar == '/')
                            {
                                goto case '/';
                            }
                        }

                    case ']':
                        // Remember where the bracket is, then force the loop to finish with
                        // pos == end + 1 (the loop increment runs after this continue).
                        start = pos;
                        pos   = end;
                        continue;

                    case ':':
                        if ((pos > 0) && (name[pos - 1] == ':'))
                        {
                            // "::" may appear only once per address
                            if (sawCompressor)
                            {
                                return false;
                            }

                            sawCompressor = true;
                            needDigit     = false;
                        }
                        else
                        {
                            needDigit = true;
                        }
                        break;

                    case '/':
                        if (validateStrictAddress || (groupCount == 0) || sawPrefix)
                        {
                            return false;
                        }

                        sawPrefix = true;
                        needDigit = true;
                        break;

                    case '.':
                        if (sawIPv4)
                        {
                            return false;
                        }

                        // Hand the text from the start of the last group to the IPv4 validator,
                        // which moves pos through its ref argument.
                        pos = end;
                        if (!IPv4AddressHelper.IsValid(name, lastGroupStart, ref pos, true, false, false))
                        {
                            return false;
                        }

                        // An IPv4 address fills two groups; the first was counted when '.' ended the run.
                        ++groupCount;
                        sawIPv4 = true;
                        --pos;              // the loop increment puts it back
                        break;

                    default:
                        return false;
                }

                runLength = 0;
            }

            // A prefix length, when present, is the last token and must be 1 or 2 digits long.
            if (sawPrefix && ((runLength < 1) || (runLength > 2)))
            {
                return false;
            }

            // The prefix length was counted as a group of its own, hence the extra one.
            int expectedGroups = sawPrefix ? 9 : 8;

            bool groupCountOk = sawCompressor
                ? (groupCount < expectedGroups)
                : (groupCount == expectedGroups);

            // pos == end + 1 only arises from the ']' case above, i.e. the closing bracket was found.
            if (needDigit || (runLength > 4) || !groupCountOk || (pos != end + 1))
            {
                return false;
            }

            end = start + 1;
            return true;
        }