Пример #1
0
        /// <summary>
        /// Scans a link destination (URL) beginning at <paramref name="pos"/> and reports how many
        /// characters it spans, counted from <paramref name="pos"/> (so any leading whitespace that
        /// was skipped is part of the reported length).
        /// Two forms are handled: a destination wrapped in &lt;..&gt;, which is rejected if it
        /// contains a space, a newline or an unescaped &lt;; and a bare destination, which ends at
        /// the first character with code 0x20 or lower or at an unbalanced right parenthesis and
        /// may contain a single, non-nested pair of unescaped parentheses.
        /// </summary>
        /// <param name="s">The text being scanned.</param>
        /// <param name="pos">Index in <paramref name="s"/> where scanning starts.</param>
        /// <param name="sourceLength">Length of the scannable region; the last index examined is <c>sourceLength - 1</c>.</param>
        /// <returns>The number of characters matched, or 0 when nothing matched.</returns>
        public static int scan_link_url(string s, int pos, int sourceLength)
        {
            /*!re2c
             * [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
             * [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
             * .? { return 0; }
             */

            // Fewer than two characters left: nothing is scanned.
            if (sourceLength <= pos + 1)
            {
                return 0;
            }

            int last = sourceLength - 1;
            int cursor = pos;
            char current = s[cursor];
            bool escaped = false;

            // Skip leading whitespace (helper is defined outside this view; it updates
            // 'current' and 'cursor' through the ref parameters).
            ScannerCharacterMatcher.MatchWhitespaces(s, ref current, ref cursor, last);

            if (current == '<')
            {
                // Bracketed form: walk forward until the closing, unescaped '>'.
                if (cursor == last)
                {
                    return 0;
                }

                for (current = s[++cursor]; cursor <= last; current = s[++cursor])
                {
                    if (current == ' ' || current == '\n')
                    {
                        return 0;
                    }

                    if (!escaped && (current == '<' || current == '>'))
                    {
                        // '>' closes the destination (length includes it); a second '<' is invalid.
                        return current == '>' ? cursor - pos + 1 : 0;
                    }

                    if (cursor == last)
                    {
                        // Ran out of input before the closing '>'.
                        return 0;
                    }

                    // A backslash escapes the next character unless it is itself escaped.
                    escaped = current == '\\' && !escaped;
                }

                return 0;
            }

            // Bare form: only one level of parentheses is tracked.
            bool insideParens = false;

            for (; cursor <= last; current = s[++cursor])
            {
                if (!escaped)
                {
                    if (current == '(')
                    {
                        if (insideParens)
                        {
                            // Nested parentheses are not accepted.
                            return 0;
                        }
                        insideParens = true;
                    }
                    else if (current == ')')
                    {
                        if (!insideParens)
                        {
                            // Unbalanced ')' ends the destination and is not part of it.
                            return cursor - pos;
                        }
                        insideParens = false;
                    }
                }

                if (current <= 0x20)
                {
                    // Whitespace/control character ends the destination; an open '(' makes it invalid.
                    return insideParens ? 0 : cursor - pos;
                }

                if (cursor == last)
                {
                    // The final character belongs to the destination.
                    return insideParens ? 0 : cursor - pos + 1;
                }

                escaped = current == '\\' && !escaped;
            }

            return 0;
        }
Пример #2
0
        /// <summary>
        /// Try to match email autolink after first &lt;, returning num of chars matched.
        /// The match covers the local part, the '@', a domain made of dot-separated labels and
        /// the closing &gt;. Unlike the grammar quoted in the body, this implementation also
        /// requires the domain to contain at least one '.'.
        /// </summary>
        /// <param name="s">The text being scanned.</param>
        /// <param name="pos">Index of the first character of the address (the one after the opening &lt;).</param>
        /// <param name="sourceLength">Length of the scannable region; the last index examined is <c>sourceLength - 1</c>.</param>
        /// <returns>The number of characters matched including the closing &gt;, or 0 when there is no match.</returns>
        public static int scan_autolink_email(string s, int pos, int sourceLength)
        {
            /*!re2c
             * [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
             *  [@]
             *  [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
             *  ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
             *  [>] { return (p - start); }
             * .? { return 0; }
             */

            // The shortest address accepted here is "a@b.c>" (6 characters), so exactly six
            // remaining characters must still be allowed through (the previous ">=" comparison
            // rejected an address that ended exactly at the end of the input).
            if (pos + 6 > sourceLength)
            {
                return(0);
            }

            char c = s[pos];

            // The local part must contain at least one character.
            if (c == '@')
            {
                return(0);
            }

            int i  = pos;
            int ln = sourceLength - 1;

            // Local part: consume allowed characters up to the '@'.
            while (i <= ln)
            {
                if (c == '@')
                {
                    break;
                }

                if ((c < 'a' || c > 'z') &&
                    (c < 'A' || c > 'Z') &&
                    (c < '0' || c > '9') &&
                    ".!#$%&'*+/=?^_`{|}~-".IndexOf(c) == -1)
                {
                    return(0);
                }
                if (i == ln)
                {
                    return(0);
                }
                c = s[++i];
            }

            // move past '@'
            if (i == ln)
            {
                return(0);
            }
            c = s[++i];
            bool hadDot = false;

            // Domain: one label per iteration, labels separated by '.', terminated by '>'.
            while (true)
            {
                var domainStart = i;

                // NOTE(review): the helper is defined outside this view; it is presumed to advance
                // over a run of ASCII letters, digits and '-' and to return false when nothing
                // was consumed - confirm against ScannerCharacterMatcher.
                if (!ScannerCharacterMatcher.MatchAsciiLetterOrDigit(s, ref c, ref i, ln, '-'))
                {
                    return(0);
                }

                // A label must neither start nor end with '-' and may be at most 63 characters long.
                // (The leading '-' check was missing, so "foo@-bar.com>" used to be accepted
                // although the grammar above requires [a-zA-Z0-9] as the first label character.)
                if (s[domainStart] == '-' || s[i - 1] == '-' || i - domainStart > 63)
                {
                    return(0);
                }

                if (c == '>')
                {
                    return(hadDot ? i - pos + 1 : 0);
                }

                // Anything other than a '.' followed by more input ends the scan without a match.
                if (c != '.' || i == ln)
                {
                    return(0);
                }

                hadDot = true;
                c      = s[++i];
            }
        }
Пример #3
0
        /// <summary>
        /// Tries to match the remainder of an HTML open tag starting at the tag name: the name,
        /// any number of attributes (each optionally followed by <c>=</c> and a quoted or
        /// unquoted value), an optional <c>/</c> and the closing <c>&gt;</c>.
        /// </summary>
        /// <param name="s">The text being scanned.</param>
        /// <param name="pos">Index of the first character of the tag name.</param>
        /// <param name="sourceLength">Length of the scannable region; the last index examined is <c>sourceLength - 1</c>.</param>
        /// <returns>The number of characters matched including the closing <c>&gt;</c>, or 0 when there is no match.</returns>
        private static int _scanHtmlTagOpenTag(string s, int pos, int sourceLength)
        {
            var lastPosition = sourceLength - 1;

            // the minimum length valid tag is "a>"
            if (lastPosition < pos + 1)
            {
                return(0);
            }

            // currentPosition - positioned after the last character matched by any particular part
            var currentPosition = pos;

            // stores the character at the current position
            char currentChar = s[currentPosition];

            // stores if the previous character was a whitespace
            bool hadWhitespace = false;

            // stores if an attribute name has been parsed
            bool hadAttribute = false;

            // holds the quote character that opened the current attribute value
            char c1;

            // NOTE(review): the ScannerCharacterMatcher helpers are defined outside this view; they
            // are presumed to advance currentChar/currentPosition over matching characters without
            // going past lastPosition and to return whether anything was consumed - confirm.

            // The tag name must start with an ASCII letter
            if (!ScannerCharacterMatcher.MatchAsciiLetter(s, ref currentChar, ref currentPosition, lastPosition))
            {
                return(0);
            }

            // Move past any other characters that make up the tag name
            ScannerCharacterMatcher.MatchHtmlTagNameCharacter(s, ref currentChar, ref currentPosition, lastPosition);

            // loop until the end of the string is reached or the tag is closed
            while (currentPosition <= lastPosition)
            {
                // Move past any whitespaces
                hadWhitespace = ScannerCharacterMatcher.MatchWhitespaces(s, ref currentChar, ref currentPosition, lastPosition);

                // check if the end of the tag has been reached
                if (currentChar == '>')
                {
                    return(currentPosition - pos + 1);
                }

                // a '/' is only valid as part of the self-closing "/>" terminator
                if (currentChar == '/')
                {
                    if (currentPosition == lastPosition)
                    {
                        return(0);
                    }
                    currentChar = s[++currentPosition];
                    return((currentChar == '>') ? currentPosition - pos + 1 : 0);
                }

                // check if arrived at the attribute value
                if (currentChar == '=')
                {
                    // a value needs a preceding attribute name and at least one more character
                    if (!hadAttribute || currentPosition == lastPosition)
                    {
                        return(0);
                    }

                    // move past the '=' symbol and any whitespaces
                    currentChar = s[++currentPosition];
                    ScannerCharacterMatcher.MatchWhitespaces(s, ref currentChar, ref currentPosition, lastPosition);

                    if (currentChar == '\'' || currentChar == '\"')
                    {
                        c1 = currentChar;

                        // The opening quote may be the very last character of the input; reading
                        // the character after it without this check would index past lastPosition.
                        if (currentPosition == lastPosition)
                        {
                            return(0);
                        }

                        currentChar = s[++currentPosition];
                        ScannerCharacterMatcher.MatchAnythingExcept(s, ref currentChar, ref currentPosition, lastPosition, c1);

                        // the value must be closed by the same quote and the tag must continue after it
                        if (currentChar != c1 || currentPosition == lastPosition)
                        {
                            return(0);
                        }

                        currentChar = s[++currentPosition];
                    }
                    else
                    {
                        // an unquoted value must have at least one character
                        if (!ScannerCharacterMatcher.MatchAnythingExceptWhitespaces(s, ref currentChar, ref currentPosition, lastPosition, '\"', '\'', '=', '<', '>', '`'))
                        {
                            return(0);
                        }
                    }

                    // the value belongs to the attribute just parsed; a further '=' needs a new name
                    hadAttribute = false;
                    continue;
                }

                // the attribute must be preceded by a whitespace
                if (!hadWhitespace)
                {
                    return(0);
                }

                // if the end has not been found then there is just one possible alternative - an attribute
                // validate that the attribute name starts with a correct character
                if (!ScannerCharacterMatcher.MatchAsciiLetter(s, ref currentChar, ref currentPosition, lastPosition, '_', ':'))
                {
                    return(0);
                }

                // match any remaining characters in the attribute name
                ScannerCharacterMatcher.MatchAsciiLetterOrDigit(s, ref currentChar, ref currentPosition, lastPosition, '_', ':', '.', '-');

                hadAttribute = true;
            }

            return(0);
        }