コード例 #1
0
        /// <summary>
        /// Tries to parse the remainder of an HTML declaration, i.e. the part matching
        /// <c>[A-Z]+\s+[^&gt;\x00]*&gt;</c>, starting at the current character of <paramref name="text"/>.
        /// </summary>
        /// <param name="text">The slice being scanned; it is advanced as characters are consumed.</param>
        /// <param name="builder">Receives every character that was consumed, including the closing <c>&gt;</c>.</param>
        /// <returns><c>true</c> when a closing <c>&gt;</c> was reached; <c>false</c> otherwise.</returns>
        private static bool TryParseHtmlTagDeclaration(ref StringSlice text, StringBuilder builder)
        {
            // Declaration name: one or more upper-case letters.
            var current = text.CurrentChar;
            var sawName = false;
            for (; current.IsAlphaUpper(); current = text.NextChar())
            {
                builder.Append(current);
                sawName = true;
            }

            // The name is mandatory and must be followed by whitespace.
            if (!sawName || !current.IsWhitespace())
            {
                return false;
            }

            // Copy everything up to (but not including) the closing '>'.
            // Reaching the end of the slice ('\0') first means the declaration is unterminated.
            do
            {
                builder.Append(current);
                current = text.NextChar();
                if (current == '\0')
                {
                    return false;
                }
            }
            while (current != '>');

            // Step past '>' and record it.
            text.NextChar();
            builder.Append('>');
            return true;
        }
コード例 #2
0
        /// <summary>
        /// Tries to parse a CDATA section body (<c>[CDATA[ ... ]]&gt;</c>) at the current position of <paramref name="text"/>.
        /// </summary>
        /// <param name="text">The slice being scanned; it is advanced as characters are consumed.</param>
        /// <param name="builder">Receives the <c>[CDATA[</c> marker, the content and the closing <c>]]&gt;</c>.</param>
        /// <returns><c>true</c> when the closing <c>]]&gt;</c> was found; <c>false</c> otherwise.</returns>
        private static bool TryParseHtmlTagCData(ref StringSlice text, ref ValueStringBuilder builder)
        {
            if (!text.Match("[CDATA["))
            {
                return false;
            }

            builder.Append("[CDATA[");

            // The marker is 7 characters long: advancing by 6 leaves the slice on its last
            // character, so the first NextChar() below yields the first content character.
            text.Start += 6;

            var previous = '\0';
            for (var current = text.NextChar(); current != '\0'; current = text.NextChar())
            {
                builder.Append(current);

                // "]]" immediately followed by '>' terminates the section.
                if (current == ']' && previous == ']' && text.PeekChar() == '>')
                {
                    text.SkipChar();
                    text.SkipChar();
                    builder.Append('>');
                    return true;
                }

                previous = current;
            }

            // Ran off the end of the slice without finding the terminator.
            return false;
        }
コード例 #3
0
        /// <summary>
        /// Tries to parse an HTML closing tag. The slice is expected to be positioned on the
        /// <c>/</c> of <c>&lt;/</c>; the tag name is read starting with the next character.
        /// </summary>
        /// <param name="text">The slice being scanned; it is advanced as characters are consumed.</param>
        /// <param name="builder">Receives <c>/</c>, the tag name, any trailing whitespace and the closing <c>&gt;</c>.</param>
        /// <returns><c>true</c> when a well-formed closing tag was consumed; <c>false</c> otherwise.</returns>
        internal static bool TryParseHtmlCloseTag(ref StringSlice text, StringBuilder builder)
        {
            // </[A-Za-z][A-Za-z0-9-]*\s*>
            builder.Append('/');

            var c = text.NextChar();
            if (!c.IsAlpha())
            {
                return false;
            }
            builder.Append(c);

            // Becomes true once whitespace has been seen after the tag name;
            // from then on only more whitespace or '>' is acceptable.
            bool inTrailingWhitespace = false;

            while (true)
            {
                c = text.NextChar();
                if (c == '>')
                {
                    text.NextChar();
                    builder.Append('>');
                    return true;
                }

                // Accept every whitespace character between the tag name and '>',
                // not only U+0020: tabs and line endings are valid there as well.
                bool isWhitespace = c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\v' || c == '\f';
                if (isWhitespace)
                {
                    inTrailingWhitespace = true;
                }
                else if (inTrailingWhitespace || !(c.IsAlphaNumeric() || c == '-'))
                {
                    // Either a non-whitespace character after the trailing whitespace,
                    // or a character that cannot be part of a tag name (this includes '\0').
                    return false;
                }

                builder.Append(c);
            }
        }
コード例 #4
0
        /// <summary>
        /// Tries to parse an inline HTML construct starting at <c>&lt;</c> and dispatches to the
        /// specialised parser for closing tags, processing instructions, comments, CDATA sections,
        /// declarations or open tags depending on the characters that follow.
        /// </summary>
        /// <param name="text">The slice being scanned; it is advanced as characters are consumed.</param>
        /// <param name="builder">Receives the characters consumed. A <c>null</c> value is reported through <c>ThrowHelper.ArgumentNullException</c>.</param>
        /// <returns>The result of the specialised parser, or <c>false</c> when the current character is not <c>&lt;</c>.</returns>
        public static bool TryParseHtmlTag(ref StringSlice text, StringBuilder builder)
        {
            if (builder is null)
            {
                ThrowHelper.ArgumentNullException(nameof(builder));
            }

            if (text.CurrentChar != '<')
            {
                return false;
            }

            var marker = text.NextChar();
            builder.Append('<');

            if (marker == '/')
            {
                return TryParseHtmlCloseTag(ref text, builder);
            }

            if (marker == '?')
            {
                return TryParseHtmlTagProcessingInstruction(ref text, builder);
            }

            if (marker != '!')
            {
                // Anything else can only be an open tag.
                return TryParseHtmlTagOpenTag(ref text, builder);
            }

            // "<!" introduces a comment, a CDATA section or a declaration;
            // the character after '!' tells them apart.
            builder.Append(marker);
            switch (text.NextChar())
            {
                case '-':
                    return TryParseHtmlTagHtmlComment(ref text, builder);

                case '[':
                    return TryParseHtmlTagCData(ref text, builder);

                default:
                    return TryParseHtmlTagDeclaration(ref text, builder);
            }
        }
コード例 #5
0
        /// <summary>
        /// Tries to parse the remainder of an HTML comment. The next character of
        /// <paramref name="text"/> must be <c>-</c>; the comment text then runs up to the closing <c>--&gt;</c>.
        /// </summary>
        /// <param name="text">The slice being scanned; it is advanced as characters are consumed.</param>
        /// <param name="builder">Receives <c>--</c>, the comment text and the closing <c>--&gt;</c>.</param>
        /// <returns>
        /// <c>true</c> when the comment was closed by <c>--&gt;</c>; <c>false</c> when the opening is
        /// malformed, the text starts with <c>&gt;</c>, <c>--</c> is followed by anything other than
        /// <c>&gt;</c>, or the slice ends first.
        /// </returns>
        private static bool TryParseHtmlTagHtmlComment(ref StringSlice text, StringBuilder builder)
        {
            if (text.NextChar() != '-')
            {
                return false;
            }

            builder.Append("--");

            // A comment whose text starts with '>' is rejected.
            if (text.PeekChar() == '>')
            {
                return false;
            }

            // Number of consecutive '-' characters seen immediately before the current one.
            var trailingHyphens = 0;

            for (var current = text.NextChar(); current != '\0'; current = text.NextChar())
            {
                if (trailingHyphens == 2)
                {
                    // "--" must be followed directly by '>'.
                    if (current != '>')
                    {
                        return false;
                    }

                    builder.Append('>');
                    text.NextChar();
                    return true;
                }

                if (current == '-')
                {
                    trailingHyphens++;
                }
                else
                {
                    trailingHyphens = 0;
                }

                builder.Append(current);
            }

            // End of slice reached before the comment was closed.
            return false;
        }
コード例 #6
0
        /// <summary>
        /// Tries to parse a CDATA section. The characters following the current position must spell
        /// <c>CDATA[</c>; the content then runs up to the first <c>]]&gt;</c>.
        /// </summary>
        /// <param name="text">The slice being scanned; it is advanced as characters are consumed.</param>
        /// <param name="builder">
        /// Receives <c>[</c> unconditionally, then <c>CDATA[</c>, the content and the closing
        /// <c>]]&gt;</c> as they are recognised.
        /// </param>
        /// <returns><c>true</c> when the closing <c>]]&gt;</c> was found; <c>false</c> otherwise.</returns>
        private static bool TryParseHtmlTagCData(ref StringSlice text, StringBuilder builder)
        {
            builder.Append('[');
            var c = text.NextChar();

            if (c == 'C' &&
                text.NextChar() == 'D' &&
                text.NextChar() == 'A' &&
                text.NextChar() == 'T' &&
                text.NextChar() == 'A' &&
                (c = text.NextChar()) == '[')
            {
                builder.Append("CDATA[");
                while (true)
                {
                    var pc = c;
                    c = text.NextChar();
                    if (c == '\0')
                    {
                        // End of slice before the terminator.
                        return false;
                    }

                    builder.Append(c);

                    // Only peek at the character after "]]" instead of consuming it: if it is
                    // not '>', it must still be examined as a potential part of the terminator
                    // on the next iteration (e.g. content ending in ']' as in "a]]]>").
                    if (c == ']' && pc == ']' && text.PeekChar() == '>')
                    {
                        text.NextChar(); // move onto '>'
                        text.NextChar(); // move past '>'
                        builder.Append('>');
                        return true;
                    }
                }
            }
            return false;
        }
コード例 #7
0
        /// <summary>
        /// Tries to parse an inline HTML construct starting at <c>&lt;</c>, delegating to the
        /// parser that matches the character(s) following it.
        /// </summary>
        /// <param name="text">The slice being scanned; it is advanced as characters are consumed.</param>
        /// <param name="builder">Receives the characters consumed.</param>
        /// <returns>The result of the delegated parser, or <c>false</c> when the current character is not <c>&lt;</c>.</returns>
        private static bool TryParseHtmlTag(ref StringSlice text, ref ValueStringBuilder builder)
        {
            if (text.CurrentChar != '<')
            {
                return false;
            }

            var first = text.NextChar();
            builder.Append('<');

            if (first == '!')
            {
                // "<!" is followed by '-' (comment), '[' (CDATA) or a declaration name.
                builder.Append(first);
                var second = text.NextChar();

                if (second == '-')
                {
                    return TryParseHtmlTagHtmlComment(ref text, ref builder);
                }

                if (second == '[')
                {
                    return TryParseHtmlTagCData(ref text, ref builder);
                }

                return TryParseHtmlTagDeclaration(ref text, ref builder);
            }

            if (first == '/')
            {
                return TryParseHtmlCloseTag(ref text, ref builder);
            }

            if (first == '?')
            {
                return TryParseHtmlTagProcessingInstruction(ref text, ref builder);
            }

            return TryParseHtmlTagOpenTag(ref text, ref builder);
        }
コード例 #8
0
        /// <summary>
        /// Tries to parse the remainder of a processing instruction, consuming characters after the
        /// current one until the closing <c>?&gt;</c>.
        /// </summary>
        /// <param name="text">The slice being scanned; it is advanced as characters are consumed.</param>
        /// <param name="builder">Receives <c>?</c>, the instruction content and the closing <c>?&gt;</c>.</param>
        /// <returns><c>true</c> when <c>?&gt;</c> was found; <c>false</c> when the slice ended first.</returns>
        private static bool TryParseHtmlTagProcessingInstruction(ref StringSlice text, StringBuilder builder)
        {
            builder.Append('?');

            // The opening '?' itself is deliberately not remembered as "previous",
            // so "<?>" is not treated as a complete instruction.
            var previous = '\0';

            for (var current = text.NextChar(); current != '\0'; current = text.NextChar())
            {
                if (current == '>' && previous == '?')
                {
                    builder.Append('>');
                    text.NextChar();
                    return true;
                }

                builder.Append(current);
                previous = current;
            }

            return false;
        }
コード例 #9
0
        /// <summary>
        /// Tries to parse the parenthesised part of an inline link: <c>( destination "title" )</c>.
        /// </summary>
        /// <param name="text">The slice being scanned; it must be positioned on <c>(</c>. It is advanced as characters are consumed, also on failure.</param>
        /// <param name="link">The destination produced by <c>TryParseUrl</c>, or <c>null</c> if it was never reached.</param>
        /// <param name="title">The title produced by <c>TryParseTitle</c>; on success it is never <c>null</c> (empty string when absent).</param>
        /// <param name="linkSpan">Start/end positions of the destination, or <c>SourceSpan.Empty</c> when it is empty.</param>
        /// <param name="titleSpan">Start/end positions of the title, or <c>SourceSpan.Empty</c> when it is empty or absent.</param>
        /// <returns><c>true</c> when the closing <c>)</c> was reached and consumed; <c>false</c> otherwise.</returns>
        public static bool TryParseInlineLink(ref StringSlice text, out string link, out string title, out SourceSpan linkSpan, out SourceSpan titleSpan)
        {
            // 1. An inline link consists of a link text followed immediately by a left parenthesis (,
            // 2. optional whitespace,
            // 3. an optional link destination,
            // 4. an optional link title separated from the link destination by whitespace,
            // 5. optional whitespace,
            // 6. and a right parenthesis )
            link      = null;
            title     = null;
            linkSpan  = SourceSpan.Empty;
            titleSpan = SourceSpan.Empty;

            // 1. Opening parenthesis
            if (text.CurrentChar != '(')
            {
                return false;
            }

            // 2. Optional whitespace
            text.NextChar();
            text.TrimStart();

            // 3. Optional destination
            var linkStart = text.Start;
            if (!TryParseUrl(ref text, out link))
            {
                return false;
            }

            linkSpan.Start = linkStart;
            linkSpan.End   = text.Start - 1;
            if (linkSpan.End < linkSpan.Start)
            {
                // Nothing was consumed: the destination is empty.
                linkSpan = SourceSpan.Empty;
            }

            int spaceCount;
            text.TrimStart(out spaceCount);

            if (text.CurrentChar != ')')
            {
                // 4. A title is only allowed when separated from the destination by whitespace.
                if (spaceCount == 0)
                {
                    return false;
                }

                var titleStart = text.Start;
                if (!TryParseTitle(ref text, out title))
                {
                    return false;
                }

                titleSpan.Start = titleStart;
                titleSpan.End   = text.Start - 1;
                if (titleSpan.End < titleSpan.Start)
                {
                    titleSpan = SourceSpan.Empty;
                }

                // 5. Optional whitespace, 6. closing parenthesis
                text.TrimStart();
                if (text.CurrentChar != ')')
                {
                    return false;
                }
            }

            // Skip ')'
            text.NextChar();
            title = title ?? String.Empty;
            return true;
        }
コード例 #10
0
        /// <summary>
        /// Tries to parse an autolink of the form <c>&lt;scheme:rest&gt;</c> (absolute URI) or
        /// <c>&lt;user@domain&gt;</c> (email address).
        /// </summary>
        /// <param name="text">The slice being scanned; it must be positioned on <c>&lt;</c>. It is advanced as characters are consumed, also on failure.</param>
        /// <param name="link">On success, the text between <c>&lt;</c> and <c>&gt;</c>; otherwise <c>null</c>.</param>
        /// <param name="isEmail"><c>true</c> once the scan has committed to the email form (an <c>@</c> was reached), even if the domain then turns out to be invalid.</param>
        /// <returns><c>true</c> when a complete autolink, including the closing <c>&gt;</c>, was consumed.</returns>
        public static bool TryParseAutolink(ref StringSlice text, out string link, out bool isEmail)
        {
            link    = null;
            isEmail = false;

            if (text.CurrentChar != '<')
            {
                return false;
            }

            // An absolute URI, for these purposes, consists of a scheme followed by a colon (:)
            // followed by zero or more characters other than ASCII whitespace and control characters, <, and >.
            //
            // A scheme is any sequence of 2-32 characters beginning with an ASCII letter and followed
            // by any combination of ASCII letters, digits, or the symbols '+', '.' or '-'.
            //
            // An email address, for these purposes, is anything that matches the non-normative regex from the HTML5 spec:
            // /^
            // [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
            // @
            // [a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
            // (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/

            var c = text.NextChar();

            // Set as soon as a character is seen that is valid in an email user name but not in
            // a URI scheme; from then on only the email form can match.
            bool emailOnly = false;
            if (!c.IsAlpha())
            {
                // A scheme must start with a letter, so this can only be an email address.
                if (!c.IsDigit() && !CharHelper.IsEmailUsernameSpecialChar(c))
                {
                    return false;
                }
                emailOnly = true;
            }

            var builder = StringBuilderCache.Local();
            try
            {
                // ****************************
                // 1. Scan scheme or user email
                // ****************************
                builder.Append(c);
                bool isUri = false;
                while (true)
                {
                    c = text.NextChar();

                    // Symbols valid for both scheme and email
                    var isSchemeSymbol = c == '+' || c == '.' || c == '-';
                    var isValidChar    = c.IsAlphaNumeric() || isSchemeSymbol;
                    if (CharHelper.IsEmailUsernameSpecialChar(c))
                    {
                        isValidChar = true;
                        if (!isSchemeSymbol)
                        {
                            emailOnly = true;
                        }
                    }

                    if (isValidChar)
                    {
                        builder.Append(c);
                    }
                    else if (c == ':')
                    {
                        // The length can only be validated here: until ':' is reached the text
                        // may still be an email user name, which has no such limit.
                        if (emailOnly || builder.Length < 2 || builder.Length > 32)
                        {
                            return false;
                        }
                        isUri = true;
                        break;
                    }
                    else if (c == '@')
                    {
                        break;
                    }
                    else
                    {
                        return false;
                    }
                }

                // append ':' or '@'
                builder.Append(c);

                if (!isUri)
                {
                    isEmail = true;

                    // ********************
                    // 2a. Scan the domain
                    // ********************
                    // Each '.'-separated label is 1-63 characters long, made of letters, digits
                    // and '-', and neither starts nor ends with '-'.
                    int  labelLength = 0;
                    char previous    = '\0';
                    while (true)
                    {
                        c = text.NextChar();
                        if (c == '>')
                        {
                            // The last label must be non-empty and must not end with '-'.
                            if (labelLength == 0 || previous == '-')
                            {
                                return false;
                            }

                            text.NextChar();
                            link = builder.ToString();
                            return true;
                        }

                        if (c.IsAlphaNumeric() || (c == '-' && labelLength > 0))
                        {
                            labelLength++;
                            if (labelLength > 63)
                            {
                                return false;
                            }
                        }
                        else if (c == '.')
                        {
                            // A label may not be empty (leading '.' or "..") nor end with '-'.
                            if (labelLength == 0 || previous == '-')
                            {
                                return false;
                            }
                            labelLength = 0;
                        }
                        else
                        {
                            return false;
                        }

                        builder.Append(c);
                        previous = c;
                    }
                }

                // *****************************
                // 2b. Scan the rest of the URI
                // *****************************
                while (true)
                {
                    c = text.NextChar();
                    if (c == '>')
                    {
                        text.NextChar();
                        link = builder.ToString();
                        return true;
                    }

                    // Only printable ASCII other than space and '<' is accepted; this also
                    // rejects '\0', i.e. the end of the slice.
                    if (c <= ' ' || c >= 127 || c == '<')
                    {
                        return false;
                    }

                    builder.Append(c);
                }
            }
            finally
            {
                // The builder comes from StringBuilderCache, so leave it empty on every exit path.
                builder.Length = 0;
            }
        }
コード例 #11
0
        /// <summary>
        /// Tries to parse an HTML open tag (tag name, optional attributes, optional <c>/</c>, then
        /// <c>&gt;</c>), starting at the first character of the tag name.
        /// </summary>
        /// <param name="text">The slice being scanned; it is advanced as characters are consumed, also on failure.</param>
        /// <param name="builder">Receives every character that was consumed.</param>
        /// <returns><c>true</c> when the tag was closed by <c>&gt;</c> or <c>/&gt;</c>; <c>false</c> otherwise.</returns>
        internal static bool TryParseHtmlTagOpenTag(ref StringSlice text, StringBuilder builder)
        {
            var c = text.CurrentChar;

            // Tag name: a letter followed by letters, digits or '-'.
            if (!c.IsAlpha())
            {
                return false;
            }
            builder.Append(c);

            c = text.NextChar();
            while (c.IsAlphaNumeric() || c == '-')
            {
                builder.Append(c);
                c = text.NextChar();
            }

            // True while the last thing parsed was an attribute name, i.e. an '=' may follow.
            bool hasAttribute = false;

            while (true)
            {
                var hasWhitespaces = false;
                // Skip any whitespaces
                while (c.IsWhitespace())
                {
                    builder.Append(c);
                    c = text.NextChar();
                    hasWhitespaces = true;
                }

                switch (c)
                {
                case '\0':
                    // End of slice before the tag was closed.
                    return false;

                case '>':
                    text.NextChar();
                    builder.Append(c);
                    return true;

                case '/':
                    // Self-closing tag: '/' must be followed immediately by '>'.
                    builder.Append('/');
                    c = text.NextChar();
                    if (c != '>')
                    {
                        return false;
                    }
                    text.NextChar();
                    builder.Append('>');
                    return true;

                case '=':
                    // A value is only valid right after an attribute name.
                    if (!hasAttribute)
                    {
                        return false;
                    }

                    builder.Append('=');

                    // Skip any spaces after
                    c = text.NextChar();
                    while (c.IsWhitespace())
                    {
                        builder.Append(c);
                        c = text.NextChar();
                    }

                    // Parse a quoted string
                    if (c == '\'' || c == '\"')
                    {
                        builder.Append(c);
                        char openingStringChar = c;
                        while (true)
                        {
                            c = text.NextChar();
                            if (c == '\0')
                            {
                                return false;
                            }
                            if (c == openingStringChar)
                            {
                                break;
                            }
                            builder.Append(c);
                        }
                        builder.Append(c);
                        c = text.NextChar();
                    }
                    else
                    {
                        // Unquoted value: runs until whitespace (space, tab or line ending)
                        // or one of the characters " ' = < > `
                        int matchCount = 0;
                        while (true)
                        {
                            if (c == '\0')
                            {
                                return false;
                            }
                            if (c == ' ' || c == '\t' || c == '\n' || c == '\r' ||
                                c == '"' || c == '\'' || c == '=' || c == '<' || c == '>' || c == '`')
                            {
                                break;
                            }
                            matchCount++;
                            builder.Append(c);
                            c = text.NextChar();
                        }

                        // We need at least one char after '='
                        if (matchCount == 0)
                        {
                            return false;
                        }
                    }

                    hasAttribute = false;
                    continue;

                default:
                    // An attribute name must be separated from what precedes it by whitespace.
                    if (!hasWhitespaces)
                    {
                        return false;
                    }

                    // Parse the attribute name: starts with a letter, '_' or ':' ...
                    if (!(c.IsAlpha() || c == '_' || c == ':'))
                    {
                        return false;
                    }
                    builder.Append(c);

                    // ... followed by letters, digits, '_', ':', '.' or '-'.
                    c = text.NextChar();
                    while (c.IsAlphaNumeric() || c == '_' || c == ':' || c == '.' || c == '-')
                    {
                        builder.Append(c);
                        c = text.NextChar();
                    }

                    hasAttribute = true;
                    break;
                }
            }
        }