예제 #1
0
        /// <summary>
        /// Reads the content of <paramref name="tag"/> up to its closing tag and stores
        /// the closing tag's details on the node.
        /// </summary>
        /// <param name="tag">
        /// The tag node to complete. A <c>ParserTag</c> receives the raw text before the
        /// closing tag as its content; any other node is cast to <c>HtmlTag</c> and
        /// receives the result of <c>ParseWikitext()</c>.
        /// </param>
        /// <returns>
        /// <c>true</c> when a closing tag was found and recorded, or when none was consumed
        /// but <c>options.AllowClosingMarkInference</c> is set (the closing mark is then
        /// flagged as inferred). <c>false</c> when a parser tag has no closing tag, or when
        /// inference is disabled and no closing tag is found.
        /// </returns>
        private bool ParseUntilClosingTag(TagNode tag)
        {
            var cacheKey = tag.Name.ToLowerInvariant();
            Regex closingTagRegex;

            // Closing-tag regexes are cached per lower-cased tag name; a null lookup
            // result is treated as a cache miss.
            lock (closingTagMatcherCache)
            {
                closingTagRegex = closingTagMatcherCache.TryGetValue(cacheKey);
                if (closingTagRegex == null)
                {
                    // Group 1 captures the tag name as written, group 2 the whitespace before '>'.
                    closingTagRegex = new Regex("(?i)</(" + Regex.Escape(cacheKey) + @")(\s*)>");
                    closingTagMatcherCache.Add(cacheKey, closingTagRegex);
                }
            }

            if (tag is ParserTag parserTag)
            {
                // Parser tags are not parsed into: take everything up to the closing tag verbatim.
                var rawClosing = closingTagRegex.Match(fulltext, position);
                if (!rawClosing.Success)
                {
                    // A parser tag that never closes is a failure.
                    return false;
                }
                parserTag.Content = fulltext.Substring(position, rawClosing.Index - position);
                MovePositionTo(rawClosing.Index + rawClosing.Length);
                return RecordClosingTag(rawClosing);
            }

            // Cheap look-ahead before parsing into the tag: unless a missing closing tag may
            // be inferred, give up right away when no closing tag matches from here on.
            if (!options.AllowClosingMarkInference)
            {
                if (!closingTagRegex.IsMatch(fulltext, position))
                {
                    return false;
                }
            }

            var htmlTag = (HtmlTag)tag;
            var closingPattern = closingTagRegex.ToString();

            ParseStart(closingPattern, false);
            htmlTag.Content = ParseWikitext();
            Accept();

            // Consume the closing tag itself.
            var closingText = ConsumeToken(closingPattern);
            if (closingText != null)
            {
                return RecordClosingTag(closingTagRegex.Match(closingText));
            }
            if (!options.AllowClosingMarkInference)
            {
                return false;
            }
            tag.SetInferredClosingMark();
            tag.ClosingTagName = null;
            return true;

            // Copies the closing tag's spelling and trailing whitespace onto the node.
            bool RecordClosingTag(Match closing)
            {
                Debug.Assert(closing.Success);
                Debug.Assert(closing.Groups[1].Success);
                Debug.Assert(closing.Groups[2].Success);

                // The closing name is stored only when it differs from the opening name.
                var writtenName = closing.Groups[1].Value;
                if (writtenName != tag.Name)
                {
                    tag.ClosingTagName = writtenName;
                }
                else
                {
                    tag.ClosingTagName = null;
                }
                tag.ClosingTagTrailingWhitespace = closing.Groups[2].Value;
                return true;
            }
        }
        /// <summary>
        /// Reads the content of <paramref name="tag"/> up to its closing tag and stores
        /// the closing tag's details on the node.
        /// </summary>
        /// <param name="tag">
        /// The tag node to complete. A <c>ParserTag</c> receives the raw text before the
        /// closing tag as its content; any other node is cast to <c>HtmlTag</c> and
        /// receives the result of <c>ParseWikitext()</c>.
        /// </param>
        /// <returns>
        /// <c>false</c> only when a parser tag has no closing tag; otherwise <c>true</c>.
        /// An HTML tag whose closing tag cannot be consumed is marked as inferred-closed
        /// with <c>TagStyle.NotClosed</c> and still yields <c>true</c>.
        /// </returns>
        private bool ParseUntilClosingTag(TagNode tag)
        {
            var lowerName = tag.Name.ToLowerInvariant();

            // Group 1 captures the tag name as written, group 2 the whitespace before '>'.
            var closingPattern = "(?i)</(" + Regex.Escape(lowerName) + @")(\s*)>";
            Regex closingTagRegex;

            // Closing-tag regexes are cached per lower-cased tag name; a null lookup
            // result is treated as a cache miss.
            lock (closingTagMatcherCache)
            {
                closingTagRegex = closingTagMatcherCache.TryGetValue(lowerName);
                if (closingTagRegex == null)
                {
                    closingTagRegex = new Regex(closingPattern);
                    closingTagMatcherCache.Add(lowerName, closingTagRegex);
                }
            }

            if (tag is ParserTag parserTag)
            {
                // Parser tags are not parsed into: take everything up to the closing tag verbatim.
                var rawClosing = closingTagRegex.Match(fulltext, position);
                if (!rawClosing.Success)
                {
                    // A parser tag that never closes is a failure.
                    return false;
                }
                parserTag.Content = fulltext.Substring(position, rawClosing.Index - position);
                MovePositionTo(rawClosing.Index + rawClosing.Length);
                return RecordClosingTag(rawClosing);
            }

            // From here on we parse into the inner content of an HTML tag.
            var htmlTag = (HtmlTag)tag;

            // <li> can be closed by </li>, by the next <li ...>, or by EOF; the latter two
            // end up as TagStyle.NotClosed below. Every other tag stops at its own closing tag.
            // NOTE(review): unlike closingPattern, the <li> pattern carries no (?i) flag —
            // confirm whether ParseStart matches case-insensitively.
            var terminatorPattern = lowerName == "li"
                ? @"</li\s*>|<li(\s*>|\s+)"
                : closingPattern;

            ParseStart(terminatorPattern, false);
            htmlTag.Content = ParseWikitext();
            Accept();

            // Try to consume the closing tag.
            var closingText = ConsumeToken(closingPattern);
            if (closingText != null)
            {
                return RecordClosingTag(closingTagRegex.Match(closingText));
            }

            // Unbalanced HTML tag, a pathological case. No backtracking is done here:
            // it is expensive, and MW forces tags closed at EOF anyway.
            tag.SetInferredClosingMark();
            tag.ClosingTagName = null;
            tag.TagStyle       = TagStyle.NotClosed;
            return true;

            // Copies the closing tag's spelling and trailing whitespace onto the node.
            bool RecordClosingTag(Match closing)
            {
                Debug.Assert(closing.Success);
                Debug.Assert(closing.Groups[1].Success);
                Debug.Assert(closing.Groups[2].Success);

                // The closing name is stored only when it differs from the opening name.
                var writtenName = closing.Groups[1].Value;
                if (writtenName != tag.Name)
                {
                    tag.ClosingTagName = writtenName;
                }
                else
                {
                    tag.ClosingTagName = null;
                }
                tag.ClosingTagTrailingWhitespace = closing.Groups[2].Value;
                return true;
            }
        }