/// <summary>
/// Fills in the content of <paramref name="tag"/> by reading up to its closing tag, then
/// records how that closing tag was written (name casing and trailing whitespace).
/// </summary>
/// <param name="tag">
/// The tag node to complete. A <c>ParserTag</c> receives the raw text up to the closing tag;
/// any other node is cast to <c>HtmlTag</c> and receives the result of <c>ParseWikitext()</c>.
/// </param>
/// <returns>
/// <c>false</c> when a parser tag has no closing tag after the current position, or when closing
/// mark inference is disabled and no closing tag can be found/consumed; <c>true</c> otherwise.
/// </returns>
private bool ParseUntilClosingTag(TagNode tag)
{
    // One closing-tag regex per lower-cased tag name, shared through the cache.
    var cacheKey = tag.Name.ToLowerInvariant();
    Regex closingTagRegex;
    lock (closingTagMatcherCache)
    {
        closingTagRegex = closingTagMatcherCache.TryGetValue(cacheKey);
        if (closingTagRegex == null)
        {
            // Group 1 captures the tag name as written, group 2 the whitespace before '>'.
            closingTagRegex = new Regex("(?i)</(" + Regex.Escape(cacheKey) + @")(\s*)>");
            closingTagMatcherCache.Add(cacheKey, closingTagRegex);
        }
    }

    if (tag is ParserTag parserTag)
    {
        // Parser tag content is taken verbatim: search forward for the closing tag.
        var rawClosing = closingTagRegex.Match(fulltext, position);
        if (!rawClosing.Success)
        {
            // An unclosed parser tag is a failure.
            return false;
        }

        parserTag.Content = fulltext.Substring(position, rawClosing.Index - position);
        MovePositionTo(rawClosing.Index + rawClosing.Length);
        RecordClosingTag(rawClosing);
        return true;
    }

    // Before parsing into the tag, cheaply check whether a closing tag exists ahead at all.
    // The check is skipped entirely when the closing mark may be inferred.
    var closingIsPossible = options.AllowClosingMarkInference
                            || closingTagRegex.IsMatch(fulltext, position);
    if (!closingIsPossible)
    {
        return false;
    }

    var htmlTag = (HtmlTag)tag;
    var closingPattern = closingTagRegex.ToString();
    ParseStart(closingPattern, false);
    htmlTag.Content = ParseWikitext();
    Accept();

    // Consume the closing tag itself.
    var closingToken = ConsumeToken(closingPattern);
    if (closingToken != null)
    {
        RecordClosingTag(closingTagRegex.Match(closingToken));
        return true;
    }

    if (!options.AllowClosingMarkInference)
    {
        return false;
    }

    // No closing tag in the source; mark it as inferred.
    tag.SetInferredClosingMark();
    tag.ClosingTagName = null;
    return true;

    // Copies the closing tag's spelling onto the node. The name is stored only when it
    // differs from the opening tag's name.
    void RecordClosingTag(Match closing)
    {
        Debug.Assert(closing.Success);
        Debug.Assert(closing.Groups[1].Success);
        Debug.Assert(closing.Groups[2].Success);
        var writtenName = closing.Groups[1].Value;
        tag.ClosingTagName = writtenName != tag.Name ? writtenName : null;
        tag.ClosingTagTrailingWhitespace = closing.Groups[2].Value;
    }
}
/// <summary>
/// Fills in the content of <paramref name="tag"/> by reading up to its closing tag, then
/// records how that closing tag was written (name casing and trailing whitespace).
/// </summary>
/// <param name="tag">
/// The tag node to complete. A <c>ParserTag</c> receives the raw text up to the closing tag;
/// any other node is cast to <c>HtmlTag</c> and receives the result of <c>ParseWikitext()</c>.
/// </param>
/// <returns>
/// <c>false</c> only when a parser tag has no closing tag after the current position.
/// HTML tags always yield <c>true</c>; an unbalanced one is marked <c>TagStyle.NotClosed</c>.
/// </returns>
private bool ParseUntilClosingTag(TagNode tag)
{
    var normalizedTagName = tag.Name.ToLowerInvariant();
    // Group 1 captures the tag name as written, group 2 the whitespace before '>'.
    var closingTagExpr = "(?i)</(" + Regex.Escape(normalizedTagName) + @")(\s*)>";
    Regex matcher;
    lock (closingTagMatcherCache)
    {
        matcher = closingTagMatcherCache.TryGetValue(normalizedTagName);
        if (matcher == null)
        {
            matcher = new Regex(closingTagExpr);
            closingTagMatcherCache.Add(normalizedTagName, matcher);
        }
    }

    if (tag is ParserTag pt)
    {
        // For parser tags, we just read to the end.
        var rawClosing = matcher.Match(fulltext, position);
        if (!rawClosing.Success)
        {
            // If the parser tag doesn't close, then we fail.
            return false;
        }

        pt.Content = fulltext.Substring(position, rawClosing.Index - position);
        MovePositionTo(rawClosing.Index + rawClosing.Length);
        RecordClosingTag(rawClosing);
        return true;
    }

    // We'll parse into the inner content of the HTML tag.
    var ht = (HtmlTag)tag;
    if (normalizedTagName == "li")
    {
        // LI_TAG
        // <li> can be closed by </li>, <li ...> or EOF.
        // <li> tags closed by <li ...> or EOF are using TagStyle.NotClosed.
        // The tag name was lower-cased before the comparison above, so the terminator must
        // ignore case as well (as closingTagExpr does); otherwise "<LI>...</LI>" would not
        // stop at its own closing tag. (?i:...) is non-capturing, so group numbers are intact.
        ParseStart(@"(?i:</li\s*>|<li(\s*>|\s+))", false);
    }
    else
    {
        // OTHER_TAG
        ParseStart(closingTagExpr, false);
    }
    ht.Content = ParseWikitext();
    Accept();

    // Try to consume the tag closing.
    var closingTag = ConsumeToken(closingTagExpr);
    if (closingTag == null)
    {
        // Unbalanced HTML tag. This is a pathological case.
        // We won't backtrack here, as this is expensive and MW forces to close tags at EOF.
        tag.SetInferredClosingMark();
        tag.ClosingTagName = null;
        tag.TagStyle = TagStyle.NotClosed;
        return true;
    }

    RecordClosingTag(matcher.Match(closingTag));
    return true;

    // Copies the closing tag's spelling onto the node. The name is stored only when it
    // differs from the opening tag's name.
    void RecordClosingTag(Match closing)
    {
        Debug.Assert(closing.Success);
        Debug.Assert(closing.Groups[1].Success);
        Debug.Assert(closing.Groups[2].Success);
        var writtenName = closing.Groups[1].Value;
        tag.ClosingTagName = writtenName != tag.Name ? writtenName : null;
        tag.ClosingTagTrailingWhitespace = closing.Groups[2].Value;
    }
}