/// <summary>
/// Finds every opening tag named <paramref name="tagName"/> in the document's markup
/// and returns one <c>HtmlTag</c> for each occurrence, in the order they appear.
/// </summary>
/// <param name="document">Document whose <c>InnerDocument</c> text is scanned.</param>
/// <param name="tagName">
/// Tag name to look for. It is matched case-insensitively and treated as literal text,
/// so regex metacharacters in the name (for example '?' or '.') match themselves.
/// </param>
/// <param name="endTag">
/// End-tag style stamped on every returned tag. When it is <c>EndTag.EndTag</c> the tag's
/// <c>Html</c> is obtained from <c>TagReader.ReadToEndTag</c>; otherwise <c>Html</c> is just
/// the text of the matched opening tag.
/// </param>
/// <returns>The matching tags; an empty list when nothing matches.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="tagName"/> is null.</exception>
public static List<HtmlTag> GetElementsByTagName(this HtmlDocument document, string tagName, EndTag endTag)
{
    // Flatten newlines and NUL characters to spaces. Without this, '.' in the pattern
    // (which does not match '\n' by default) could not span a tag broken across lines.
    string html = document.InnerDocument.Replace('\n', ' ').Replace('\0', ' ');

    // Escape the name before splicing it into the pattern; an unescaped name such as
    // "?xml" or "my.tag" would otherwise change the meaning of the expression.
    string literalName = Regex.Escape(tagName);
    string pattern = string.Format(@"(<{0}\s.*?>)|(<{0}>)", literalName);
    MatchCollection matches = Regex.Matches(html, pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // The upper-cased name is the same for every result, so compute it once.
    string upperName = tagName.ToUpper();

    List<HtmlTag> tags = new List<HtmlTag>();
    foreach (Match match in matches)
    {
        HtmlTag tag = new HtmlTag();
        tag.EndTagType = endTag;
        tag.Name = upperName;

        if (endTag != EndTag.EndTag)
        {
            tag.Html = match.Value;
        }
        else if (Regex.IsMatch(match.Value, @"\w+"))
        {
            // Only tags whose text contains at least one word character are read through;
            // otherwise Html is left unset, as before.
            tag.Html = TagReader.ReadToEndTag(html, tagName, match);
        }

        tags.Add(tag);
    }

    return tags;
}
/// <summary>
/// Scans this tag's <c>InnerHtml</c> from front to back and builds an <c>HtmlTag</c> for each
/// opening tag encountered, skipping over the markup consumed by the previous node before
/// looking for the next one.
/// </summary>
/// <returns>
/// The nodes found, in document order. The list is empty when this tag's
/// <c>EndTagType</c> is not <c>EndTag.EndTag</c> or when the inner markup contains no tags.
/// Each node's <c>Html</c> and <c>EndTagType</c> come from <c>TagReader.ReadToEndTag</c>.
/// </returns>
private List<HtmlTag> GetChildNodes()
{
    List<HtmlTag> nodes = new List<HtmlTag>();
    if (EndTagType != EndTag.EndTag)
    {
        return nodes;
    }

    const string openTagPattern = @"(?!</)(<(\w+).*?>)";
    const RegexOptions options = RegexOptions.Compiled | RegexOptions.IgnoreCase;

    // Flatten newlines and NUL characters to spaces. Without this, '.' in the pattern
    // (which does not match '\n' by default) could not span a tag broken across lines.
    string remaining = InnerHtml.Replace('\n', ' ').Replace('\0', ' ');

    // Only the first opening tag matters on each pass, so ask for a single match
    // instead of enumerating every match in the remaining text.
    Match opening = Regex.Match(remaining, openTagPattern, options);
    while (opening.Success)
    {
        HtmlTag node = new HtmlTag();
        node.Name = opening.Groups[2].Value.ToUpper();

        EndTag endType;
        node.Html = TagReader.ReadToEndTag(remaining, node.Name, opening, out endType);
        node.EndTagType = endType;
        nodes.Add(node);

        // Work out how far to advance. Find where the consumed markup actually sits rather
        // than assuming it begins at offset 0, so text or whitespace in front of the tag is
        // skipped too and the same tag is not matched again on the next pass.
        int htmlLength = node.Html == null ? 0 : node.Html.Length;
        int start = htmlLength == 0
            ? -1
            : remaining.IndexOf(node.Html, System.StringComparison.Ordinal);
        if (start < 0)
        {
            start = opening.Index;
        }

        // Always move at least past the opening tag (guarantees the loop terminates even
        // when no markup was returned) and never beyond the end of the text.
        int next = System.Math.Max(start + htmlLength, opening.Index + opening.Length);
        next = System.Math.Min(next, remaining.Length);

        remaining = remaining.Substring(next);
        opening = Regex.Match(remaining, openTagPattern, options);
    }

    return nodes;
}