示例#1
0
        /// <summary>
        /// Finds every opening tag named <paramref name="tagName"/> in the document text and
        /// returns one <see cref="HtmlTag"/> per occurrence, in document order.
        /// </summary>
        /// <param name="document">Document whose <c>InnerDocument</c> text is searched.</param>
        /// <param name="tagName">
        /// Tag name to look for. It is matched case-insensitively and treated as literal text
        /// (regex metacharacters in the name are escaped).
        /// </param>
        /// <param name="endTag">
        /// Closing style recorded on each result. For <c>EndTag.EndTag</c> the tag's markup is
        /// obtained from <c>TagReader.ReadToEndTag</c>; for any other value only the opening
        /// tag text is stored.
        /// </param>
        /// <returns>The tags that were found; an empty list when nothing matches.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="document"/> or <paramref name="tagName"/> is <c>null</c>.
        /// </exception>
        public static List <HtmlTag> GetElementsByTagName(this HtmlDocument document, string tagName, EndTag endTag)
        {
            if (document == null)
            {
                throw new System.ArgumentNullException("document");
            }
            if (tagName == null)
            {
                throw new System.ArgumentNullException("tagName");
            }

            // Line breaks and NUL characters are flattened to spaces so that '.' in the
            // pattern can run across what used to be separate lines.
            string html = document.InnerDocument.Replace('\n', ' ').Replace('\0', ' ');

            // Escape the name: it is caller-supplied text, not a regex fragment.
            string escapedName = Regex.Escape(tagName);
            string pattern     = string.Format(@"(<{0}\s.*?>)|(<{0}>)", escapedName);

            MatchCollection matches = Regex.Matches(html, pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
            List <HtmlTag>  tags    = new List <HtmlTag>();

            // Invariant casing keeps the stored name stable regardless of the current culture
            // (e.g. Turkish would otherwise upper-case 'i' to a dotted capital I).
            string upperName = tagName.ToUpperInvariant();

            foreach (Match match in matches)
            {
                HtmlTag tag = new HtmlTag();
                tag.EndTagType = endTag;
                tag.Name       = upperName;

                if (endTag == EndTag.EndTag)
                {
                    // Only read to the end tag when the opening tag contains at least one
                    // word character; otherwise Html is left unset, as before.
                    if (Regex.IsMatch(match.Value, @"\w+"))
                    {
                        tag.Html = TagReader.ReadToEndTag(html, tagName, match);
                    }
                }
                else
                {
                    tag.Html = match.Value;
                }

                tags.Add(tag);
            }

            return(tags);
        }
示例#2
0
        /// <summary>
        /// Scans this tag's <c>InnerHtml</c> from left to right and builds one
        /// <see cref="HtmlTag"/> for each opening tag encountered at the current scan position.
        /// </summary>
        /// <returns>
        /// The nodes that were found, in order; an empty list when <c>EndTagType</c> is not
        /// <c>EndTag.EndTag</c> or when no opening tag is present.
        /// </returns>
        private List <HtmlTag> GetChildNodes()
        {
            List <HtmlTag> nodes = new List <HtmlTag>();

            if (EndTagType != EndTag.EndTag)
            {
                return(nodes);
            }

            const string       openingTagPattern = @"(?!</)(<(\w+).*?>)";
            const RegexOptions options           = RegexOptions.Compiled | RegexOptions.IgnoreCase;

            // Line breaks and NUL characters are flattened to spaces before scanning.
            string remaining = InnerHtml.Replace('\n', ' ').Replace('\0', ' ');

            // Only the first opening tag is needed per pass, so ask for a single match
            // instead of enumerating every match in the remaining text.
            Match opening = Regex.Match(remaining, openingTagPattern, options);

            while (opening.Success)
            {
                EndTag endType = EndTag.SelfEnd;
                var    node    = new HtmlTag();

                // Invariant casing so the name handed to the reader does not depend on the
                // current culture. The pattern guarantees group 2 holds at least one word
                // character, so no extra check is needed before reading.
                node.Name = opening.Groups[2].Value.ToUpperInvariant();
                node.Html = TagReader.ReadToEndTag(remaining, node.Name, opening, out endType);

                // Skip everything up to and including this node. The match may start after
                // leading text or whitespace, so its index has to be part of the offset.
                // NOTE(review): assumes ReadToEndTag returns markup that begins at the match
                // position - confirm against TagReader.
                // If the reader returned nothing, fall back to the opening tag's own length
                // so the scan always moves forward.
                int nodeLength = string.IsNullOrEmpty(node.Html) ? opening.Length : node.Html.Length;
                int consumed   = opening.Index + nodeLength;
                if (consumed > remaining.Length)
                {
                    consumed = remaining.Length;
                }

                remaining = remaining.Substring(consumed);
                opening   = Regex.Match(remaining, openingTagPattern, options);

                node.EndTagType = endType;
                nodes.Add(node);
            }

            return(nodes);
        }