/// <summary>
/// Re-nests the current children of <paramref name="node"/> under a new "p" tag,
/// places that "p" tag inside a new "html" tag, and makes the "html" tag the
/// only child of <paramref name="node"/>.
/// </summary>
/// <param name="node">The node whose children are wrapped.</param>
public static void WrapWithHtmlTag(this HtmlNode node)
        {
            // NOTE(review): assumes AddChild does not modify node.Children while it
            // is being enumerated here - confirm against the HtmlNode implementation.
            var paragraph = new HtmlTagNode("p");
            foreach (var existingChild in node.Children)
            {
                paragraph.AddChild(existingChild);
            }

            var wrapper = new HtmlTagNode("html");
            wrapper.AddChild(paragraph);

            // Replace the old child list with the single wrapper element.
            node.Children.Clear();
            node.AddChild(wrapper);
        }
示例#2
0
        /// <summary>Parses the HTML to an <see cref="HtmlNode"/>.</summary>
        /// <remarks>
        /// The input is passed through <c>CleanUpHtml</c> and then split into tag and text tokens.
        /// Every parsed node is attached beneath a synthetic root tag named "html", which is returned.
        /// The synthetic root always stays on the open-tag stack, so a stray <c>&lt;/html&gt;</c>
        /// end tag cannot leave later tokens without a parent.
        /// </remarks>
        /// <param name="html">The HTML.</param>
        /// <returns>The synthetic "html" root node containing the parsed nodes.</returns>
        /// <exception cref="InvalidOperationException">
        /// An end tag does not match the innermost open tag and <c>IgnoreMalformedHtml</c> is not set.
        /// </exception>
        public HtmlNode Parse(string html)
        {
            html = CleanUpHtml(html);

            // Each match is either a complete "<...>" tag or a run of text without angle brackets.
            // Match derives from Group, so OfType<Group> yields every match in order.
            var matches = Regex.Matches(html, "<(.*?)>|</(.*?)>|<(.*?)/>|([^<>]*)");
            var tokens  = matches
                          .OfType <Group>()
                          .Select(m => m.Value)
                          .ToArray();

            var root  = new HtmlTagNode("html");
            var stack = new Stack <HtmlNode>();

            stack.Push(root);

            foreach (var token in tokens)
            {
                // Ordinal comparisons: the markers are plain ASCII and must not depend on the current culture.
                var isTag = token.StartsWith("<", StringComparison.Ordinal) &&
                            token.EndsWith(">", StringComparison.Ordinal);

                if (isTag && token.StartsWith("</", StringComparison.Ordinal)) // end tag
                {
                    var tag     = token.Substring(2, token.Length - 3);
                    var openTag = stack.Peek() as HtmlTagNode;
                    if (openTag != null && openTag.Name == tag)
                    {
                        // Never pop the synthetic root; otherwise the next Peek() would fail on an empty stack.
                        if (stack.Count > 1)
                        {
                            stack.Pop();
                        }
                    }
                    else if (!IgnoreMalformedHtml)
                    {
                        // The token already carries its own angle brackets.
                        throw new InvalidOperationException("The HTML is malformed at token '" + token + "'.");
                    }
                }
                else if (isTag && token.EndsWith("/>", StringComparison.Ordinal)) // full tag
                {
                    // Self-closing tag: added as a child but never pushed as an open tag.
                    var value = token.Substring(1, token.Length - 3);
                    stack.Peek().AddChild(new HtmlTagNode(value));
                }
                else if (isTag) // start tag
                {
                    var value = token.Substring(1, token.Length - 2);
                    var node  = new HtmlTagNode(value);
                    stack.Peek().AddChild(node);
                    stack.Push(node);
                }
                else if (!string.IsNullOrEmpty(token)) // text
                {
                    stack.Peek().AddChild(new HtmlTextNode(token));
                }
            }

            return(root);
        }
示例#3
0
        /// <summary>Parses the HTML to an <see cref="HtmlNode"/>.</summary>
        /// <remarks>
        /// The input is passed through <c>CleanUpHtml</c> and then split into tag and text tokens.
        /// Every parsed node is attached beneath a synthetic root tag named "html", which is returned.
        /// The synthetic root always stays on the open-tag stack, so a stray <c>&lt;/html&gt;</c>
        /// end tag cannot leave later tokens without a parent.
        /// </remarks>
        /// <param name="html">The HTML.</param>
        /// <returns>The synthetic "html" root node containing the parsed nodes.</returns>
        /// <exception cref="InvalidOperationException">
        /// An end tag does not match the innermost open tag and <c>IgnoreMalformedHtml</c> is not set.
        /// </exception>
        public HtmlNode Parse(string html)
        {
            html = CleanUpHtml(html);

            // Each match is either a complete "<...>" tag or a run of text without angle brackets.
            // Match derives from Group, so OfType<Group> yields every match in order.
            var matches = Regex.Matches(html, "<(.*?)>|</(.*?)>|<(.*?)/>|([^<>]*)");
            var tokens = matches
                .OfType<Group>()
                .Select(m => m.Value)
                .ToArray();

            var root = new HtmlTagNode("html");
            var stack = new Stack<HtmlNode>();
            stack.Push(root);

            foreach (var token in tokens)
            {
                // Ordinal comparisons: the markers are plain ASCII and must not depend on the current culture.
                if (token.StartsWith("</", StringComparison.Ordinal) && token.EndsWith(">", StringComparison.Ordinal)) // end tag
                {
                    var tag = token.Substring(2, token.Length - 3);
                    var openTag = stack.Peek() as HtmlTagNode;
                    if (openTag != null && openTag.Name == tag)
                    {
                        // Never pop the synthetic root; otherwise the next Peek() would fail on an empty stack.
                        if (stack.Count > 1)
                        {
                            stack.Pop();
                        }
                    }
                    else if (!IgnoreMalformedHtml)
                    {
                        // The token already carries its own angle brackets.
                        throw new InvalidOperationException("The HTML is malformed at token '" + token + "'.");
                    }
                }
                else if (token.StartsWith("<", StringComparison.Ordinal) && token.EndsWith("/>", StringComparison.Ordinal)) // full tag
                {
                    // Self-closing tag: added as a child but never pushed as an open tag.
                    var value = token.Substring(1, token.Length - 3);
                    stack.Peek().AddChild(new HtmlTagNode(value));
                }
                else if (token.StartsWith("<", StringComparison.Ordinal) && token.EndsWith(">", StringComparison.Ordinal)) // start tag
                {
                    var value = token.Substring(1, token.Length - 2);
                    var node = new HtmlTagNode(value);
                    stack.Peek().AddChild(node);
                    stack.Push(node);
                }
                else if (!string.IsNullOrEmpty(token)) // text
                {
                    stack.Peek().AddChild(new HtmlTextNode(token));
                }
            }

            return root;
        }