/// <summary>
/// Re-parents the content of <paramref name="node"/> under a new <c>html</c> &gt; <c>p</c> tag pair.
/// </summary>
/// <remarks>
/// Every current child of <paramref name="node"/> is added, in order, to a new <c>p</c> tag node.
/// That node is added to a new <c>html</c> tag node, the child list of <paramref name="node"/> is
/// cleared, and the <c>html</c> node is added as the only child.
/// </remarks>
/// <param name="node">The node whose children are wrapped.</param>
public static void WrapWithHtmlTag(this HtmlNode node)
{
    var paragraph = new HtmlTagNode("p");
    foreach (var existingChild in node.Children)
    {
        paragraph.AddChild(existingChild);
    }

    var wrapper = new HtmlTagNode("html");
    wrapper.AddChild(paragraph);

    // Replace the old child list with the single wrapper node.
    node.Children.Clear();
    node.AddChild(wrapper);
}
/// <summary>Parses the HTML to an <see cref="HtmlNode"/>.</summary>
/// <remarks>
/// The input is first passed through <c>CleanUpHtml</c>, then split into tag and text tokens.
/// The tokens are assembled into a tree below a synthetic <c>html</c> root node using a stack
/// of currently open nodes. An end tag that does not match the innermost open tag is skipped
/// when <c>IgnoreMalformedHtml</c> is set; otherwise it is reported as malformed.
/// </remarks>
/// <param name="html">The HTML.</param>
/// <returns>The synthetic <c>html</c> root node containing the parsed nodes.</returns>
/// <exception cref="InvalidOperationException">
/// The HTML is malformed and <c>IgnoreMalformedHtml</c> is not set.
/// </exception>
public HtmlNode Parse(string html)
{
    html = CleanUpHtml(html);

    // The first alternative already matches every "<...>" sequence, so in practice the
    // closing-tag and self-closing alternatives never get a turn; tokens are classified
    // below by their prefix and suffix instead.
    var matches = Regex.Matches(html, "<(.*?)>|</(.*?)>|<(.*?)/>|([^<>]*)");
    var tokens = matches
        .OfType<Group>() // every Match is a Group; this also gives a typed sequence
        .Select(m => m.Value)
        .ToArray();

    var root = new HtmlTagNode("html");
    var stack = new Stack<HtmlNode>();
    stack.Push(root);

    // Set when an end tag matching the synthetic root was seen. The root itself stays on
    // the stack so that Peek() can never hit an empty stack.
    var rootClosed = false;

    foreach (var token in tokens)
    {
        if (string.IsNullOrEmpty(token))
        {
            continue; // the text alternative also produces empty matches
        }

        if (rootClosed && !IgnoreMalformedHtml)
        {
            // Content after the root was closed: report it as malformed instead of
            // failing with an unrelated "stack empty" error.
            throw new InvalidOperationException("The HTML is malformed at token '" + token + "'.");
        }

        var startsLikeTag = token.StartsWith("<", StringComparison.Ordinal);
        var endsLikeTag = token.EndsWith(">", StringComparison.Ordinal);

        if (token.StartsWith("</", StringComparison.Ordinal) && endsLikeTag) // end tag
        {
            var tag = token.Substring(2, token.Length - 3);
            var open = stack.Peek() as HtmlTagNode;
            if (open != null && open.Name == tag)
            {
                if (stack.Count > 1)
                {
                    stack.Pop();
                }
                else
                {
                    rootClosed = true;
                }
            }
            else if (!IgnoreMalformedHtml)
            {
                // The token already carries its own angle brackets.
                throw new InvalidOperationException("The HTML is malformed at token '" + token + "'.");
            }
        }
        else if (startsLikeTag && token.EndsWith("/>", StringComparison.Ordinal)) // full tag
        {
            var value = token.Substring(1, token.Length - 3);
            stack.Peek().AddChild(new HtmlTagNode(value));
        }
        else if (startsLikeTag && endsLikeTag) // start tag
        {
            var value = token.Substring(1, token.Length - 2);
            var node = new HtmlTagNode(value);
            stack.Peek().AddChild(node);
            stack.Push(node); // stays open until its end tag arrives
        }
        else // text
        {
            stack.Peek().AddChild(new HtmlTextNode(token));
        }
    }

    return root;
}
/// <summary>Parses the HTML to an <see cref="HtmlNode"/>.</summary>
/// <remarks>
/// After <c>CleanUpHtml</c> has been applied, the text is tokenized into tags and text runs.
/// A stack of open nodes, seeded with a synthetic <c>html</c> root, is used to build the tree.
/// A mismatched end tag is ignored when <c>IgnoreMalformedHtml</c> is set and raises an
/// exception otherwise.
/// </remarks>
/// <param name="html">The HTML.</param>
/// <returns>The synthetic <c>html</c> root node that holds the parsed nodes.</returns>
/// <exception cref="InvalidOperationException">
/// The HTML is malformed and <c>IgnoreMalformedHtml</c> is not set.
/// </exception>
public HtmlNode Parse(string html)
{
    html = CleanUpHtml(html);

    // Any "<...>" sequence is consumed by the first alternative, so the kind of tag is
    // decided further down from the token's first and last characters.
    var matches = Regex.Matches(html, "<(.*?)>|</(.*?)>|<(.*?)/>|([^<>]*)");
    var tokens = matches
        .OfType<Group>() // Match derives from Group
        .Select(m => m.Value)
        .ToArray();

    var root = new HtmlTagNode("html");
    var stack = new Stack<HtmlNode>();
    stack.Push(root);

    // True once an end tag matching the synthetic root has been consumed. The root is
    // never popped, which keeps Peek() safe for any input.
    var rootClosed = false;

    foreach (var token in tokens)
    {
        if (string.IsNullOrEmpty(token))
        {
            continue; // empty matches from the text alternative carry no content
        }

        if (rootClosed && !IgnoreMalformedHtml)
        {
            // Anything following the closed root is malformed input; say so explicitly
            // rather than surfacing a "stack empty" failure.
            throw new InvalidOperationException("The HTML is malformed at token '" + token + "'.");
        }

        var opensWithBracket = token.StartsWith("<", StringComparison.Ordinal);
        var closesWithBracket = token.EndsWith(">", StringComparison.Ordinal);

        if (token.StartsWith("</", StringComparison.Ordinal) && closesWithBracket) // end tag
        {
            var tag = token.Substring(2, token.Length - 3);
            var innermost = stack.Peek() as HtmlTagNode;
            if (innermost != null && innermost.Name == tag)
            {
                if (stack.Count > 1)
                {
                    stack.Pop();
                }
                else
                {
                    rootClosed = true;
                }
            }
            else if (!IgnoreMalformedHtml)
            {
                // No extra brackets: the token text is already of the form "</name>".
                throw new InvalidOperationException("The HTML is malformed at token '" + token + "'.");
            }
        }
        else if (opensWithBracket && token.EndsWith("/>", StringComparison.Ordinal)) // full tag
        {
            var value = token.Substring(1, token.Length - 3);
            stack.Peek().AddChild(new HtmlTagNode(value));
        }
        else if (opensWithBracket && closesWithBracket) // start tag
        {
            var value = token.Substring(1, token.Length - 2);
            var node = new HtmlTagNode(value);
            stack.Peek().AddChild(node);
            stack.Push(node); // remains the insertion point until closed
        }
        else // text
        {
            stack.Peek().AddChild(new HtmlTextNode(token));
        }
    }

    return root;
}