/// <summary>
/// Builds an HTML node by parsing a string of literal HTML.
/// </summary>
/// <param name="html">The literal HTML markup to parse.</param>
/// <returns>
/// The first child of the document node of a temporary document loaded from
/// <paramref name="html"/>.
/// </returns>
public static HtmlNode CreateNode(string html)
{
    // REVIEW: a whole document is built just to obtain one node, which is not optimal.
    HtmlDocument scratch = new HtmlDocument();
    scratch.LoadHtml(html);

    HtmlNode root = scratch.DocumentNode;
    return root.FirstChild;
}
/// <summary>
/// Produces a sanitized version of the given HTML.
/// </summary>
/// <remarks>
/// The markup is loaded into a document and every descendant node is visited.
/// Nodes whose name appears in <c>_DefaultNodeBlackList</c> are removed from their
/// parent; every other node is passed to <c>FilterAttributes</c> and then
/// <c>FilterScriptRelAttributes</c>. Those helpers are defined elsewhere; presumably
/// they strip unsafe attributes (confirm against their definitions).
/// </remarks>
/// <param name="html">The HTML text to sanitize.</param>
/// <returns>
/// <paramref name="html"/> itself when <c>IsNullOrEmpty()</c> reports it as null or empty;
/// otherwise the document serialized back to a string after filtering.
/// </returns>
public static string GetSafeHtml(string html)
{
    if (html.IsNullOrEmpty())
    {
        return html;
    }

    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(html);

    // Black-listed nodes are only collected during the walk; they are detached
    // afterwards so the tree is not changed while it is being traversed.
    List<HtmlNode> blackListed = new List<HtmlNode>();

    foreach (HtmlNodeNavigator position in document
        .CreateNavigator()
        .SelectDescendants(System.Xml.XPath.XPathNodeType.All, false))
    {
        HtmlNode current = position.CurrentNode;

        if (!_DefaultNodeBlackList.Contains(current.Name))
        {
            FilterAttributes(current);
            FilterScriptRelAttributes(current);
            continue;
        }

        blackListed.Add(current);
    }

    foreach (HtmlNode unwanted in blackListed)
    {
        unwanted.ParentNode.RemoveChild(unwanted);
    }

    // Serialize the filtered document back to text.
    using (StringWriter output = new StringWriter())
    {
        document.Save(output);
        return output.ToString();
    }
}