// forceMaxDepth truncates the tree below n once the depth budget is used up.
// A node reached with depth == 0 is rewritten in place into a text node whose
// data is "[omitted]", and every sibling that follows it is removed from its
// parent. While depth is non-zero, only element nodes are descended into and
// each child is visited with depth - 1.
private static void forceMaxDepth(Html.Node n, int depth)
{
    if (depth != 0)
    {
        if (n.Type == Html.NodeType.Element)
        {
            var child = n.FirstChild;
            while (child != null)
            {
                forceMaxDepth(child, depth - 1);
                // If child was just truncated its following siblings are gone,
                // so NextSibling is null and the walk ends here.
                child = child.NextSibling;
            }
        }
        return;
    }

    // Budget exhausted: turn this node into the placeholder text node.
    n.Type = Html.NodeType.Text;
    n.FirstChild = null;
    n.LastChild = null;
    n.Attr.Clear();
    n.DataAtom = 0;
    n.Data = "[omitted]";

    // Drop everything that came after the placeholder at this level.
    for (var next = n.NextSibling; next != null; next = n.NextSibling)
    {
        n.Parent.RemoveChild(next);
    }
}
// cleanNodes filters every entry of nodes in place through filterNode, using
// Config.DefaultConfig when c is null. An entry whose filtered result is an
// <li> is wrapped in a newly created <ul> element. When the config has
// WrapText set, the array is additionally passed through wrapText and that
// result is returned; otherwise the (mutated) input array is returned.
private static Html.Node[] cleanNodes(Config c, Html.Node[] nodes)
{
    var config = c ?? Config.DefaultConfig;

    for (var index = 0; index < nodes.Length; index++)
    {
        var filtered = filterNode(config, nodes[index]);
        nodes[index] = filtered;

        if (filtered.DataAtom != Html.Atom.Li)
        {
            continue;
        }

        // A list item on its own gets a <ul> parent.
        var list = new Html.Node
        {
            Type = Html.NodeType.Element,
            Data = "ul",
            DataAtom = Html.Atom.Ul,
        };
        list.AppendChild(filtered);
        nodes[index] = list;
    }

    return config.WrapText ? wrapText(nodes) : nodes;
}
// cleanChildren detaches every child of parent, runs each one through
// filterNode, and re-attaches the results in the same order. If the config has
// WrapText set and parent is listed in c.wrap (by atom) or, for nodes whose
// DataAtom is 0, in c.wrapCustom (by Data), the filtered children go through
// wrapText before being re-attached.
private static void cleanChildren(Config c, Html.Node parent)
{
    var filtered = new List<Html.Node>();
    for (var node = parent.FirstChild; node != null; node = parent.FirstChild)
    {
        parent.RemoveChild(node);
        filtered.Add(filterNode(c, node));
    }

    // The custom-name lookup is only consulted when the atom lookup failed and
    // the parent has no known atom.
    var shouldWrap = c.WrapText
        && (c.wrap.Contains(parent.DataAtom)
            || (parent.DataAtom == 0 && c.wrapCustom.Contains(parent.Data)));

    if (shouldWrap)
    {
        filtered = new List<Html.Node>(wrapText(filtered.ToArray()));
    }

    foreach (var node in filtered)
    {
        parent.AppendChild(node);
    }
}
// CleanNode cleans an HTML node using the specified config and returns the
// cleaned result. When c is null, Config.DefaultConfig is used. The node is
// first duplicated with deepCopy and the duplicate is then handed to
// filterNode, which passes text nodes through unchanged, checks element nodes
// and their attributes against the config, and replaces disallowed content
// with a text node holding its rendered source.
public static Html.Node CleanNode(Config c, Html.Node n)
{
    var config = c ?? Config.DefaultConfig;
    var copy = deepCopy(n);
    return filterNode(config, copy);
}
// cleanNode sanitizes one element node against the config.
//
// An element found in neither c.elem (keyed by atom) nor c.elemCustom (keyed
// by name) is replaced by a text node holding its unescaped rendered source.
// Otherwise its children are cleaned and its attribute list is rebuilt in
// place, keeping only attributes that:
//   - have an empty namespace,
//   - are allowed for this element (allowedAttr / customAttr) or globally
//     (c.attr / c.attrCustom),
//   - are accepted by cleanURL, and
//   - match the regular expression registered for them, when there is one.
// An <img> that ends up without a src attribute is replaced by an empty text
// node.
private static Html.Node cleanNode(Config c, Html.Node n)
{
    var knownByAtom = c.elem.TryGetValue(n.DataAtom, out var allowedAttr);
    var knownByName = c.elemCustom.TryGetValue(n.Data, out var customAttr);
    if (!knownByAtom && !knownByName)
    {
        // Illegal element: show its source as text instead.
        return text(Html.UnescapeString(Render(n)));
    }

    cleanChildren(c, n);

    // Snapshot the attributes, then refill the node's list with the survivors.
    var candidates = n.Attr.ToArray();
    n.Attr.Clear();
    var sawSrc = false;

    foreach (var attr in candidates)
    {
        var atom = Html.Atom.Lookup(Encoding.UTF8.GetBytes(attr.Key));

        Regex atomPattern = null;
        Regex namePattern = null;
        var allowedForElemByAtom = allowedAttr?.TryGetValue(atom, out atomPattern) ?? false;
        var allowedForElemByName = customAttr?.TryGetValue(attr.Key, out namePattern) ?? false;
        var allowedGloballyByAtom = c.attr.Contains(atom);
        var allowedGloballyByName = c.attrCustom.Contains(attr.Key);

        var permitted = allowedForElemByAtom
            || allowedForElemByName
            || allowedGloballyByAtom
            || allowedGloballyByName;
        if (attr.Namespace != "" || !permitted)
        {
            continue;
        }

        // cleanURL runs before the pattern checks, as in the original order.
        if (!cleanURL(c, atom, attr))
        {
            continue;
        }

        var failsPattern =
            (atomPattern != null && !atomPattern.IsMatch(attr.Val))
            || (namePattern != null && !namePattern.IsMatch(attr.Val));
        if (failsPattern)
        {
            continue;
        }

        if (atom == Html.Atom.Src)
        {
            sawSrc = true;
        }
        n.Attr.Add(attr);
    }

    if (n.DataAtom == Html.Atom.Img && !sawSrc)
    {
        // replace it with an empty text node
        return text("");
    }

    return n;
}
// filterNode decides what to do with a node based on its type:
//   - text nodes are returned unchanged;
//   - comment nodes are returned unchanged unless c.EscapeComments is set;
//   - element nodes are handed to cleanNode;
//   - anything else (including comments when EscapeComments is set) is
//     replaced by a text node containing its rendered form.
private static Html.Node filterNode(Config c, Html.Node n)
{
    var isText = n.Type == Html.NodeType.Text;
    var isPreservedComment = n.Type == Html.NodeType.Comment && !c.EscapeComments;
    if (isText || isPreservedComment)
    {
        return n;
    }

    return n.Type == Html.NodeType.Element
        ? cleanNode(c, n)
        : text(Render(n));
}
// deepCopy returns a detached copy of n together with copies of all of its
// descendants. Type, Namespace, Data and DataAtom are copied by assignment and
// the children are re-created in their original order.
//
// The attributes are copied into the clone's own list rather than sharing
// n.Attr by reference. cleanNode empties and refills Attr in place, so a
// shared list would let cleaning the copy rewrite the attributes of the
// caller's original node.
private static Html.Node deepCopy(Html.Node n)
{
    var clone = new Html.Node
    {
        Type = n.Type,
        Namespace = n.Namespace,
        Data = n.Data,
        DataAtom = n.DataAtom,
    };

    // NOTE(review): assumes a freshly constructed Html.Node already owns an
    // empty, non-null Attr list (other nodes in this file are created without
    // setting Attr) — confirm against Html.Node.
    if (n.Attr != null)
    {
        // Attribute entries are copied by assignment into the new list.
        foreach (var attr in n.Attr)
        {
            clone.Attr.Add(attr);
        }
    }

    for (var child = n.FirstChild; child != null; child = child.NextSibling)
    {
        clone.AppendChild(deepCopy(child));
    }

    return clone;
}
// wrapText groups runs of non-block nodes into <p> elements.
//
// Block elements (those whose atom is in isBlockElement) are emitted as-is and
// end any paragraph currently being built. Whitespace-only text nodes that
// occur while no paragraph is open are emitted as-is too. Every other node
// opens a paragraph if needed and is appended to it. A finished paragraph is
// rendered and re-parsed before its nodes are added to the output.
private static Html.Node[] wrapText(Html.Node[] nodes)
{
    var output = new List<Html.Node>(nodes.Length);
    Html.Node paragraph = null;

    // Emits the paragraph under construction, if any, and resets it.
    void flushParagraph()
    {
        if (paragraph == null)
        {
            return;
        }
        // render and re-parse so p-inline-p expands
        output.AddRange(ParseDepth(Render(paragraph), 0));
        paragraph = null;
    }

    foreach (var node in nodes)
    {
        var isBlock = node.Type == Html.NodeType.Element
            && isBlockElement.Contains(node.DataAtom);
        if (isBlock)
        {
            flushParagraph();
            output.Add(node);
            continue;
        }

        if (paragraph == null)
        {
            // Blank text between blocks stays outside of any paragraph.
            if (node.Type == Html.NodeType.Text && node.Data.Trim() == "")
            {
                output.Add(node);
                continue;
            }

            paragraph = new Html.Node
            {
                Type = Html.NodeType.Element,
                Data = "p",
                DataAtom = Html.Atom.P,
            };
        }

        paragraph.AppendChild(node);
    }

    flushParagraph();
    return output.ToArray();
}