Exemplo n.º 1
0
        // forceMaxDepth limits how deep the tree rooted at n may go. Each level of
        // element nesting consumes one unit of depth. The node reached when depth
        // hits zero is rewritten in place into a text node reading "[omitted]",
        // and every sibling that follows it is detached from their shared parent.
        // Non-element nodes reached with depth remaining are left untouched.
        private static void forceMaxDepth(Html.Node n, int depth)
        {
            if (depth != 0)
            {
                // Only elements are descended into.
                if (n.Type == Html.NodeType.Element)
                {
                    var child = n.FirstChild;
                    while (child != null)
                    {
                        forceMaxDepth(child, depth - 1);
                        // Read the sibling link after the recursive call: the
                        // call may have removed the siblings that followed.
                        child = child.NextSibling;
                    }
                }
                return;
            }

            // Depth budget exhausted: turn this node into a placeholder text node.
            n.Type = Html.NodeType.Text;
            n.FirstChild = null;
            n.LastChild = null;
            n.Attr.Clear();
            n.DataAtom = 0;
            n.Data = "[omitted]";

            // Drop everything after the placeholder so it is the last node on
            // its level.
            for (var next = n.NextSibling; next != null; next = n.NextSibling)
            {
                n.Parent.RemoveChild(next);
            }
        }
Exemplo n.º 2
0
        // cleanNodes filters every node of the array in place through filterNode,
        // using Config.DefaultConfig when c is null. A filtered node whose atom is
        // Li is placed inside a freshly created <ul> element, which takes its slot
        // in the array. When the config has WrapText set, the result of wrapText
        // over the array is returned; otherwise the (mutated) input array is.
        private static Html.Node[] cleanNodes(Config c, Html.Node[] nodes)
        {
            c = c ?? Config.DefaultConfig;

            for (var index = 0; index < nodes.Length; index++)
            {
                var cleaned = filterNode(c, nodes[index]);
                nodes[index] = cleaned;

                if (cleaned.DataAtom != Html.Atom.Li)
                {
                    continue;
                }

                // A bare list item gets a <ul> parent.
                var list = new Html.Node
                {
                    Type = Html.NodeType.Element,
                    Data = "ul",
                    DataAtom = Html.Atom.Ul,
                };
                list.AppendChild(cleaned);
                nodes[index] = list;
            }

            return c.WrapText ? wrapText(nodes) : nodes;
        }
Exemplo n.º 3
0
        // cleanChildren detaches every child of parent, runs each one through
        // filterNode, and re-attaches the filtered nodes in their original order.
        // When the config has WrapText set and the parent is listed in c.wrap (by
        // atom) or, for parents whose atom is 0, in c.wrapCustom (by tag name),
        // the filtered children are passed through wrapText before being
        // re-attached.
        private static void cleanChildren(Config c, Html.Node parent)
        {
            var kept = new List<Html.Node>();

            // Always take the current first child; removing it promotes the next.
            for (var head = parent.FirstChild; head != null; head = parent.FirstChild)
            {
                parent.RemoveChild(head);
                kept.Add(filterNode(c, head));
            }

            if (c.WrapText)
            {
                // The custom-name lookup only applies when the atom lookup missed
                // and the parent's atom is 0.
                var wrappable = c.wrap.Contains(parent.DataAtom)
                                || (parent.DataAtom == 0 && c.wrapCustom.Contains(parent.Data));
                if (wrappable)
                {
                    kept = new List<Html.Node>(wrapText(kept.ToArray()));
                }
            }

            foreach (var node in kept)
            {
                parent.AppendChild(node);
            }
        }
Exemplo n.º 4
0
 // CleanNode sanitizes an HTML node according to the given config, falling
 // back to Config.DefaultConfig when c is null. The work is done on a copy
 // produced by deepCopy, which is then handed to filterNode: text nodes pass
 // through unchanged, element nodes have their children and attributes
 // checked, and elements that are not permitted are replaced by a text node
 // holding their rendered source.
 public static Html.Node CleanNode(Config c, Html.Node n)
 {
     var config = c ?? Config.DefaultConfig;
     var copy = deepCopy(n);
     return filterNode(config, copy);
 }
Exemplo n.º 5
0
        // cleanNode sanitizes a single element node in place.
        //
        // If the element is found neither in c.elem (by atom) nor in c.elemCustom
        // (by tag name), it is replaced by a text node containing its unescaped
        // rendered source. Otherwise its children are cleaned via cleanChildren
        // and its attribute list is rebuilt, keeping an attribute only when:
        //   - it has an empty namespace and is allowed for this element or listed
        //     in c.attr / c.attrCustom,
        //   - cleanURL accepts it, and
        //   - its value matches every pattern registered for it, if any.
        // An <img> left without a src attribute is replaced by an empty text node.
        private static Html.Node cleanNode(Config c, Html.Node n)
        {
            var knownElem = c.elem.TryGetValue(n.DataAtom, out var allowedAttr);
            var knownCustom = c.elemCustom.TryGetValue(n.Data, out var customAttr);

            if (!knownElem && !knownCustom)
            {
                // Element not permitted: show its source as plain text instead.
                return text(Html.UnescapeString(Render(n)));
            }

            cleanChildren(c, n);

            var sawSrc = false;

            // Snapshot the attributes, then rebuild the list with the survivors.
            var candidates = n.Attr.ToArray();
            n.Attr.Clear();

            foreach (var attr in candidates)
            {
                var a = Html.Atom.Lookup(Encoding.UTF8.GetBytes(attr.Key));

                Regex elemPattern = null, customPattern = null;
                var perElem = allowedAttr?.TryGetValue(a, out elemPattern) ?? false;
                var perCustom = customAttr?.TryGetValue(attr.Key, out customPattern) ?? false;
                var global = c.attr.Contains(a);
                var globalCustom = c.attrCustom.Contains(attr.Key);

                var permitted = attr.Namespace == ""
                                && (perElem || perCustom || global || globalCustom);
                if (!permitted)
                {
                    continue;
                }

                if (!cleanURL(c, a, attr))
                {
                    continue;
                }

                // A registered pattern (if any) must match the attribute value.
                var rejected = (elemPattern != null && !elemPattern.IsMatch(attr.Val))
                               || (customPattern != null && !customPattern.IsMatch(attr.Val));
                if (rejected)
                {
                    continue;
                }

                if (a == Html.Atom.Src)
                {
                    sawSrc = true;
                }

                n.Attr.Add(attr);
            }

            // An image with no usable src is replaced with an empty text node.
            return (n.DataAtom == Html.Atom.Img && !sawSrc) ? text("") : n;
        }
Exemplo n.º 6
0
 // filterNode decides how a node is sanitized based on its type:
 //   - element nodes are handed to cleanNode;
 //   - text nodes are returned unchanged;
 //   - comment nodes are returned unchanged unless c.EscapeComments is set;
 //   - everything else (including comments when EscapeComments is set) is
 //     replaced by a text node holding the node's rendered form.
 private static Html.Node filterNode(Config c, Html.Node n)
 {
     if (n.Type == Html.NodeType.Element)
     {
         return cleanNode(c, n);
     }

     var passThrough = n.Type == Html.NodeType.Text
                       || (n.Type == Html.NodeType.Comment && !c.EscapeComments);

     return passThrough ? n : text(Render(n));
 }
Exemplo n.º 7
0
        // deepCopy returns a new tree that mirrors the one rooted at n: each node
        // is re-created with the same Type, Namespace, Data and DataAtom, and the
        // children are copied recursively in order. The clone has no parent or
        // siblings.
        //
        // The attribute list is duplicated rather than shared. cleanNode clears
        // and refills Attr on the node it is given, so sharing the list between
        // the clone and the source would strip attributes from the caller's
        // original tree as well.
        // NOTE(review): the list is copied shallowly; if the attribute type is a
        // reference type, individual attribute objects are still shared - confirm
        // whether cleanURL mutates them.
        private static Html.Node deepCopy(Html.Node n)
        {
            var clone = new Html.Node
            {
                Type      = n.Type,
                Attr      = cloneAttrList(n.Attr),
                Namespace = n.Namespace,
                Data      = n.Data,
                DataAtom  = n.DataAtom,
            };

            for (var c = n.FirstChild; c != null; c = c.NextSibling)
            {
                clone.AppendChild(deepCopy(c));
            }
            return clone;
        }

        // cloneAttrList returns a new list holding the same elements as source,
        // or null when source is null (matching what plain assignment produced).
        private static List<T> cloneAttrList<T>(IEnumerable<T> source)
        {
            return source == null ? null : new List<T>(source);
        }
Exemplo n.º 8
0
        // wrapText groups runs of non-block nodes into <p> elements.
        //
        // Nodes that are elements listed in isBlockElement are emitted as-is and
        // end any paragraph currently being collected. Whitespace-only text nodes
        // seen while no paragraph is open are also emitted as-is. Every other
        // node is moved into the open paragraph, creating one if necessary. A
        // finished paragraph is rendered and parsed again with ParseDepth, and
        // the resulting nodes are what end up in the output.
        private static Html.Node[] wrapText(Html.Node[] nodes)
        {
            var result = new List<Html.Node>(nodes.Length);
            Html.Node paragraph = null;

            // Emits the open paragraph, if any, and resets the collector.
            void flush()
            {
                if (paragraph == null)
                {
                    return;
                }

                // Round-trip through render + parse so that block content nested
                // in the paragraph gets expanded (p-inline-p).
                result.AddRange(ParseDepth(Render(paragraph), 0));
                paragraph = null;
            }

            for (var i = 0; i < nodes.Length; i++)
            {
                var node = nodes[i];

                var isBlock = node.Type == Html.NodeType.Element
                              && isBlockElement.Contains(node.DataAtom);
                if (isBlock)
                {
                    flush();
                    result.Add(node);
                    continue;
                }

                if (paragraph == null)
                {
                    // Leading whitespace is not worth a paragraph of its own.
                    var blankText = node.Type == Html.NodeType.Text && node.Data.Trim() == "";
                    if (blankText)
                    {
                        result.Add(node);
                        continue;
                    }

                    paragraph = new Html.Node
                    {
                        Type = Html.NodeType.Element,
                        Data = "p",
                        DataAtom = Html.Atom.P,
                    };
                }

                paragraph.AppendChild(node);
            }

            flush();
            return result.ToArray();
        }