Beispiel #1
0
        /// <summary>
        /// Simplifies redundant DIV/SECTION nesting. Starting at
        /// <paramref name="articleContent"/> and advancing with
        /// <c>NodeUtility.GetNextNode</c> until it yields null, each DIV or SECTION
        /// that has a parent and whose id does not start with "readability" is either
        /// removed (when <c>NodeUtility.IsElementWithoutContent</c> reports it as empty)
        /// or replaced by its only DIV/SECTION child, which inherits the wrapper's attributes.
        /// </summary>
        /// <param name="articleContent">The element at which the traversal starts; the tree is modified in place.</param>
        internal static void SimplifyNestedElements(IElement articleContent)
        {
            var node = articleContent;

            while (node != null)
            {
                var tagName = node.TagName;

                // Only parented DIV/SECTION wrappers are candidates; elements whose id
                // begins with "readability" are left alone. The prefix test is ordinal
                // because ids are identifiers, not linguistic text.
                if (node.Parent != null
                    && (tagName == "DIV" || tagName == "SECTION")
                    && !(!String.IsNullOrWhiteSpace(node.Id) && node.Id.StartsWith("readability", StringComparison.Ordinal)))
                {
                    if (NodeUtility.IsElementWithoutContent(node))
                    {
                        // NOTE(review): if RemoveAndGetNext can return a non-element node,
                        // the "as" cast yields null and ends the loop early - confirm.
                        node = NodeUtility.RemoveAndGetNext(node) as IElement;
                        continue;
                    }

                    if (NodeUtility.HasSingleTagInsideElement(node, "DIV") || NodeUtility.HasSingleTagInsideElement(node, "SECTION"))
                    {
                        var child = node.Children[0];
                        var attributes = node.Attributes;

                        // Copy the wrapper's attributes onto the child; an attribute with
                        // the same name already on the child is overwritten.
                        for (var i = 0; i < attributes.Length; i++)
                        {
                            child.SetAttribute(attributes[i].Name, attributes[i].Value);
                        }

                        node.Parent.ReplaceChild(child, node);

                        // Re-examine the promoted child: it may itself be a redundant wrapper.
                        node = child;
                        continue;
                    }
                }

                node = NodeUtility.GetNextNode(node);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Removes the elements visited after <paramref name="e"/> for which
        /// <paramref name="filter"/> returns true. The walk uses
        /// <c>NodeUtility.GetNextNode</c> and stops at null or at the marker node
        /// obtained from <c>NodeUtility.GetNextNode(e, true)</c>.
        /// </summary>
        /// <param name="e">Element to operate on</param>
        /// <param name="filter">
        /// Filter function invoked with the candidate element and its match string,
        /// built as <c>ClassName + " " + Id</c>. When null, nothing is removed.
        /// </param>
        internal static void CleanMatchedNodes(IElement e, Func<IElement, string, bool> filter = null)
        {
            // The parameter defaults to null; without a predicate nothing can match,
            // so there is nothing to remove (previously this dereferenced null).
            if (filter == null)
            {
                return;
            }

            // Presumably the second argument makes GetNextNode skip e's descendants, so
            // the marker is the first node past e's subtree - verify against NodeUtility.
            var endOfSearchMarkerNode = NodeUtility.GetNextNode(e, true);
            var next = NodeUtility.GetNextNode(e);

            while (next != null && next != endOfSearchMarkerNode)
            {
                if (filter(next, next.ClassName + " " + next.Id))
                {
                    next = NodeUtility.RemoveAndGetNext(next) as IElement;
                }
                else
                {
                    next = NodeUtility.GetNextNode(next);
                }
            }
        }