/// <summary>
        /// Looks for a trackback URL in <paramref name="document"/> by probing, in order,
        /// the Typepad markup, the B2Evolution markup and finally the WordPress generator tag.
        /// </summary>
        /// <param name="url">Address of the page; used only to build the WordPress trackback address.</param>
        /// <param name="document">Parsed page to inspect.</param>
        /// <returns>The trackback URL, or <c>null</c> when none of the probes matches.</returns>
        private static string GetTrackbackUrl(string url, Document document)
        {
            // Typepad exposes the link as the text of a dedicated span.
            Node typepadLink = document.DocumentNode.SelectSingleNode(".//div[@class=\"trackbacks-info\"]/p/span[@class=\"trackbacks-link\"]");

            if (typepadLink != null)
            {
                return typepadLink.InnerText;
            }

            // B2Evolution exposes the link as the href of an anchor.
            Node b2EvolutionAnchor = document.DocumentNode.SelectSingleNode(".//p[@class=\"trackback_url\"]/a");

            if ((b2EvolutionAnchor != null) && b2EvolutionAnchor.HasAttributes)
            {
                return b2EvolutionAnchor.GetAttributeValue("href", null);
            }

            // WordPress is recognised by its generator meta tag; the address is derived from the page URL.
            Node generatorMeta = document.DocumentNode.SelectSingleNode("//meta[@name=\"generator\"]");

            if (generatorMeta == null)
            {
                return null;
            }

            string generatorName = generatorMeta.GetAttributeValue("content", string.Empty).Trim();

            if (!generatorName.StartsWith("WordPress", StringComparison.OrdinalIgnoreCase))
            {
                return null;
            }

            // Insert a separating slash only when the page URL does not already end with one.
            string separator = url.EndsWith("/", StringComparison.OrdinalIgnoreCase) ? string.Empty : "/";

            return url + separator + "trackback/";
        }
 /// <summary>
 /// Runs <c>ConvertTo</c> on every child of <paramref name="node"/>, in the order the children are enumerated.
 /// </summary>
 /// <param name="node">Node whose children are converted.</param>
 /// <param name="outText">Writer handed on to <c>ConvertTo</c> for each child.</param>
 private void ConvertContentTo(Node node, TextWriter outText)
 {
     foreach (Node child in node.ChildNodes)
     {
         ConvertTo(child, outText);
     }
 }
Exemple #3
0
        /// <summary>
        /// Extracts the text of the page: the first node found by <c>TryToFindContentNode</c>
        /// inside the body, or the whole body when no such node is found.
        /// </summary>
        /// <param name="document">Parsed page to read.</param>
        /// <returns>
        /// The converted text with surrounding tabs and white space removed; an empty string when
        /// the page has no body or a <see cref="NullReferenceException"/> occurs during extraction.
        /// </returns>
        private string GetContent(Document document)
        {
            string text = string.Empty;

            try
            {
                Node bodyNode = document.DocumentNode.SelectSingleNode("//body");

                if (bodyNode != null)
                {
                    Node contentNode = TryToFindContentNode(bodyNode) ?? bodyNode;

                    using (StringWriter writer = new StringWriter(Constants.CurrentCulture))
                    {
                        ConvertTo(contentNode, writer);
                        writer.Flush();

                        text = writer.ToString();
                    }
                }
            }
            catch (NullReferenceException)
            {
                // Best effort: a null dereference while walking the page leaves the result empty.
            }

            return text.Trim('\t').Trim();
        }
Exemple #4
0
        /// <summary>
        /// Writes the plain-text rendering of <paramref name="node"/> to <paramref name="outText"/>.
        /// Element nodes are walked recursively; text nodes are written after entity decoding
        /// unless they are filtered out. Other node types produce no output.
        /// </summary>
        /// <param name="node">Node to convert.</param>
        /// <param name="outText">Writer receiving the text.</param>
        private void ConvertTo(Node node, TextWriter outText)
        {
            switch (node.NodeType)
            {
            case NodeType.Element:
            {
                string tag = node.Name;

                // p, br and hr each start a new line in the output.
                bool startsNewLine =
                    string.Equals(tag, "p", StringComparison.OrdinalIgnoreCase) ||
                    string.Equals(tag, "br", StringComparison.OrdinalIgnoreCase) ||
                    string.Equals(tag, "hr", StringComparison.OrdinalIgnoreCase);

                if (startsNewLine)
                {
                    outText.Write("\r\n");
                }

                // The subtree of an h1 element is skipped entirely.
                bool isTopHeading = string.Equals(tag, "h1", StringComparison.OrdinalIgnoreCase);

                if (!isTopHeading && node.HasChildNodes)
                {
                    ConvertContentTo(node, outText);
                }

                break;
            }

            case NodeType.Text:
            {
                string containerTag = node.ParentNode.Name;

                // Text directly inside script or style elements is never emitted.
                bool insideScriptOrStyle =
                    string.Equals(containerTag, "script", StringComparison.OrdinalIgnoreCase) ||
                    string.Equals(containerTag, "style", StringComparison.OrdinalIgnoreCase);

                if (insideScriptOrStyle)
                {
                    break;
                }

                string rawText = ((TextNode)node).Text;

                if (Node.IsOverlappedClosingElement(rawText))
                {
                    break;
                }

                string trimmedText = rawText.Trim();
                DateTime ignoredDate;

                // Dropped: blank text, text starting with "by " (any casing), and text that parses as a date/time.
                bool isFilteredOut =
                    trimmedText.Length == 0 ||
                    rawText.StartsWith("by ", StringComparison.OrdinalIgnoreCase) ||
                    DateTime.TryParse(trimmedText, out ignoredDate);

                if (!isFilteredOut)
                {
                    outText.Write(Entity.DeEntitize(rawText));
                }

                break;
            }
            }
        }
 /// <summary>
 /// Recursively prunes the tree rooted at <paramref name="node"/>: a node whose name is not
 /// listed in <c>_allowedElements</c> is removed from its parent; an allowed node is kept and
 /// its children are examined in turn.
 /// </summary>
 /// <param name="node">
 /// Node to examine. A node that is not allowed must have a parent, because removal goes
 /// through <c>node.ParentNode</c>.
 /// </param>
 private static void RemoveUnwantedNodes(Node node)
 {
     if (Array.IndexOf(_allowedElements, node.Name) < 0)
     {
         node.ParentNode.RemoveChild(node, false);
         return;
     }

     // The recursive call may remove a child from node.ChildNodes (RemoveChild is expected to
     // mutate the parent's child collection). Walk a snapshot so that removal cannot
     // invalidate or skip entries of an enumeration that is still in progress.
     System.Collections.Generic.List<Node> snapshot = new System.Collections.Generic.List<Node>();

     foreach (Node childNode in node.ChildNodes)
     {
         snapshot.Add(childNode);
     }

     foreach (Node childNode in snapshot)
     {
         RemoveUnwantedNodes(childNode);
     }
 }
        /// <summary>
        /// Evaluates each expression in <c>_xPaths</c> against <paramref name="bodyNode"/>, in order,
        /// and returns the first node that is found.
        /// </summary>
        /// <param name="bodyNode">Node the XPath expressions are evaluated against.</param>
        /// <returns>The first matching node, or <c>null</c> when no expression matches.</returns>
        private Node TryToFindContentNode(Node bodyNode)
        {
            foreach (string candidatePath in _xPaths)
            {
                Node match = bodyNode.SelectSingleNode(candidatePath);

                if (match != null)
                {
                    return match;
                }
            }

            return null;
        }
        /// <summary>
        /// Extracts the text of the page: the first node found by <c>TryToFindContentNode</c>
        /// inside the body, or the whole body when no such node is found.
        /// </summary>
        /// <param name="document">Parsed page to read.</param>
        /// <returns>
        /// The converted text with surrounding tabs and white space removed, or an empty string
        /// when the page has no body element.
        /// </returns>
        private string GetContent(Document document)
        {
            Node bodyNode = document.DocumentNode.SelectSingleNode("//body");

            // Without a body there is nothing to convert; passing null on to ConvertTo would
            // dereference it, so report empty content instead.
            if (bodyNode == null)
            {
                return string.Empty;
            }

            Node contentNode = TryToFindContentNode(bodyNode) ?? bodyNode;

            string body;

            using (StringWriter writer = new StringWriter(Constants.CurrentCulture))
            {
                ConvertTo(contentNode, writer);
                writer.Flush();

                body = writer.ToString();
            }

            return body.Trim('\t').Trim();
        }
        /// <summary>
        /// Reads the text of the first <c>title</c> element found in <paramref name="document"/>.
        /// </summary>
        /// <param name="document">Parsed page to read.</param>
        /// <returns>
        /// The title text with surrounding tabs and white space removed, or <c>null</c> when the
        /// page has no <c>title</c> element.
        /// </returns>
        private static string GetTitle(Document document)
        {
            Node titleNode = document.DocumentNode.SelectSingleNode("//title");

            if (titleNode == null)
            {
                return null;
            }

            string rawTitle = titleNode.InnerText;

            return rawTitle.Trim('\t').Trim();
        }
Exemple #9
0
 //!pastedoc M:Sawmill.Rewriter.GetChildren``1(Sawmill.IRewriter{``0},``0)
 /// <summary>
 ///     Returns the immediate children of <paramref name="value" /> as an array.
 ///     The call is forwarded to <c>HtmlNodeRewriter.Instance</c>.
 ///     <seealso cref="M:Sawmill.IRewritable`1.GetChildren(System.Span{`0})" />
 /// </summary>
 /// <example>
 ///     For a tree that represents the expression <c>(1+2)+3</c>,
 ///     <code>
 ///     Expr expr = new Add(
 ///         new Add(
 ///             new Lit(1),
 ///             new Lit(2)
 ///         ),
 ///         new Lit(3)
 ///     );
 ///     </code>
 ///     <see cref="M:Sawmill.Rewriter.GetChildren``1(Sawmill.IRewriter{``0},``0)" /> yields only the nodes directly beneath the topmost node.
 ///     <code>
 ///     Expr[] expected = new[]
 ///         {
 ///             new Add(
 ///                 new Lit(1),
 ///                 new Lit(2)
 ///             ),
 ///             new Lit(3)
 ///         };
 ///     Assert.Equal(expected, rewriter.GetChildren(expr));
 ///     </code>
 /// </example>
 /// <param name="value">The node whose immediate children are requested</param>
 /// <returns>The immediate children of <paramref name="value" /></returns>
 /// <seealso cref="M:Sawmill.Rewriter.GetChildren``1(Sawmill.IRewriter{``0},``0)"/>
 public static global::HtmlAgilityPack.HtmlNode[] GetChildren(this global::HtmlAgilityPack.HtmlNode value)
 {
     return HtmlNodeRewriter.Instance.GetChildren(value);
 }
Exemple #10
0
 //!pastedoc M:Sawmill.Rewriter.SelfAndDescendantsBreadthFirst``1(Sawmill.IRewriter{``0},``0)
 /// <summary>
 ///     Enumerates every node of the tree rooted at <paramref name="value" /> in breadth-first order.
 ///     The call is forwarded to <c>HtmlNodeRewriter.Instance</c>.
 ///
 ///     <para>
 ///     The traversal is breadth-first and pre-order.
 ///     </para>
 /// </summary>
 /// <param name="value">The root of the tree to traverse</param>
 /// <returns>An enumerable of all nodes in the tree represented by <paramref name="value" />, in breadth-first traversal order.</returns>
 /// <seealso cref="M:Sawmill.Rewriter.SelfAndDescendantsBreadthFirst``1(Sawmill.IRewriter{``0},``0)"/>
 public static IEnumerable<global::HtmlAgilityPack.HtmlNode> SelfAndDescendantsBreadthFirst(this global::HtmlAgilityPack.HtmlNode value)
 {
     return HtmlNodeRewriter.Instance.SelfAndDescendantsBreadthFirst(value);
 }
Exemple #11
0
 //!pastedoc M:Sawmill.Rewriter.DescendantsAndSelf``1(Sawmill.IRewriter{``0},``0)
 /// <summary>
 ///     Enumerates every node of the tree rooted at <paramref name="value" />, beginning at the bottom.
 ///     The call is forwarded to <c>HtmlNodeRewriter.Instance</c>.
 ///
 ///     <para>
 ///     The traversal is depth-first and post-order.
 ///     </para>
 ///     <seealso cref="M:Sawmill.Rewriter.SelfAndDescendants``1(Sawmill.IRewriter{``0},``0)" />
 /// </summary>
 /// <example>
 ///   <code>
 ///     Expr expr = new Add(
 ///         new Add(
 ///             new Lit(1),
 ///             new Lit(2)
 ///         ),
 ///         new Lit(3)
 ///     );
 ///     Expr[] expected = new[]
 ///         {
 ///             new Lit(1),
 ///             new Lit(2),
 ///             new Add(new Lit(1), new Lit(2)),
 ///             new Lit(3),
 ///             expr
 ///         };
 ///     Assert.Equal(expected, rewriter.DescendantsAndSelf(expr));
 ///   </code>
 /// </example>
 /// <param name="value">The root of the tree to traverse</param>
 /// <returns>An enumerable of all nodes in the tree represented by <paramref name="value" />, starting at the bottom.</returns>
 /// <seealso cref="M:Sawmill.Rewriter.DescendantsAndSelf``1(Sawmill.IRewriter{``0},``0)"/>
 public static IEnumerable<global::HtmlAgilityPack.HtmlNode> DescendantsAndSelf(this global::HtmlAgilityPack.HtmlNode value)
 {
     return HtmlNodeRewriter.Instance.DescendantsAndSelf(value);
 }
Exemple #12
0
 //!pastedoc M:Sawmill.IRewriter`1.SetChildren(System.ReadOnlySpan{`0},`0)
 /// <summary>
 ///     Replaces the immediate children of <paramref name="value" /> with <paramref name="newChildren" />.
 ///     The call is forwarded to <c>HtmlNodeRewriter.Instance</c>.
 ///     <para>
 ///     It is the caller's responsibility to pass in <paramref name="newChildren" /> exactly as many children as
 ///     <see cref="M:Sawmill.IRewriter`1.GetChildren(System.Span{`0},`0)" /> returned.
 ///     </para>
 ///     <seealso cref="M:Sawmill.IRewritable`1.SetChildren(System.ReadOnlySpan{`0})" />
 /// </summary>
 /// <example>
 ///     For a tree that represents the expression <c>(1+2)+3</c>,
 ///     <code>
 ///     Expr expr = new Add(
 ///         new Add(
 ///             new Lit(1),
 ///             new Lit(2)
 ///         ),
 ///         new Lit(3)
 ///     );
 ///     </code>
 ///     <see cref="M:Sawmill.IRewriter`1.SetChildren(System.ReadOnlySpan{`0},`0)" /> swaps out the nodes directly beneath the topmost node.
 ///     <code>
 ///     Expr expected = new Add(
 ///         new Lit(4),
 ///         new Lit(5)
 ///     );
 ///     Assert.Equal(expected, rewriter.SetChildren(Children.Two(new Lit(4), new Lit(5)), expr));
 ///     </code>
 /// </example>
 /// <param name="value">The old value, whose immediate children should be replaced</param>
 /// <param name="newChildren">The new children</param>
 /// <returns>A copy of <paramref name="value" /> with updated children.</returns>
 /// <seealso cref="M:Sawmill.IRewriter`1.SetChildren(System.ReadOnlySpan{`0},`0)"/>
 public static global::HtmlAgilityPack.HtmlNode SetChildren(this global::HtmlAgilityPack.HtmlNode value, ReadOnlySpan<global::HtmlAgilityPack.HtmlNode> newChildren)
 {
     return HtmlNodeRewriter.Instance.SetChildren(newChildren, value);
 }
Exemple #13
0
 //!pastedoc M:Sawmill.IRewriter`1.GetChildren(System.Span{`0},`0)
 /// <summary>
 ///     Copies the immediate children of <paramref name="value" /> into <paramref name="childrenReceiver" />.
 ///     The call is forwarded to <c>HtmlNodeRewriter.Instance</c>.
 ///     <seealso cref="M:Sawmill.IRewritable`1.GetChildren(System.Span{`0})" />
 /// </summary>
 /// <example>
 ///     For a tree that represents the expression <c>(1+2)+3</c>,
 ///     <code>
 ///     Expr expr = new Add(
 ///         new Add(
 ///             new Lit(1),
 ///             new Lit(2)
 ///         ),
 ///         new Lit(3)
 ///     );
 ///     </code>
 ///     <see cref="M:Sawmill.IRewriter`1.GetChildren(System.Span{`0},`0)" /> fills the span with the nodes directly beneath the topmost node.
 ///     <code>
 ///     Expr[] expected = new[]
 ///         {
 ///             new Add(
 ///                 new Lit(1),
 ///                 new Lit(2)
 ///             ),
 ///             new Lit(3)
 ///         };
 ///     var array = new Expr[rewriter.CountChildren(expr)];
 ///     rewriter.GetChildren(array, expr);
 ///     Assert.Equal(expected, array);
 ///     </code>
 /// </example>
 /// <param name="value">The node whose immediate children are copied</param>
 /// <param name="childrenReceiver">
 ///     A <see cref="T:System.Span`1" /> that receives the immediate children of <paramref name="value" />.
 ///     Its <see cref="P:System.Span`1.Length" /> will be equal to the number returned by <see cref="M:Sawmill.IRewriter`1.CountChildren(`0)" />.
 /// </param>
 /// <seealso cref="M:Sawmill.IRewriter`1.GetChildren(System.Span{`0},`0)"/>
 public static void GetChildren(this global::HtmlAgilityPack.HtmlNode value, Span<global::HtmlAgilityPack.HtmlNode> childrenReceiver)
 {
     HtmlNodeRewriter.Instance.GetChildren(childrenReceiver, value);
 }
Exemple #14
0
        /// <summary>
        /// Reads the text of the first <c>title</c> element found in <paramref name="document"/>
        /// and passes it through <c>Replace</c> with <c>_titleFilters</c> and an empty replacement.
        /// </summary>
        /// <param name="document">Parsed page to read.</param>
        /// <returns>
        /// The filtered title with surrounding tabs and white space removed, or <c>null</c> when
        /// the page has no <c>title</c> element.
        /// </returns>
        private string GetTitle(Document document)
        {
            Node titleNode = document.DocumentNode.SelectSingleNode("//title");

            if (titleNode == null)
            {
                return null;
            }

            string withoutTabs = titleNode.InnerText.Trim('\t');
            string filtered = withoutTabs.Replace(_titleFilters, string.Empty);

            return filtered.Trim();
        }