/// <summary>
/// Looks for a trackback URL by probing the document for markup of three blog engines, in order:
/// TypePad, B2Evolution, then WordPress.
/// </summary>
/// <param name="url">Address of the page; only used to build the WordPress trackback URL.</param>
/// <param name="document">The parsed document to inspect.</param>
/// <returns>The trackback URL that was found or built, or <c>null</c> when none of the probes matched.</returns>
private static string GetTrackbackUrl(string url, Document document)
{
    Node root = document.DocumentNode;

    // TypePad: the URL is the text of the "trackbacks-link" span.
    Node typepadSpan = root.SelectSingleNode(".//div[@class=\"trackbacks-info\"]/p/span[@class=\"trackbacks-link\"]");
    if (typepadSpan != null)
    {
        return typepadSpan.InnerText;
    }

    // B2Evolution: the URL is the href of the anchor inside the "trackback_url" paragraph.
    Node b2Anchor = root.SelectSingleNode(".//p[@class=\"trackback_url\"]/a");
    if (b2Anchor != null && b2Anchor.HasAttributes)
    {
        return b2Anchor.GetAttributeValue("href", null);
    }

    // WordPress: identified by the generator meta tag; the URL is derived from the page address.
    Node generatorMeta = root.SelectSingleNode("//meta[@name=\"generator\"]");
    if (generatorMeta == null)
    {
        return null;
    }

    string generatorName = generatorMeta.GetAttributeValue("content", string.Empty).Trim();
    if (!generatorName.StartsWith("WordPress", StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    // Insert a separator only when the page address does not already end with one.
    string separator = url.EndsWith("/", StringComparison.OrdinalIgnoreCase) ? string.Empty : "/";
    return url + separator + "trackback/";
}
/// <summary>
/// Runs <c>ConvertTo</c> on every direct child of <paramref name="node"/>, in collection order.
/// </summary>
/// <param name="node">The node whose children are converted.</param>
/// <param name="outText">The writer that receives the converted text.</param>
private void ConvertContentTo(Node node, TextWriter outText)
{
    foreach (Node child in node.ChildNodes)
    {
        ConvertTo(child, outText);
    }
}
/// <summary>
/// Extracts the text of the document, starting from the node chosen by
/// <c>TryToFindContentNode</c> or, when that yields nothing, from the body element itself.
/// </summary>
/// <param name="document">The parsed document to read.</param>
/// <returns>
/// The converted text with leading/trailing tabs and whitespace removed; an empty string when
/// there is no body element or when a <see cref="NullReferenceException"/> was raised along the way.
/// </returns>
private string GetContent(Document document)
{
    string text = string.Empty;

    try
    {
        Node bodyNode = document.DocumentNode.SelectSingleNode("//body");
        if (bodyNode != null)
        {
            Node startNode = TryToFindContentNode(bodyNode) ?? bodyNode;

            using (StringWriter buffer = new StringWriter(Constants.CurrentCulture))
            {
                ConvertTo(startNode, buffer);
                buffer.Flush();
                text = buffer.ToString();
            }
        }
    }
    catch (NullReferenceException)
    {
        // Best effort: the exception is swallowed and whatever "text" holds at this point
        // (the empty string unless the conversion ran to completion) is returned.
    }

    return text.Trim('\t').Trim();
}
/// <summary>
/// Writes a plain-text rendering of <paramref name="node"/> to <paramref name="outText"/>.
/// Element nodes may emit a line break and recurse into their children; text nodes are
/// filtered and written de-entitized. Any other node type produces no output.
/// </summary>
/// <param name="node">The node to convert.</param>
/// <param name="outText">The writer that receives the text.</param>
private void ConvertTo(Node node, TextWriter outText)
{
    NodeType kind = node.NodeType;

    if (kind == NodeType.Element)
    {
        string tag = node.Name;

        // p, br and hr each start a new line in the output.
        bool startsNewLine =
            string.Equals(tag, "p", StringComparison.OrdinalIgnoreCase) ||
            string.Equals(tag, "br", StringComparison.OrdinalIgnoreCase) ||
            string.Equals(tag, "hr", StringComparison.OrdinalIgnoreCase);
        if (startsNewLine)
        {
            outText.Write("\r\n");
        }

        // The content of h1 elements is left out entirely.
        bool isTopHeading = string.Equals(tag, "h1", StringComparison.OrdinalIgnoreCase);
        if (!isTopHeading && node.HasChildNodes)
        {
            ConvertContentTo(node, outText);
        }

        return;
    }

    if (kind != NodeType.Text)
    {
        return;
    }

    // Text that sits directly inside script or style elements is never emitted.
    string parentTag = node.ParentNode.Name;
    if (string.Equals(parentTag, "script", StringComparison.OrdinalIgnoreCase) ||
        string.Equals(parentTag, "style", StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    string rawText = ((TextNode)node).Text;
    if (Node.IsOverlappedClosingElement(rawText))
    {
        return;
    }

    // Skip whitespace-only text.
    string trimmedText = rawText.Trim();
    if (trimmedText.Length == 0)
    {
        return;
    }

    // Skip text that begins with "by " (case-insensitive, tested on the untrimmed text).
    if (rawText.StartsWith("by ", StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    // Skip text that, once trimmed, parses as a date/time.
    DateTime ignoredDate;
    if (DateTime.TryParse(trimmedText, out ignoredDate))
    {
        return;
    }

    outText.Write(Entity.DeEntitize(rawText));
}
/// <summary>
/// Recursively strips from the tree every node whose name is not listed in
/// <c>_allowedElements</c>. An allowed node is kept and its children are examined in turn;
/// a disallowed node is removed from its parent.
/// </summary>
/// <param name="node">The node to examine.</param>
private static void RemoveUnwantedNodes(Node node)
{
    if (Array.IndexOf(_allowedElements, node.Name) > -1)
    {
        // The recursive call may remove a child from node.ChildNodes. Enumerating that same
        // collection with foreach while it is being modified is unsafe, so walk a snapshot
        // of the children taken before anything is removed.
        int childCount = node.ChildNodes.Count;
        Node[] snapshot = new Node[childCount];
        for (int i = 0; i < childCount; i++)
        {
            snapshot[i] = node.ChildNodes[i];
        }

        foreach (Node childNode in snapshot)
        {
            RemoveUnwantedNodes(childNode);
        }
    }
    else
    {
        // A node without a parent cannot be detached from anything; the previous code
        // dereferenced ParentNode unconditionally and failed in that case.
        Node parent = node.ParentNode;
        if (parent != null)
        {
            parent.RemoveChild(node, false);
        }
    }
}
/// <summary>
/// Evaluates each XPath expression in <c>_xPaths</c>, in order, against
/// <paramref name="bodyNode"/> and returns the first node that is found.
/// </summary>
/// <param name="bodyNode">The node the expressions are evaluated against.</param>
/// <returns>The first matching node, or <c>null</c> when no expression selects a node.</returns>
private Node TryToFindContentNode(Node bodyNode)
{
    foreach (string candidatePath in _xPaths)
    {
        Node match = bodyNode.SelectSingleNode(candidatePath);
        if (match != null)
        {
            return match;
        }
    }

    return null;
}
/// <summary>
/// Extracts the text of the document, starting from the node chosen by
/// <c>TryToFindContentNode</c> or, when that yields nothing, from the body element itself.
/// </summary>
/// <param name="document">The parsed document to read.</param>
/// <returns>
/// The converted text with leading/trailing tabs and whitespace removed, or an empty string
/// when the document has no body element.
/// </returns>
private string GetContent(Document document)
{
    Node bodyNode = document.DocumentNode.SelectSingleNode("//body");
    if (bodyNode == null)
    {
        // Without a body there is nothing to convert. Previously a null node was handed to
        // ConvertTo, which dereferences its argument immediately.
        return string.Empty;
    }

    Node contentNode = TryToFindContentNode(bodyNode) ?? bodyNode;

    string body;
    using (StringWriter writer = new StringWriter(Constants.CurrentCulture))
    {
        ConvertTo(contentNode, writer);
        writer.Flush();
        body = writer.ToString();
    }

    return body.Trim('\t').Trim();
}
/// <summary>
/// Reads the text of the first <c>title</c> element in the document.
/// </summary>
/// <param name="document">The parsed document to read.</param>
/// <returns>
/// The title text with leading/trailing tabs and whitespace removed, or <c>null</c> when the
/// document has no <c>title</c> element.
/// </returns>
private static string GetTitle(Document document)
{
    Node titleNode = document.DocumentNode.SelectSingleNode("//title");
    if (titleNode == null)
    {
        return null;
    }

    string rawTitle = titleNode.InnerText;
    return rawTitle.Trim('\t').Trim();
}
//!pastedoc M:Sawmill.Rewriter.GetChildren``1(Sawmill.IRewriter{``0},``0)
/// <summary>
/// Get the immediate children of the value.
/// <seealso cref="M:Sawmill.IRewritable`1.GetChildren(System.Span{`0})" />
/// </summary>
/// <example>
/// Given a representation of the expression <c>(1+2)+3</c>,
/// <code>
/// Expr expr = new Add(
///     new Add(
///         new Lit(1),
///         new Lit(2)
///     ),
///     new Lit(3)
/// );
/// </code>
/// <see cref="M:Sawmill.Rewriter.GetChildren``1(Sawmill.IRewriter{``0},``0)" /> returns the immediate children of the topmost node.
/// <code>
/// Expr[] expected = new[]
///     {
///         new Add(
///             new Lit(1),
///             new Lit(2)
///         ),
///         new Lit(3)
///     };
/// Assert.Equal(expected, rewriter.GetChildren(expr));
/// </code>
/// </example>
/// <param name="value">The value</param>
/// <returns>The immediate children of <paramref name="value" /></returns>
/// <seealso cref="M:Sawmill.Rewriter.GetChildren``1(Sawmill.IRewriter{``0},``0)"/>
public static global::HtmlAgilityPack.HtmlNode[] GetChildren(this global::HtmlAgilityPack.HtmlNode value)
{
    // Delegates to the shared HtmlNodeRewriter instance.
    return HtmlNodeRewriter.Instance.GetChildren(value);
}
//!pastedoc M:Sawmill.Rewriter.SelfAndDescendantsBreadthFirst``1(Sawmill.IRewriter{``0},``0)
/// <summary>
/// Yields all of the nodes in the tree represented by <paramref name="value" /> in a breadth-first traversal order.
/// <para>
/// This is a breadth-first pre-order traversal.
/// </para>
/// </summary>
/// <param name="value">The value to traverse</param>
/// <returns>An enumerable containing all of the nodes in the tree represented by <paramref name="value" /> in a breadth-first traversal order.</returns>
/// <seealso cref="M:Sawmill.Rewriter.SelfAndDescendantsBreadthFirst``1(Sawmill.IRewriter{``0},``0)"/>
public static IEnumerable<global::HtmlAgilityPack.HtmlNode> SelfAndDescendantsBreadthFirst(this global::HtmlAgilityPack.HtmlNode value)
{
    // Delegates to the shared HtmlNodeRewriter instance.
    return HtmlNodeRewriter.Instance.SelfAndDescendantsBreadthFirst(value);
}
//!pastedoc M:Sawmill.Rewriter.DescendantsAndSelf``1(Sawmill.IRewriter{``0},``0)
/// <summary>
/// Yields all of the nodes in the tree represented by <paramref name="value" />, starting at the bottom.
/// <para>
/// This is a depth-first post-order traversal.
/// </para>
/// <seealso cref="M:Sawmill.Rewriter.SelfAndDescendants``1(Sawmill.IRewriter{``0},``0)" />
/// </summary>
/// <example>
/// <code>
/// Expr expr = new Add(
///     new Add(
///         new Lit(1),
///         new Lit(2)
///     ),
///     new Lit(3)
/// );
/// Expr[] expected = new[]
///     {
///         new Lit(1),
///         new Lit(2),
///         new Add(new Lit(1), new Lit(2)),
///         new Lit(3),
///         expr
///     };
/// Assert.Equal(expected, rewriter.DescendantsAndSelf(expr));
/// </code>
/// </example>
/// <param name="value">The value to traverse</param>
/// <returns>An enumerable containing all of the nodes in the tree represented by <paramref name="value" />, starting at the bottom.</returns>
/// <seealso cref="M:Sawmill.Rewriter.DescendantsAndSelf``1(Sawmill.IRewriter{``0},``0)"/>
public static IEnumerable<global::HtmlAgilityPack.HtmlNode> DescendantsAndSelf(this global::HtmlAgilityPack.HtmlNode value)
{
    // Delegates to the shared HtmlNodeRewriter instance.
    return HtmlNodeRewriter.Instance.DescendantsAndSelf(value);
}
//!pastedoc M:Sawmill.IRewriter`1.SetChildren(System.ReadOnlySpan{`0},`0)
/// <summary>
/// Set the immediate children of the value.
/// <para>
/// Callers should ensure that <paramref name="newChildren" /> contains the same number of children as was returned by
/// <see cref="M:Sawmill.IRewriter`1.GetChildren(System.Span{`0},`0)" />.
/// </para>
/// <seealso cref="M:Sawmill.IRewritable`1.SetChildren(System.ReadOnlySpan{`0})" />
/// </summary>
/// <example>
/// Given a representation of the expression <c>(1+2)+3</c>,
/// <code>
/// Expr expr = new Add(
///     new Add(
///         new Lit(1),
///         new Lit(2)
///     ),
///     new Lit(3)
/// );
/// </code>
/// <see cref="M:Sawmill.IRewriter`1.SetChildren(System.ReadOnlySpan{`0},`0)" /> replaces the immediate children of the topmost node.
/// <code>
/// Expr expected = new Add(
///     new Lit(4),
///     new Lit(5)
/// );
/// Assert.Equal(expected, rewriter.SetChildren(Children.Two(new Lit(4), new Lit(5)), expr));
/// </code>
/// </example>
/// <param name="newChildren">The new children</param>
/// <param name="value">The old value, whose immediate children should be replaced</param>
/// <returns>A copy of <paramref name="value" /> with updated children.</returns>
/// <seealso cref="M:Sawmill.IRewriter`1.SetChildren(System.ReadOnlySpan{`0},`0)"/>
public static global::HtmlAgilityPack.HtmlNode SetChildren(this global::HtmlAgilityPack.HtmlNode value, ReadOnlySpan<global::HtmlAgilityPack.HtmlNode> newChildren)
{
    // Delegates to the shared HtmlNodeRewriter instance; note the swapped argument order.
    return HtmlNodeRewriter.Instance.SetChildren(newChildren, value);
}
//!pastedoc M:Sawmill.IRewriter`1.GetChildren(System.Span{`0},`0)
/// <summary>
/// Copy the immediate children of the value into <paramref name="childrenReceiver" />.
/// <seealso cref="M:Sawmill.IRewritable`1.GetChildren(System.Span{`0})" />
/// </summary>
/// <example>
/// Given a representation of the expression <c>(1+2)+3</c>,
/// <code>
/// Expr expr = new Add(
///     new Add(
///         new Lit(1),
///         new Lit(2)
///     ),
///     new Lit(3)
/// );
/// </code>
/// <see cref="M:Sawmill.IRewriter`1.GetChildren(System.Span{`0},`0)" /> copies the immediate children of the topmost node into the span.
/// <code>
/// Expr[] expected = new[]
///     {
///         new Add(
///             new Lit(1),
///             new Lit(2)
///         ),
///         new Lit(3)
///     };
/// var array = new Expr[rewriter.CountChildren(expr)];
/// rewriter.GetChildren(array, expr);
/// Assert.Equal(expected, array);
/// </code>
/// </example>
/// <param name="childrenReceiver">
/// A <see cref="T:System.Span`1" /> to copy <paramref name="value" />'s immediate children into.
/// The <see cref="T:System.Span`1" />'s <see cref="P:System.Span`1.Length" /> will be equal to the number returned by <see cref="M:Sawmill.IRewriter`1.CountChildren(`0)" />.
/// </param>
/// <param name="value">The value</param>
/// <seealso cref="M:Sawmill.IRewriter`1.GetChildren(System.Span{`0},`0)"/>
public static void GetChildren(this global::HtmlAgilityPack.HtmlNode value, Span<global::HtmlAgilityPack.HtmlNode> childrenReceiver)
{
    // Delegates to the shared HtmlNodeRewriter instance; note the swapped argument order.
    HtmlNodeRewriter.Instance.GetChildren(childrenReceiver, value);
}
/// <summary>
/// Reads the text of the first <c>title</c> element in the document and removes every
/// occurrence of <c>_titleFilters</c> from it.
/// </summary>
/// <param name="document">The parsed document to read.</param>
/// <returns>
/// The cleaned-up title, or <c>null</c> when the document has no <c>title</c> element.
/// </returns>
private string GetTitle(Document document)
{
    Node titleNode = document.DocumentNode.SelectSingleNode("//title");
    if (titleNode == null)
    {
        return null;
    }

    // Order matters: strip surrounding tabs, drop the filter text, then trim whitespace.
    string withoutTabs = titleNode.InnerText.Trim('\t');
    string filtered = withoutTabs.Replace(_titleFilters, string.Empty);
    return filtered.Trim();
}