/// <summary>
/// Looks up the value of an attribute without a namespace on <paramref name="node"/>.
/// </summary>
/// <param name="node">Node whose <c>Attr</c> collection is searched.</param>
/// <param name="key">Attribute key to look for.</param>
/// <param name="defaultValue">Value handed back when no attribute matches.</param>
/// <returns>
/// The <c>Val</c> of the first attribute whose namespace is empty and whose key equals
/// <paramref name="key"/>; otherwise <paramref name="defaultValue"/>.
/// </returns>
internal static string GetAttributeValue(this Common.Html.Node node, string key, string defaultValue)
{
    foreach (var candidate in node.Attr)
    {
        // Skip namespaced attributes and attributes with a different key.
        if (candidate.Namespace != "" || candidate.Key != key)
        {
            continue;
        }

        return candidate.Val;
    }

    return defaultValue;
}
/// <summary>
/// Assigns <paramref name="value"/> to the attribute named <paramref name="key"/> (empty
/// namespace) on <paramref name="node"/>, adding the attribute if it is not present yet.
/// </summary>
/// <param name="node">Node whose <c>Attr</c> collection is updated.</param>
/// <param name="key">Attribute key to set.</param>
/// <param name="value">New attribute value.</param>
internal static void SetAttributeValue(this Common.Html.Node node, string key, string value)
{
    foreach (var existing in node.Attr)
    {
        // Skip namespaced attributes and attributes with a different key.
        if (existing.Namespace != "" || existing.Key != key)
        {
            continue;
        }

        // Only the first match is overwritten.
        existing.Val = value;
        return;
    }

    // No match: append a new attribute with an empty namespace.
    var created = new Common.Html.Attribute
    {
        Namespace = "",
        Key = key,
        Val = value,
    };
    node.Attr.Add(created);
}
/// <summary>
/// Lazily enumerates <paramref name="root"/> itself and the elements beneath it whose
/// <c>Data</c> equals <paramref name="tagName"/>, parents before their children.
/// </summary>
/// <param name="root">Node at which the walk starts; it is included when it matches.</param>
/// <param name="tagName">Value compared against each element's <c>Data</c>.</param>
/// <returns>
/// The matching element nodes. Nothing is produced for a node whose type is not
/// <c>Element</c>, and such a node's children are not visited.
/// </returns>
internal static IEnumerable<Common.Html.Node> Descendants(this Common.Html.Node root, string tagName)
{
    if (root.Type == Common.Html.NodeType.Element)
    {
        if (root.Data == tagName)
        {
            yield return root;
        }

        // Children are read only after the caller resumes the iterator, so the walk
        // sees any changes the caller made to the node it was just handed.
        var child = root.FirstChild;
        while (child != null)
        {
            foreach (var match in child.Descendants(tagName))
            {
                yield return match;
            }

            child = child.NextSibling;
        }
    }
}
/// <summary>
/// Collects the text contained in <paramref name="node"/>.
/// </summary>
/// <param name="node">Node to read text from.</param>
/// <returns>
/// For a text node, its <c>Data</c>; for an element, the concatenation of the inner text
/// of each child in sibling order; for any other node type, the empty string.
/// </returns>
internal static string GetInnerText(this Common.Html.Node node)
{
    if (node.Type == Common.Html.NodeType.Text)
    {
        return node.Data;
    }

    if (node.Type != Common.Html.NodeType.Element)
    {
        return "";
    }

    var text = new StringBuilder();
    var child = node.FirstChild;
    while (child != null)
    {
        text.Append(child.GetInnerText());
        child = child.NextSibling;
    }

    return text.ToString();
}
/// <summary>
/// Lazily enumerates the <c>href</c> attribute values found on <paramref name="node"/>
/// and on the elements beneath it, parents before their children.
/// </summary>
/// <param name="node">Node at which the walk starts.</param>
/// <returns>
/// Every <c>href</c> value (empty namespace) found on an element. Nothing is produced for
/// a node whose type is not <c>Element</c>, and such a node's children are not visited.
/// </returns>
private static IEnumerable<string> GetLinkUrls(Common.Html.Node node)
{
    if (node.Type == Common.Html.NodeType.Element)
    {
        // A null default distinguishes "no href attribute" from an empty href.
        var target = node.GetAttributeValue("href", null);
        if (target != null)
        {
            yield return target;
        }

        var child = node.FirstChild;
        while (child != null)
        {
            foreach (var nested in GetLinkUrls(child))
            {
                yield return nested;
            }

            child = child.NextSibling;
        }
    }
}
/// <summary>
/// Runs <paramref name="html"/> through the cleaning pipeline: preprocess, parse, rewrite
/// images as links, mark links <c>rel="nofollow"</c>, clean the nodes and render them.
/// </summary>
/// <param name="html">Markup to clean.</param>
/// <returns>The string produced by <c>Cleaner.Render</c> for the cleaned nodes.</returns>
public static string Clean(string html)
{
    var tree = Cleaner.Parse(Cleaner.Preprocess(Config, html));

    // Turn every <img> into an <a> whose text is the image's alt text, or "[image]"
    // when there is no alt attribute, and whose href is the former src.
    foreach (var image in tree.Descendants("img"))
    {
        image.Data = "a";
        image.DataAtom = atom.A;

        var label = new Common.Html.Node
        {
            Type = Common.Html.NodeType.Text,
            Data = "[image]",
        };
        image.AppendChild(label);

        foreach (var attribute in image.Attr)
        {
            // Only attributes with an empty namespace are rewritten.
            if (attribute.Namespace == "")
            {
                if (attribute.Key == "src")
                {
                    attribute.Key = "href";
                }
                else if (attribute.Key == "alt")
                {
                    label.Data = attribute.Val;
                }
            }
        }
    }

    // The converted images now have Data "a", so this pass covers them as well.
    foreach (var link in tree.Descendants("a"))
    {
        link.SetAttributeValue("rel", "nofollow");
    }

    tree = Cleaner.CleanNodes(Config, tree);
    return Cleaner.Render(tree);
}