示例#1
0
 /// <summary>
 /// Looks up an un-namespaced attribute on <paramref name="node"/> by its key.
 /// </summary>
 /// <param name="node">Node whose <c>Attr</c> collection is searched.</param>
 /// <param name="key">Attribute key to look for (compared with <c>==</c>).</param>
 /// <param name="defaultValue">Value handed back when no attribute matches.</param>
 /// <returns>
 /// The <c>Val</c> of the first attribute whose namespace is the empty string and whose
 /// key equals <paramref name="key"/>; otherwise <paramref name="defaultValue"/>.
 /// </returns>
 internal static string GetAttributeValue(this Common.Html.Node node, string key, string defaultValue)
 {
     foreach (var candidate in node.Attr)
     {
         // Skip namespaced attributes and attributes carrying a different key.
         if (candidate.Namespace != "" || candidate.Key != key)
         {
             continue;
         }

         return candidate.Val;
     }

     // Nothing matched: fall back to the caller-supplied default.
     return defaultValue;
 }
示例#2
0
 /// <summary>
 /// Sets an un-namespaced attribute on <paramref name="node"/>, overwriting the first
 /// existing match or appending a new attribute when there is none.
 /// </summary>
 /// <param name="node">Node whose <c>Attr</c> collection is updated.</param>
 /// <param name="key">Attribute key to set.</param>
 /// <param name="value">Value to store in the attribute's <c>Val</c>.</param>
 internal static void SetAttributeValue(this Common.Html.Node node, string key, string value)
 {
     foreach (var existing in node.Attr)
     {
         // Only an attribute with an empty namespace and the same key is a match.
         if (existing.Namespace != "" || existing.Key != key)
         {
             continue;
         }

         // Update in place and stop; later duplicates (if any) are left untouched.
         existing.Val = value;
         return;
     }

     // No matching attribute was found, so add a fresh un-namespaced one.
     var added = new Common.Html.Attribute
     {
         Namespace = "",
         Key = key,
         Val = value,
     };
     node.Attr.Add(added);
 }
示例#3
0
 /// <summary>
 /// Lazily enumerates the element nodes named <paramref name="tagName"/> in the subtree
 /// rooted at <paramref name="root"/>, the root itself included.
 /// </summary>
 /// <param name="root">Node at which the search starts.</param>
 /// <param name="tagName">Value compared against each element's <c>Data</c>.</param>
 /// <returns>
 /// Matching nodes, parent before children and siblings in <c>NextSibling</c> order.
 /// A node whose type is not <c>Element</c> yields nothing, and its children are not visited.
 /// </returns>
 internal static IEnumerable <Common.Html.Node> Descendants(this Common.Html.Node root, string tagName)
 {
     if (root.Type == Common.Html.NodeType.Element)
     {
         if (root.Data == tagName)
         {
             yield return root;
         }

         // Walk the child list and recurse into every child subtree.
         var child = root.FirstChild;
         while (child != null)
         {
             foreach (var match in child.Descendants(tagName))
             {
                 yield return match;
             }

             child = child.NextSibling;
         }
     }
 }
示例#4
0
        /// <summary>
        /// Collects the text contained in <paramref name="node"/>.
        /// </summary>
        /// <param name="node">Node whose text content is gathered.</param>
        /// <returns>
        /// For a text node, its <c>Data</c>; for an element, the concatenated inner text of
        /// all of its children in sibling order; for any other node type, the empty string.
        /// </returns>
        internal static string GetInnerText(this Common.Html.Node node)
        {
            var kind = node.Type;

            if (kind == Common.Html.NodeType.Text)
            {
                return node.Data;
            }

            if (kind != Common.Html.NodeType.Element)
            {
                // Node types other than text and element contribute no text.
                return "";
            }

            // Element: concatenate the inner text of each child, recursively.
            var text = new StringBuilder();
            var child = node.FirstChild;
            while (child != null)
            {
                text.Append(child.GetInnerText());
                child = child.NextSibling;
            }

            return text.ToString();
        }
示例#5
0
        /// <summary>
        /// Lazily enumerates the <c>href</c> attribute values found on <paramref name="node"/>
        /// and on the element nodes beneath it.
        /// </summary>
        /// <param name="node">Node at which the walk starts.</param>
        /// <returns>
        /// The un-namespaced <c>href</c> value of every element that has one, parent before
        /// children. The element's tag name is not checked. A node whose type is not
        /// <c>Element</c> yields nothing, and its children are not visited.
        /// </returns>
        private static IEnumerable <string> GetLinkUrls(Common.Html.Node node)
        {
            if (node.Type == Common.Html.NodeType.Element)
            {
                // A null default lets us tell "no href attribute" apart from an empty one.
                var target = node.GetAttributeValue("href", null);
                if (target != null)
                {
                    yield return target;
                }

                var child = node.FirstChild;
                while (child != null)
                {
                    foreach (var nested in GetLinkUrls(child))
                    {
                        yield return nested;
                    }

                    child = child.NextSibling;
                }
            }
        }
示例#6
0
        /// <summary>
        /// Preprocesses and parses <paramref name="html"/>, rewrites every <c>img</c> element
        /// as an <c>a</c> element linking to the image, sets <c>rel="nofollow"</c> on every
        /// <c>a</c> element, then runs the tree through <c>Cleaner.CleanNodes</c> with
        /// <c>Config</c> and renders it.
        /// </summary>
        /// <param name="html">HTML text to clean.</param>
        /// <returns>The string produced by <c>Cleaner.Render</c> for the cleaned nodes.</returns>
        public static string Clean(string html)
        {
            html = Cleaner.Preprocess(Config, html);
            var nodes = Cleaner.Parse(html);

            foreach (var img in nodes.Descendants("img"))
            {
                // Turn the <img> into an <a> in place.
                img.Data     = "a";
                img.DataAtom = atom.A;

                // Link text; starts as a placeholder and is replaced by the alt text below.
                var alt = new Common.Html.Node
                {
                    Type = Common.Html.NodeType.Text,
                    Data = "[image]",
                };
                img.AppendChild(alt);

                foreach (var attr in img.Attr)
                {
                    if (attr.Namespace != "")
                    {
                        continue;
                    }

                    if (attr.Key == "src")
                    {
                        // The image source becomes the link target.
                        attr.Key = "href";
                    }
                    else if (attr.Key == "alt" && !string.IsNullOrWhiteSpace(attr.Val))
                    {
                        // Use the alt text only when it has visible content. An empty or
                        // blank alt (typical for decorative images) would otherwise wipe
                        // out the placeholder and leave a link with no visible text.
                        alt.Data = attr.Val;
                    }
                }
            }

            // Covers both original anchors and the ones just created from images.
            foreach (var a in nodes.Descendants("a"))
            {
                a.SetAttributeValue("rel", "nofollow");
            }

            nodes = Cleaner.CleanNodes(Config, nodes);
            return Cleaner.Render(nodes);
        }