/// <summary>
/// Reads one search-result table row and appends a <see cref="SearchResult"/> built from it
/// to <paramref name="SearchResultUrlList"/>.
/// </summary>
/// <param name="trElement">Candidate row; anything that is not a <c>tr</c> element is ignored.</param>
/// <param name="SearchResultUrlList">List that receives the parsed result.</param>
/// <remarks>
/// Book data is only read from rows with exactly five <c>td</c> descendants. A <c>tr</c> with any
/// other cell count still appends an empty result, which matches the previous behaviour.
/// </remarks>
private void ProcessBookElement(HtmlNode trElement, List<SearchResult> SearchResultUrlList)
        {
            if (trElement == null || trElement.Name != "tr") return;

            var item = new SearchResult();

            var cells = trElement.DescendantNodes().Where(n => n.Name == "td").ToArray();

            if (cells.Length == 5)
            {
                // Book name and index page URL come from the first link in the second cell.
                // FirstOrDefault (instead of First) makes the null check meaningful: a cell
                // without a link is skipped rather than throwing InvalidOperationException.
                var bookLink = cells[1].DescendantNodes().FirstOrDefault(n => n.Name == "a");

                if (bookLink != null)
                {
                    // A link without href, or with an href that does not form a valid absolute
                    // URI, leaves IndexPageUri unset instead of aborting the whole row.
                    var href = bookLink.Attributes["href"];
                    Uri indexPageUri;
                    if (href != null
                        && Uri.TryCreate("http://www.xiaoelang.com" + href.Value, UriKind.Absolute, out indexPageUri))
                    {
                        item.IndexPageUri = indexPageUri;
                    }

                    item.BookName = bookLink.InnerText.Trim();
                    item.Book = new Book();
                    item.Book.Name = item.BookName;
                }

                // Name of the most recently updated chapter: first link in the third cell.
                var chapterLink = cells[2].DescendantNodes().FirstOrDefault(n => n.Name == "a");

                if (chapterLink != null)
                {
                    item.LastUpdateChapterName = chapterLink.InnerText.Trim();
                }

                // The last update time is not parsed yet. Sample cell:
                //<td class="xt" align="center" valign="middle" width="78">2011-08-26</td>
            }

            SearchResultUrlList.Add(item);
        }
        /// <summary>
        /// Returns the nodes below <paramref name="node"/> whose tag name equals
        /// <paramref name="type"/>, compared case-insensitively via invariant lower-casing.
        /// </summary>
        /// <param name="node">Root of the search; <c>null</c> yields <c>null</c>.</param>
        /// <param name="type">Tag name to look for.</param>
        /// <returns>
        /// A lazily evaluated sequence of matching nodes, or <c>null</c> when <paramref name="node"/> is <c>null</c>.
        /// </returns>
        /// <remarks>
        /// Despite the method name, the search uses <c>DescendantNodes()</c>, so it is not
        /// restricted to direct children.
        /// </remarks>
        public static IEnumerable<HtmlNode> GetDirectChildrenByType(HtmlNode node, string type)
        {
            if (node == null)
            {
                return null;
            }

            return from candidate in node.DescendantNodes()
                   where candidate.Name.ToLower(CultureInfo.InvariantCulture) == type.ToLower(CultureInfo.InvariantCulture)
                   select candidate;
        }
        /// <summary>
        /// Returns the <paramref name="index"/>-th (zero-based) node below <paramref name="node"/>
        /// whose tag name equals <paramref name="type"/>, compared case-insensitively.
        /// </summary>
        /// <param name="node">Root of the search; <c>null</c> yields <c>null</c>.</param>
        /// <param name="type">Tag name to look for.</param>
        /// <param name="index">Zero-based position among the matches.</param>
        /// <returns>
        /// The matching node, or <c>null</c> when there are no matches or fewer than
        /// <paramref name="index"/> + 1 of them.
        /// </returns>
        /// <remarks>
        /// Despite the method name, the search uses <c>DescendantNodes()</c>, so it is not
        /// restricted to direct children.
        /// </remarks>
        public static HtmlNode GetSingleDirectChildByTypeAndIndex(HtmlNode node, string type, int index)
        {
            if (node == null) return null;

            // FirstOrDefault replaces the old Count()/First() pair: the sequence is walked only
            // once, and an index past the last match returns null instead of throwing
            // InvalidOperationException.
            return node.DescendantNodes()
                .Where(n => n.Name.ToLower(CultureInfo.InvariantCulture) == type.ToLower(CultureInfo.InvariantCulture))
                .Skip(index)
                .FirstOrDefault();
        }
        /// <summary>
        /// Finds the first node below <paramref name="node"/> (in document order) whose id equals
        /// <paramref name="id"/>, ignoring case.
        /// </summary>
        /// <param name="node">Root of the search; the root itself is not tested.</param>
        /// <param name="id">Id to look for.</param>
        /// <returns>The first matching node, or <c>null</c> when nothing matches or an argument is <c>null</c>.</returns>
        public static HtmlNode FindElementById (HtmlNode node, string id)
        {
            if (node == null || id == null)
            {
                return null;
            }

            // DescendantNodes() already enumerates the entire subtree, so a single pass finds the
            // same first match the former per-node recursion did, without re-walking every
            // subtree once per ancestor.
            foreach (var child in node.DescendantNodes())
            {
                // An ordinal comparison avoids the culture-dependent casing of ToLower().
                if (string.Equals(child.Id, id, System.StringComparison.OrdinalIgnoreCase))
                {
                    return child;
                }
            }
            return null;
        }
Exemple #5
0
        /// <summary>
        /// Collects the forms found below <paramref name="content"/> together with their
        /// <c>input</c> and <c>select</c> fields.
        /// </summary>
        /// <param name="content">Node whose descendants are scanned.</param>
        /// <returns>One <see cref="HtmlForm"/> per <c>form</c> element, in document order.</returns>
        /// <remarks>
        /// The source markup closes its tags irregularly, so a form's fields cannot be found by
        /// taking the descendants of the form node itself. Instead all descendants of
        /// <paramref name="content"/> are walked in order and every field is attributed to the
        /// most recently seen form.
        /// </remarks>
        public IEnumerable<HtmlForm> GetFormsFromContentNode(HtmlNode content)
        {
            var forms = new List<HtmlForm>();

            foreach (var child in content.DescendantNodes())
            {
                if (child.Name.Equals("form"))
                {
                    forms.Add(new HtmlForm { FormNode = child, ActionUrl = child.GetAttributeValue("action", "") });
                    continue;
                }

                // A field that appears before the first form has nothing to attach to. Skipping it
                // replaces the InvalidOperationException that Last() raised on an empty list.
                if (forms.Count == 0)
                {
                    continue;
                }

                var currentForm = forms[forms.Count - 1];

                if (child.Name.Equals("input"))
                {
                    var key = child.GetAttributeValue("name", null);
                    var value = child.GetAttributeValue("value", null);

                    // Unnamed inputs and the one named "reset" are not recorded.
                    if (!string.IsNullOrEmpty(key) && !key.Equals("reset"))
                    {
                        currentForm.Inputs[key] = value;
                    }
                }
                else if (child.Name.Equals("select"))
                {
                    var key = child.GetAttributeValue("name", null);

                    // Keep only options that carry a non-empty value attribute.
                    var options = child.Descendants("option")
                        .Select(y => y.GetAttributeValue("value", null))
                        .Where(y => !string.IsNullOrEmpty(y))
                        .ToList();

                    if (!string.IsNullOrEmpty(key))
                    {
                        currentForm.Selects[key] = options;
                    }
                }
            }

            return forms;
        }
 /// <summary>
 /// Collects every node below <paramref name="node"/> whose inline <c>style</c> attribute
 /// contains <c>display:none</c>, for example <c>&lt;div style='display:none'&gt;</c>.
 /// </summary>
 /// <param name="node">Root whose descendants are inspected; the root itself is not tested.</param>
 /// <param name="nodesToBeDeleted">
 /// List that receives the matching nodes. This method only collects; it does not detach
 /// anything from the document.
 /// </param>
 public static void RemoveNonDisplayNode(HtmlNode node , List<HtmlNode> nodesToBeDeleted)
 {
     // DescendantNodes() already enumerates the whole subtree. The former extra recursion
     // revisited nested nodes once per ancestor and so appended the same node several times.
     foreach (var child in node.DescendantNodes())
     {
         var style = child.Attributes["style"];

         // Invariant lower-casing keeps the match independent of the current culture.
         if (style != null && style.Value.ToLowerInvariant().Contains("display:none"))
         {
             nodesToBeDeleted.Add(child);
         }
     }
 }
 /// <summary>
 /// Adds the first node below <paramref name="node"/> (in document order) whose id equals
 /// <paramref name="Id"/>, ignoring case, to <paramref name="nodesToBeDeleted"/>.
 /// </summary>
 /// <param name="node">Root whose descendants are inspected; the root itself is not tested.</param>
 /// <param name="Id">Id to look for; <c>null</c> matches nothing.</param>
 /// <param name="nodesToBeDeleted">
 /// List that receives the match. This method only collects; it does not detach anything
 /// from the document.
 /// </param>
 public static void RemoveNodeById(HtmlNode node, string Id, List<HtmlNode> nodesToBeDeleted)
 {
     if (Id == null)
     {
         return;
     }

     // A single pass over DescendantNodes() covers the whole subtree. The previous version
     // recursed into RemoveNodeByType with the id as the tag name, which collected unrelated
     // nodes whose tag name happened to equal the id.
     foreach (var child in node.DescendantNodes())
     {
         if (string.Equals(child.Id, Id, System.StringComparison.OrdinalIgnoreCase))
         {
             nodesToBeDeleted.Add(child);
             return;
         }
     }
 }
        /// <summary>
        /// Collects every node below <paramref name="node"/> whose whole <c>class</c> attribute
        /// value equals <paramref name="className"/>, ignoring case.
        /// </summary>
        /// <param name="node">Root whose descendants are inspected; the root itself is not tested.</param>
        /// <param name="className">
        /// Exact class attribute value to match (not a single token of a multi-class value);
        /// <c>null</c> matches nothing.
        /// </param>
        /// <param name="nodesToBeDeleted">
        /// List that receives the matches. This method only collects; it does not detach
        /// anything from the document.
        /// </param>
        public static void RemoveNodeByClassName(HtmlNode node, string className, List<HtmlNode> nodesToBeDeleted)
        {
            // A single pass over DescendantNodes() covers the whole subtree. The previous version
            // recursed into RemoveNodeByType with the class name as the tag name, which collected
            // unrelated nodes whose tag name happened to equal the class name.
            foreach (var child in node.DescendantNodes())
            {
                var classAttribute = child.Attributes["class"];
                if (classAttribute == null)
                {
                    continue;
                }

                // Both sides are compared case-insensitively. Before, only the attribute value
                // was lower-cased, so a className containing upper-case letters never matched.
                if (string.Equals(classAttribute.Value, className, System.StringComparison.OrdinalIgnoreCase))
                {
                    nodesToBeDeleted.Add(child);
                }
            }
        }
Exemple #9
0
 /// <summary>
 /// Returns the first node below <paramref name="node"/> that carries the given attribute
 /// with exactly the given value. All descendants are searched, not only immediate children.
 /// </summary>
 /// <param name="node">Root of the search.</param>
 /// <param name="attribute">Name of the attribute to inspect.</param>
 /// <param name="value">Attribute value that must match exactly (case-sensitive).</param>
 /// <returns>The first matching node, or <c>null</c> when any argument is <c>null</c>.</returns>
 /// <exception cref="InvalidDataException">No descendant carries the attribute/value pair.</exception>
 public HtmlNode getChild(HtmlNode node, string attribute, string value)
 {
     bool hasAllArguments = node != null && attribute != null && value != null;
     if (!hasAllArguments)
     {
         return null;
     }

     foreach (HtmlNode candidate in node.DescendantNodes())
     {
         var candidateAttribute = candidate.Attributes[attribute];
         if (candidateAttribute != null && candidateAttribute.Value == value)
         {
             return candidate;
         }
     }

     // A missing match is reported as an error rather than as null.
     throw new InvalidDataException();
 }
 /// <summary>
 /// Collects the anchor elements below <paramref name="node"/> that pass a set of text and
 /// href filters, without adding a node that is already in the list.
 /// </summary>
 /// <param name="node">Root whose descendants are inspected; the root itself is not tested.</param>
 /// <param name="hyperLinkNode">List that receives the accepted anchors, in document order.</param>
 /// <remarks>
 /// An anchor is accepted when it has no <c>target</c> or <c>onclick</c> attribute, its trimmed
 /// text is longer than one character and does not contain "最新章节", and its href is non-empty,
 /// does not end with "/" and contains none of the excluded fragments tested below.
 /// </remarks>
 public static void GetAllHyperlinkElementWithFilter(HtmlNode node, List<HtmlNode> hyperLinkNode)
 {
     // DescendantNodes() already enumerates the whole subtree. Recursing into every non-anchor
     // node as well only repeated the same visits, at a cost that grows exponentially with depth.
     foreach (var child in node.DescendantNodes())
     {
         if (!string.Equals(child.Name, "a", StringComparison.OrdinalIgnoreCase)
             || child.Attributes["target"] != null
             || child.Attributes["onclick"] != null
             || child.InnerText.Trim().Length <= 1
             || child.InnerText.Contains("最新章节"))
         {
             continue;
         }

         var href = child.Attributes["href"];
         if (href == null || String.IsNullOrEmpty(href.Value))
         {
             continue;
         }

         // Invariant lower-casing and ordinal checks keep the filter culture-independent.
         var hrefValue = href.Value.ToLowerInvariant();
         if (hrefValue.EndsWith("/", StringComparison.Ordinal)
             || hrefValue.Contains("xiazai")
             || hrefValue.Contains("shuye")
             || hrefValue.Contains("javascript")
             || hrefValue.Contains("php")
             || hrefValue.Contains("list")
             || hrefValue.Contains("index")
             || hrefValue.Contains("mailto"))
         {
             continue;
         }

         if (!hyperLinkNode.Contains(child))
         {
             hyperLinkNode.Add(child);
         }
     }
 }
        /// <summary>
        /// Collects the text nodes below <paramref name="node"/> whose trimmed text is longer than
        /// one character, without adding a node that is already in the list.
        /// </summary>
        /// <param name="node">Root whose descendants are inspected; the root itself is not tested.</param>
        /// <param name="textNodeParents">
        /// List that receives the text nodes themselves (not their parents, despite the name),
        /// in document order.
        /// </param>
        public static void GetAllTextElement (HtmlNode node, List<HtmlNode> textNodeParents )
        {
            // DescendantNodes() already enumerates the whole subtree. Recursing into every node
            // as well only repeated the same visits, at a cost that grows exponentially with depth.
            foreach (var child in node.DescendantNodes())
            {
                if (child.NodeType != HtmlNodeType.Text)
                {
                    continue;
                }

                if (!String.IsNullOrWhiteSpace(child.InnerText)
                    && child.InnerText.Trim().Length > 1
                    && !textNodeParents.Contains(child))
                {
                    textNodeParents.Add(child);
                }
            }
        }
 /// <summary>
 /// Collects every node below <paramref name="node"/> whose tag name equals
 /// <paramref name="type"/>, ignoring case.
 /// </summary>
 /// <param name="node">Root whose descendants are inspected; the root itself is not tested.</param>
 /// <param name="type">Tag name to match; <c>null</c> matches nothing.</param>
 /// <param name="nodesToBeDeleted">
 /// List that receives the matches. This method only collects; it does not detach anything
 /// from the document.
 /// </param>
 public static void RemoveNodeByType(HtmlNode node, string type,  List<HtmlNode> nodesToBeDeleted)
 {
     // DescendantNodes() already enumerates the whole subtree. The former extra recursion
     // revisited nested nodes once per ancestor and so appended the same node several times.
     foreach (var child in node.DescendantNodes())
     {
         // Both sides are compared case-insensitively. Before, only the node name was
         // lower-cased, so a type containing upper-case letters never matched.
         if (string.Equals(child.Name, type, System.StringComparison.OrdinalIgnoreCase))
         {
             nodesToBeDeleted.Add(child);
         }
     }
 }
        /// <summary>
        /// Returns the link text associated with <paramref name="n"/>.
        /// </summary>
        /// <param name="n">An anchor element, or a node that contains anchors.</param>
        /// <returns>
        /// The inner text of <paramref name="n"/> when it is an anchor itself; otherwise the inner
        /// text of the first anchor below it (in document order) whose text is not empty, or an
        /// empty string when there is none.
        /// </returns>
        private String getTitleFromURLNode(HtmlNode n)
        {
            if (string.Equals(n.Name, "a", StringComparison.OrdinalIgnoreCase))
            {
                return n.InnerText;
            }

            // DescendantNodes() already enumerates the whole subtree, so one pass yields the same
            // first non-empty anchor text as the former recursion into every descendant, without
            // the repeated walks and the list copy per level.
            foreach (HtmlNode descendant in n.DescendantNodes())
            {
                if (!string.Equals(descendant.Name, "a", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                String text = descendant.InnerText;
                if (text != "")
                {
                    return text;
                }
            }

            return "";
        }
 /// <summary>
 /// Returns the link target associated with <paramref name="n"/>.
 /// </summary>
 /// <param name="n">An anchor element, or a node that contains anchors.</param>
 /// <returns>
 /// The href value of <paramref name="n"/> when it is an anchor itself (empty when it has no
 /// href); otherwise the href of the first anchor below it (in document order) whose href is
 /// not empty, or an empty string when there is none.
 /// </returns>
 public String getURLFromNode(HtmlNode n)
 {
     if (string.Equals(n.Name, "a", StringComparison.OrdinalIgnoreCase))
     {
         return GetFirstHrefValue(n);
     }

     // DescendantNodes() already enumerates the whole subtree, so one pass yields the same
     // first non-empty href as the former recursion into every descendant, without the
     // repeated walks and the list copy per level.
     foreach (HtmlNode descendant in n.DescendantNodes())
     {
         if (!string.Equals(descendant.Name, "a", StringComparison.OrdinalIgnoreCase))
         {
             continue;
         }

         String href = GetFirstHrefValue(descendant);
         if (href != "")
         {
             return href;
         }
     }

     return "";
 }

 /// <summary>
 /// Returns the value of the first attribute named href (ignoring case) on
 /// <paramref name="anchor"/>, or an empty string when it has none.
 /// </summary>
 private static String GetFirstHrefValue(HtmlNode anchor)
 {
     foreach (HtmlAttribute att in anchor.Attributes)
     {
         if (string.Equals(att.Name, "href", StringComparison.OrdinalIgnoreCase))
         {
             return att.Value;
         }
     }

     return "";
 }
Exemple #15
0
        /// <summary>
        /// Fills <paramref name="ep"/> from the markup of one episode entry by looking at the
        /// <c>class</c> attribute of each node below <paramref name="item"/>.
        /// </summary>
        /// <param name="item">Node holding the episode markup.</param>
        /// <param name="ep">Episode object whose properties are overwritten.</param>
        /// <remarks>
        /// The "infos" text is split on single spaces; element 1 is parsed as the season number,
        /// element 3 (with ":" removed) as the episode number, and everything from element 4 on
        /// becomes the episode name. Malformed text makes the indexing or <c>int.Parse</c> throw.
        /// </remarks>
        public void helpItem(HtmlNode item, Episode ep)
        {
            // Default show name: text of the first descendant. A "title" node overrides it below.
            ep.ShowName = item.DescendantNodes().ToArray<HtmlNode>()[0].InnerText;

            foreach (HtmlNode candidate in item.DescendantNodes())
            {
                var classAttribute = candidate.Attributes["class"];
                if (classAttribute == null)
                {
                    continue;
                }

                string cssClass = classAttribute.Value;

                if (cssClass == "title")
                {
                    ep.ShowName = candidate.DescendantNodes().ToArray<HtmlNode>()[0].InnerText;
                }
                else if (cssClass == "infos")
                {
                    string[] build = candidate.DescendantNodes().ToArray<HtmlNode>()[0].InnerHtml.Split(new char[] { ' ' });

                    ep.ISeason = int.Parse(build[1]);

                    string episodeToken = build[3].Replace(":", "");
                    ep.IEpisode = int.Parse(episodeToken);
                    ep.EpisodePos = "S" + build[1] + "E" + episodeToken;

                    // Every name part is followed by one space, including the last one.
                    int namePartCount = build.Length - 4;
                    ep.EpisodeName = namePartCount > 0
                        ? string.Join(" ", build, 4, namePartCount) + " "
                        : "";

                    // The summary sits in a nested node; if several match, the last one wins.
                    foreach (HtmlNode nested in candidate.DescendantNodes())
                    {
                        var nestedClass = nested.Attributes["class"];
                        if (nestedClass != null && nestedClass.Value == "summary hidden-xs")
                        {
                            ep.summary = nested.InnerText.Replace(@"...&nbsp;(more)", "");
                        }
                    }
                }
                else if (cssClass == "btn btn-default watch-episode-button")
                {
                    ep.id = candidate.Attributes["episodeid"].Value;
                }
            }
        }
        /// <summary>
        /// Finds the first node below <paramref name="node"/> (in document order) whose whole
        /// <c>class</c> attribute value equals <paramref name="className"/>, ignoring case.
        /// </summary>
        /// <param name="node">Root of the search; the root itself is not tested.</param>
        /// <param name="className">
        /// Exact class attribute value to match (not a single token of a multi-class value).
        /// </param>
        /// <returns>The first matching node, or <c>null</c> when nothing matches or an argument is <c>null</c>.</returns>
        public static HtmlNode FindElementByClass(HtmlNode node, string className)
        {
            if (node == null || className == null)
            {
                return null;
            }

            // A single pass over DescendantNodes() covers the whole subtree. The previous version
            // recursed into FindElementById with the class name, so it could return a node whose
            // id (rather than class) happened to equal className.
            foreach (var child in node.DescendantNodes())
            {
                var classAttribute = child.Attributes["class"];
                if (classAttribute != null
                    && string.Equals(classAttribute.Value, className, System.StringComparison.OrdinalIgnoreCase))
                {
                    return child;
                }
            }
            return null;
        }
 /// <summary>
 /// Normalizes whitespace in the markup below <paramref name="root"/> in two passes over its
 /// descendants: first every run of whitespace in a node's InnerHtml is replaced by a single
 /// space, then every node's InnerHtml is trimmed.
 /// </summary>
 /// <param name="root">Node whose descendants are rewritten; root's own InnerHtml is not assigned directly.</param>
 protected void RemoveWhitespace(HtmlNode root)
 {
     // NOTE(review): ForEach on the DescendantNodes() sequence is presumably a project-level
     // extension method; it is not defined in this view - confirm.
     // NOTE(review): InnerHtml is assigned while the descendant sequence is still being
     // enumerated; verify that replacing a node's content mid-enumeration is safe here.
     // NOTE(review): the regex runs on raw markup, so whitespace inside tags and attribute
     // values is collapsed as well - confirm that is intended.

     // Pass 1: collapse each whitespace run to one space.
     root.DescendantNodes().ForEach(x => x.InnerHtml = Regex.Replace(x.InnerHtml, "\\s+", " "));
     // Pass 2: strip leading and trailing whitespace from each node's markup.
     root.DescendantNodes().ForEach(x => x.InnerHtml = x.InnerHtml.Trim());
 }
 /// <summary>
 /// Collects the <c>img</c> elements below <paramref name="node"/> that have a <c>src</c>
 /// attribute, without adding a node that is already in the list.
 /// </summary>
 /// <param name="node">Root whose descendants are inspected; the root itself is not tested.</param>
 /// <param name="imageNodes">List that receives the accepted image nodes, in document order.</param>
 public static void GetAllImageElementWithFilter(HtmlNode node, List<HtmlNode> imageNodes)
 {
     // DescendantNodes() already enumerates the whole subtree. Recursing into every non-image
     // node as well only repeated the same visits, at a cost that grows exponentially with depth.
     foreach (var child in node.DescendantNodes())
     {
         // An ordinal comparison avoids the culture-dependent casing of ToLower()
         // (e.g. "IMG" lower-cases to a dotless-i form under Turkish culture).
         if (!string.Equals(child.Name, "img", System.StringComparison.OrdinalIgnoreCase))
         {
             continue;
         }

         if (child.Attributes["src"] != null && !imageNodes.Contains(child))
         {
             imageNodes.Add(child);
         }
     }
 }