Exemple #1
0
        /// <summary>
        /// Crawls one list page: for each row of the <c>data_list</c> table body it reads the
        /// category (second cell) and the detail link (third cell), skips resources that already
        /// have a record, and otherwise saves a new blog, its recommendation and a resource record.
        /// </summary>
        /// <param name="url">Address of the list page to download and parse.</param>
        private static void GetListPage(string url)
        {
            string html = GetGeneralContent(url);

            // Nothing to parse (same early exit the sibling crawlers use).
            if (string.IsNullOrEmpty(html))
            {
                return;
            }
            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(html);
            HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("//tbody[@id='data_list']");

            // SelectSingleNode yields null when the page has no matching table body;
            // without this guard the loop below would throw outside of the try block.
            if (htmlNode == null)
            {
                return;
            }
            foreach (HtmlNode node in htmlNode.Elements("tr"))
            {
                try
                {
                    var elements = node.Elements("td").ToArray();
                    if (elements.Length < 3)
                    {
                        continue;
                    }
                    var category = elements[1].InnerText.Trim();
                    Console.WriteLine(category);

                    var detailLink = elements[2].Element("a");
                    if (detailLink == null)
                    {
                        // Row without a detail anchor: nothing to crawl.
                        continue;
                    }
                    var detailUrl = detailLink.GetAttributeValue("href", "").Trim();
                    Console.WriteLine(detailUrl);
                    var title = detailLink.InnerText.Trim().Replace(".Mp4Ba", "");
                    Console.WriteLine(title);

                    string resourceId = GetResourceId(detailUrl);
                    if (CrawlerUtility.ExistRecord(resourceId, ResourceType.Mp4Ba))
                    {
                        continue;
                    }
                    var blog = new Blog {
                        // Titles are capped at 200 characters.
                        Title = title.Length > 200 ? title.Substring(0, 200) : title
                    };
                    string coverUrl = "";
                    var    link     = GetIntroDetail("http://www.mp4ba.com/" + detailUrl, blog, out coverUrl);

                    if (!SaveBlog(blog, link))
                    {
                        Console.WriteLine("Blog Sync Fail,detailUrl:{0},blogId:{1}", detailUrl, blog.Id);
                        continue;
                    }
                    // A failed recommendation is reported but does not prevent the record from being added.
                    if (!SaveRecomment(blog, coverUrl, category))
                    {
                        Console.WriteLine("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, coverUrl);
                    }
                    CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Mp4Ba);
                    Console.WriteLine("Blog Added:" + blog.Id);
                }
                catch (Exception ex)
                {
                    // One broken row must not abort the rest of the page.
                    Logger.Error(ex);
                }
            }
        }
Exemple #2
0
        /// <summary>
        /// Walks the <c>li</c> items of the list matched under <c>div#main</c> on a list page and,
        /// for every resource that has no record yet, stores a blog, its recommendation and a
        /// resource record.
        /// </summary>
        /// <param name="url">List page to download.</param>
        /// <param name="category">Category handed to <c>SaveRecomment</c> for each new blog.</param>
        private static void GetListPage(string url, string category)
        {
            string pageHtml = GetGeneralContent(url);
            if (pageHtml == string.Empty)
            {
                return;
            }

            HtmlDocument page = new HtmlDocument();
            page.LoadHtml(pageHtml);

            HtmlNode listRoot = page.DocumentNode.SelectSingleNode("//div[@id='main']//ul");
            if (listRoot == null)
            {
                return;
            }

            foreach (HtmlNode item in listRoot.Elements("li"))
            {
                try
                {
                    var entry = new Blog();

                    // The first div carries the anchor whose href, title and style attributes are read below.
                    HtmlNode thumbDiv = item.Element("div");
                    HtmlNode anchor   = thumbDiv == null ? null : thumbDiv.Element("a");
                    if (anchor == null)
                    {
                        continue;
                    }

                    string detailUrl = anchor.GetAttributeValue("href", "");
                    if (detailUrl == string.Empty)
                    {
                        continue;
                    }
                    Trace(detailUrl);

                    string resourceId      = GetResourceId(detailUrl);
                    bool   alreadyRecorded = CrawlerUtility.ExistRecord(resourceId, ResourceType.Verycd);
                    if (alreadyRecorded)
                    {
                        Info("resourceId:{0} exist", resourceId);
                        _existResource++;
                        continue;
                    }

                    string fullTitle = anchor.GetAttributeValue("title", "");
                    if (fullTitle == string.Empty)
                    {
                        continue;
                    }
                    // Stored titles are limited to 200 characters; the trace shows the full one.
                    if (fullTitle.Length > 200)
                    {
                        entry.Title = fullTitle.Substring(0, 200);
                    }
                    else
                    {
                        entry.Title = fullTitle;
                    }
                    Trace(fullTitle);

                    // Strip the CSS wrapper around the cover address found in the inline style.
                    string imageUrl = anchor.GetAttributeValue("style", "");
                    imageUrl = imageUrl.Replace("background-image:", "");
                    imageUrl = imageUrl.Replace("url(", "");
                    imageUrl = imageUrl.Replace(")", "");
                    imageUrl = imageUrl.Trim();
                    Trace("imageUrl:" + imageUrl);

                    // The second div holds the descriptive paragraphs.
                    HtmlNode infoDiv = item.Elements("div").ElementAtOrDefault(1);
                    if (infoDiv == null)
                    {
                        continue;
                    }

                    // Paragraphs 2 through 5 (at most) make up the blog body.
                    HtmlNode[]    bodyParas = infoDiv.Elements("p").Skip(1).Take(4).ToArray();
                    StringBuilder markup    = new StringBuilder();
                    StringBuilder plainText = new StringBuilder();
                    for (int i = 0; i < bodyParas.Length; i++)
                    {
                        markup.Append(bodyParas[i].OuterHtml);
                        plainText.Append(bodyParas[i].InnerText);
                    }
                    entry.HtmlContent = markup.ToString();
                    entry.Content     = plainText.ToString();

                    var links = GetIntroDetail(detailUrl, entry);
                    if (links.Count == 0)
                    {
                        continue;
                    }

                    if (!SaveBlog(entry, links))
                    {
                        Info("Blog Sync Fail,url:{0},blogId:{1}", url, entry.Id);
                        continue;
                    }

                    // A failed recommendation is only reported; the record is still added.
                    if (!SaveRecomment(entry, imageUrl, category))
                    {
                        Info("Recomment Sync Fail,blogId:{0},imageUrl:{1}", entry.Id, imageUrl);
                    }

                    CrawlerUtility.AddResourceRecord(entry.Id, resourceId, ResourceType.Verycd);
                    Info("Blog Added,blogId:{0},resourceId:{1}", entry.Id, resourceId);
                    _fetchCount++;
                }
                catch (Exception ex)
                {
                    // Log and move on to the next list item.
                    Logger.Error(ex);
                }
            }
        }
Exemple #3
0
        /// <summary>
        /// Crawls one list page backed by the <c>CommonListArea</c> table. The first two rows are
        /// skipped; every remaining row with at least four cells is either imported as a new blog
        /// or, when the resource is already recorded and its update-time cell no longer contains
        /// its distribute-date cell, has its links appended to the existing blog.
        /// </summary>
        /// <param name="url">List page to download.</param>
        /// <param name="category">Category handed to <c>SaveRecomment</c> for each new blog.</param>
        private static void GetListPage(string url, string category)
        {
            string html = GetGeneralContent(url);

            if (html == "")
            {
                return;
            }
            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(html);
            HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("//table[@class='CommonListArea']");

            if (htmlNode == null)
            {
                return;
            }
            int    rowIndex  = 0;
            string detailUrl = "";

            foreach (HtmlNode node in htmlNode.Elements("tr"))
            {
                // Reset per row so the catch block never reports the previous row's URL.
                detailUrl = "";
                try
                {
                    // The first two rows are not data rows.
                    if (++rowIndex <= 2)
                    {
                        continue;
                    }
                    var tdArray = node.Elements("td").ToArray();
                    if (tdArray.Length < 4)
                    {
                        continue;
                    }
                    var blog      = new Blog();
                    var titleNode = tdArray[0];

                    var title = titleNode.InnerText;
                    if (title == "")
                    {
                        continue;
                    }
                    // Titles are capped at 200 characters.
                    blog.Title = title.Length > 200 ? title.Substring(0, 200) : title;
                    Trace(title);

                    // The second anchor of the title cell points at the detail page.
                    var link = titleNode.Elements("a").Skip(1).FirstOrDefault();
                    if (link == null)
                    {
                        continue;
                    }
                    detailUrl = link.GetAttributeValue("href", "");
                    if (detailUrl == "")
                    {
                        continue;
                    }
                    Trace(detailUrl);
                    string resourceId = GetResourceId(detailUrl);
                    if (CrawlerUtility.ExistRecord(resourceId, ResourceType.Ed2000))
                    {
                        _existResource++;
                        var distributeDate = tdArray[1].InnerText;
                        var updateTime     = tdArray[2].InnerText;
                        if (updateTime.Contains(distributeDate))
                        {
                            // Update time still matches the distribute date: nothing new to append.
                            Info("Resource Existed,resourceId:{0}", resourceId);
                        }
                        else
                        {
                            _updateResource++;
                            int blogId = CrawlerUtility.ExistContinutedRecord(resourceId, ResourceType.Ed2000);
                            Info("resourceId:{0} exist,to be updated blogId:{1}", resourceId, blogId);
                            if (AppendBlogLinks(resourceId, blogId))
                            {
                                Info("Blog Updated:" + blogId);
                            }
                            else
                            {
                                // Report the id of the blog being updated, not the unsaved placeholder's id.
                                Info("Blog Sync Fail,resourceId:{0},blogId:{1}", resourceId, blogId);
                            }
                            _fetchCount++;
                        }
                        continue;
                    }
                    string imageUrl = "";
                    var    urlList  = GetIntroDetail("http://www.ed2000.com" + detailUrl, blog, out imageUrl);
                    if (urlList.Count == 0)
                    {
                        continue;
                    }
                    if (!SaveBlog(blog, urlList))
                    {
                        Info("Blog Sync Fail,detailUrl:{0},blogId:{1}", detailUrl, blog.Id);
                        continue;
                    }
                    // A failed recommendation is only reported; the record is still added.
                    if (!SaveRecomment(blog, imageUrl, category))
                    {
                        Info("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, imageUrl);
                    }
                    CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Ed2000, true);
                    Info("Blog Added,blogId:{0},resourceId:{1}", blog.Id, resourceId);
                    _fetchCount++;
                }
                catch (Exception ex)
                {
                    // detailUrl is empty when the failure happened before the link was read.
                    Logger.Error(ex, detailUrl);
                }
            }
        }
Exemple #4
0
            /// <summary>
            /// Crawls one list page: every child <c>div</c> of <c>div#tech</c> that has at least
            /// three inner divs is treated as a resource entry. Entries without an existing record
            /// are stored as a blog plus recommendation, and a resource record is added.
            /// </summary>
            /// <param name="url">List page to download.</param>
            /// <param name="category">Category handed to <c>SaveRecomment</c> for each new blog.</param>
            private static void GetListPage(string url, string category)
            {
                string html = GetGeneralContent(url);

                if (html == "")
                {
                    return;
                }
                HtmlDocument document = new HtmlDocument();

                document.LoadHtml(html);
                HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("//div[@id='tech']");

                if (htmlNode == null)
                {
                    return;
                }
                foreach (HtmlNode node in htmlNode.Elements("div"))
                {
                    try
                    {
                        // ToArray never returns null, so only the length needs checking.
                        var childrenDiv = node.Elements("div").ToArray();
                        if (childrenDiv.Length < 3)
                        {
                            continue;
                        }
                        var blog = new Blog();

                        // First inner div: thumbnail anchor with the detail link and cover image.
                        var link = childrenDiv[0].Element("a");
                        if (link == null)
                        {
                            continue;
                        }
                        var detailUrl = link.GetAttributeValue("href", "");
                        if (detailUrl == "")
                        {
                            continue;
                        }
                        Trace(detailUrl);
                        string resourceId = GetResourceId(detailUrl);
                        if (CrawlerUtility.ExistRecord(resourceId, ResourceType.Ed2Kers))
                        {
                            Info("resourceId:{0} exist", resourceId);
                            continue;
                        }
                        var    imageNode = link.Element("img");
                        string imageUrl  = "";
                        if (imageNode != null)
                        {
                            imageUrl = imageNode.GetAttributeValue("data-original", "");
                            Trace("imageUrl:" + imageUrl);
                        }

                        // Second inner div: ul > li whose last anchor carries the title.
                        // Each level is checked so an entry with unexpected markup is skipped
                        // instead of raising a NullReferenceException.
                        HtmlNode titleList = childrenDiv[1].Element("ul");
                        HtmlNode titleItem = titleList == null ? null : titleList.Element("li");
                        if (titleItem == null)
                        {
                            continue;
                        }
                        var titleNode = titleItem.Elements("a").LastOrDefault();
                        if (titleNode == null)
                        {
                            continue;
                        }
                        var title = titleNode.InnerText;
                        if (title == "")
                        {
                            continue;
                        }
                        // Titles are capped at 200 characters.
                        blog.Title = title.Length > 200 ? title.Substring(0, 200) : title;
                        Trace(title);

                        var urlList = GetIntroDetail("http://www.ed2kers.com/" + detailUrl, blog);
                        if (urlList.Count == 0)
                        {
                            continue;
                        }
                        if (!SaveBlog(blog, urlList))
                        {
                            Info("Blog Sync Fail,url:{0},blogId:{1}", url, blog.Id);
                            continue;
                        }
                        // A failed recommendation is only reported; the record is still added.
                        if (!SaveRecomment(blog, imageUrl, category))
                        {
                            Info("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, imageUrl);
                        }
                        CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Ed2Kers);
                        Info("Blog Added,blogId:{0},resourceId:{1}", blog.Id, resourceId);
                        _fetchCount++;
                    }
                    catch (Exception ex)
                    {
                        // One broken entry must not abort the rest of the page.
                        Logger.Error(ex);
                    }
                }
            }
Exemple #5
0
        /// <summary>
        /// Crawls one list page: each <c>li</c> under <c>div.resource-showlist</c> is read for its
        /// title, detail link and optional status label. Already recorded resources get their
        /// links appended when a continued record exists; new resources are stored as a blog with
        /// downloaded image, recommendation and resource record.
        /// </summary>
        /// <param name="url">List page to download.</param>
        private static void GetListPage(string url)
        {
            string html = GetGeneralContent(url);

            if (html == "")
            {
                return;
            }
            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(html);
            HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("//div[@class='resource-showlist']//ul");

            if (htmlNode == null)
            {
                return;
            }
            foreach (HtmlNode node in htmlNode.Elements("li"))
            {
                try
                {
                    bool   continuted    = false;
                    string continuteType = "";
                    var    nodeArray     = node.Elements("div").ToArray();

                    // The info block is the second div; skip items that do not have one
                    // rather than indexing past the end of the array.
                    if (nodeArray.Length < 2)
                    {
                        continue;
                    }
                    var infoNode = nodeArray[1];

                    // Walk dl > dt step by step so unexpected markup skips the item
                    // instead of raising a NullReferenceException.
                    HtmlNode dlNode = infoNode.Element("dl");
                    HtmlNode dtNode = dlNode == null ? null : dlNode.Element("dt");
                    if (dtNode == null)
                    {
                        continue;
                    }
                    HtmlNode strongNode          = dtNode.Element("strong");
                    HtmlNode detailNode          = strongNode == null ? null : strongNode.Element("a");
                    HtmlNode detailContinuedNode = dtNode.Element("font");
                    if (detailNode == null)
                    {
                        continue;
                    }
                    string title     = detailNode.InnerText;
                    string detailUrl = detailNode.GetAttributeValue("href", "");
                    if (detailUrl == "")
                    {
                        continue;
                    }
                    Console.WriteLine(title);
                    if (detailContinuedNode != null)
                    {
                        continuteType = detailContinuedNode.InnerText;
                        Console.WriteLine(continuteType);
                    }
                    // Status label "[尚未开播]": the item is skipped entirely.
                    if (continuteType.Contains("[尚未开播]"))
                    {
                        continue;
                    }
                    // These two status labels mark the resource as continued.
                    if (continuteType.Contains("连载中]") || continuteType.Contains("季完结]"))
                    {
                        continuted = true;
                    }

                    Console.WriteLine(detailUrl);
                    string resourceId = GetResourceId(detailUrl);
                    if (CrawlerUtility.ExistRecord(resourceId, ResourceType.Zimuzu))
                    {
                        int blogId = CrawlerUtility.ExistContinutedRecord(resourceId, ResourceType.Zimuzu);
                        if (blogId > 0)
                        {
                            Console.WriteLine("resourceId:{0} exist,to be updated blogId:{1}", resourceId, blogId);
                            if (AppendBlogLinks(resourceId, blogId))
                            {
                                Console.WriteLine("Blog Updated:" + blogId);
                            }
                            else
                            {
                                Console.WriteLine("Blog Sync Fail,resourceId:{0},blogId:{1}", resourceId, blogId);
                            }
                            // The label no longer marks the resource as continued.
                            if (!continuted)
                            {
                                CrawlerUtility.UpdateRecordOver(blogId);
                            }
                        }
                        else
                        {
                            Console.WriteLine("resourceId:{0} exist", resourceId);
                        }
                        continue;
                    }

                    var blog = new Blog {
                        // Titles are capped at 200 characters.
                        Title = title.Length > 200 ? title.Substring(0, 200) : title
                    };
                    string blogImgUrl  = "";
                    string coverImgUrl = "";
                    GetBlogContent(detailUrl, blog, out blogImgUrl, out coverImgUrl);
                    string      linkUrl  = detailUrl.Replace("resource", "resource/list");
                    List <Link> linkList = GetBlogLink(linkUrl);
                    if (string.IsNullOrEmpty(blog.Content) || linkList.Count == 0)
                    {
                        continue;
                    }
                    ImageUrl imageUrl = DownloadBlogImgToLocal(blogImgUrl);
                    if (!SaveBlog(blog, imageUrl, linkList))
                    {
                        Console.WriteLine("Blog Sync Fail,detailUrl:{0},blogId:{1}", detailUrl, blog.Id);
                        continue;
                    }
                    if (!SaveRecomment(blog, coverImgUrl))
                    {
                        // Log the cover URL that was actually passed to SaveRecomment.
                        Console.WriteLine("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, coverImgUrl);
                    }
                    CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Zimuzu, continuted);
                    Console.WriteLine("Blog Added:" + blog.Id);
                }
                catch (Exception ex)
                {
                    // One broken item must not abort the rest of the page.
                    Logger.Error(ex);
                }
            }
        }