Ejemplo n.º 1
0
            /// <summary>
            /// Builds the recommendation (cover) entry for a crawled blog and passes it to
            /// <c>SyncUtility.SyncRecomment</c>.
            /// </summary>
            /// <param name="blog">Blog whose <c>Id</c> and <c>Title</c> are copied onto the entry.</param>
            /// <param name="imageUrl">Cover image URL; an empty string means there is nothing to store.</param>
            /// <param name="category">Raw category text, mapped through <c>SetCategory</c>.</param>
            /// <returns><c>true</c> when <paramref name="imageUrl"/> is empty; otherwise the result of the sync call.</returns>
            private static bool SaveRecomment(Blog blog, string imageUrl, string category)
            {
                if (imageUrl == string.Empty)
                {
                    return true;
                }

                string downloadedName = CrawlerUtility.GetFileContent(imageUrl, Const.CoverFileDirectory, "http://www.ed2kers.com/");

                // When the download yields no file name, the remote URL itself is stored as the cover name.
                string coverName     = string.IsNullOrEmpty(downloadedName) ? imageUrl : downloadedName;
                string localFilePath = Const.CoverFileDirectory + coverName;

                // The cloud upload is currently switched off, so the uploaded URL is always empty here.
                //UploadUtility.UploadLocalFile(localFilePath);
                string uploadedUrl = "";

                Info("localFile:{0} upload to yunUrl:{1}", localFilePath, uploadedUrl);

                var entry = new Recomment();
                entry.BlogId    = blog.Id;
                entry.CoverName = coverName;
                entry.Title     = blog.Title;
                entry.TimeStamp = DateTime.Now;
                entry.Category  = SetCategory(category);

                // Only reached once the upload above is re-enabled and returns a URL.
                if (!string.IsNullOrEmpty(uploadedUrl))
                {
                    entry.YunUrl      = uploadedUrl;
                    entry.ImageStatus = ImageStatus.Yun;
                }

                bool synced = SyncUtility.SyncRecomment(entry);
                return synced;
            }
Ejemplo n.º 2
0
        /// <summary>
        /// Downloads the cover image for a blog, rescales it in place, uploads it and syncs the
        /// resulting recommendation entry.
        /// </summary>
        /// <param name="blog">Blog whose <c>Id</c> and <c>Title</c> are copied onto the entry.</param>
        /// <param name="coverImgUrl">Cover image URL; null or empty means there is no cover to store.</param>
        /// <returns>
        /// <c>true</c> when there is no cover or the download produced no file; otherwise the result
        /// of <c>SyncUtility.SyncRecomment</c>.
        /// </returns>
        private static bool SaveRecomment(Blog blog, string coverImgUrl)
        {
            // Treat null the same as empty so a missing cover never reaches the downloader.
            if (string.IsNullOrEmpty(coverImgUrl))
            {
                return(true);
            }
            string fileName = CrawlerUtility.GetFileContent(coverImgUrl, Const.CoverFileDirectory, "http://www.zimuzu.tv/");

            if (string.IsNullOrEmpty(fileName))
            {
                return(true);
            }
            string localFilePath = Const.CoverFileDirectory + fileName;

            // Dispose both the loaded and the rescaled image so GDI+ handles (and the file handle
            // held by Image.FromFile) are released deterministically instead of at finalization.
            using (var sourceImage = Image.FromFile(localFilePath))
            using (var image = Domain.Helper.ImgHandler.ZoomPicture(sourceImage, 200, 110))
            {
                image.Save(localFilePath);
            }

            string yunUrl = UploadUtility.UploadLocalFile(localFilePath);

            Console.WriteLine("localFile:{0} upload to yunUrl:{1}", localFilePath, yunUrl);
            var recomment = new Recomment()
            {
                BlogId    = blog.Id,
                CoverName = fileName,
                Title     = blog.Title,
                TimeStamp = DateTime.Now,
                Category  = SetCategoryByTitle(blog.Title)
            };

            // Only mark the entry as cloud-hosted when the upload returned a URL.
            if (!string.IsNullOrEmpty(yunUrl))
            {
                recomment.YunUrl      = yunUrl;
                recomment.ImageStatus = ImageStatus.Yun;
            }
            return(SyncUtility.SyncRecomment(recomment));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Downloads a blog image into <c>Const.BlogFileDirectory</c>, shrinks it when it is wider
        /// than 500 pixels, uploads it and returns an <c>ImageUrl</c> describing the result.
        /// </summary>
        /// <param name="blogImgUrl">Remote URL of the image to download.</param>
        /// <returns>
        /// <c>null</c> when the download produced no file name; otherwise an <c>ImageUrl</c> whose status
        /// is <c>ImageStatus.Yun</c> if the upload returned a URL and <c>ImageStatus.Local</c> if not.
        /// </returns>
        private static ImageUrl DownloadBlogImgToLocal(string blogImgUrl)
        {
            string fileName = CrawlerUtility.GetFileContent(blogImgUrl, Const.BlogFileDirectory, "http://www.zimuzu.tv/");

            // Null is handled like empty, matching the cover-download code paths.
            if (string.IsNullOrEmpty(fileName))
            {
                return(null);
            }
            string localFilePath = Const.BlogFileDirectory + fileName;

            using (Image downloadImage = Image.FromFile(localFilePath))
            {
                if (downloadImage.Size.Width > 500)
                {
                    // Keep the aspect ratio: target height = 500 * originalHeight / originalWidth (integer math).
                    int targetHeight = 500 * downloadImage.Size.Height / downloadImage.Size.Width;

                    // The rescaled image is a separate GDI+ object and must be disposed as well.
                    using (var image = Domain.Helper.ImgHandler.ZoomPictureProportionately(downloadImage, 500, targetHeight))
                    {
                        image.Save(localFilePath);
                    }
                }
            }

            string yunUrl = UploadUtility.UploadLocalFile(localFilePath);

            Console.WriteLine("localFile:{0} upload to yunUrl:{1}", localFilePath, yunUrl);

            ImageUrl imageUrl = new ImageUrl();
            imageUrl.Url         = fileName;
            imageUrl.ImageStatus = ImageStatus.Local;
            imageUrl.TimeStamp   = DateTime.Now;
            if (!string.IsNullOrEmpty(yunUrl))
            {
                imageUrl.YunUrl      = yunUrl;
                imageUrl.ImageStatus = ImageStatus.Yun;
            }
            return(imageUrl);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Fetches a page through <c>CrawlerUtility.GetGeneralContent</c> using the zimuzu.tv referer
        /// and a fixed cookie set, then pauses before returning.
        /// </summary>
        /// <param name="strUrl">URL of the page to fetch.</param>
        /// <returns>The fetched content exactly as returned by the crawler utility.</returns>
        private static string GetGeneralContent(string strUrl)
        {
            // NOTE(review): these look like captured session cookies kept in source — confirm they are meant to be here.
            var siteCookies = new List<Cookie>();
            siteCookies.Add(new Cookie("CNZZDATA1254180690", "176448891-1468518959-%7C1468679132", "/", "www.zimuzu.tv"));
            siteCookies.Add(new Cookie("PHPSESSID", "ht8gtggpqjlth57ef41b7g03k0", "/", "www.zimuzu.tv"));
            siteCookies.Add(new Cookie(
                "GINFO",
                "uid%3D3667676%26nickname%3Dkinshine%26group_id%3D1%26avatar_t%3D%26main_group_id%3D0%26common_group_id%3D54",
                "/",
                ".zimuzu.tv"));
            siteCookies.Add(new Cookie("GKEY", "e4b20dc7275fac6a7f984afcb5938ff3", "/", ".zimuzu.tv"));
            siteCookies.Add(new Cookie(
                "mykeywords",
                "%3A1%3A%7Bi%3A0%3Bs%3A21%3A%22%E8%82%A5%E7%91%9E%E7%9A%84%E7%96%AF%E7%8B%82%E6%97%A5%E8%AE%B0%22%3B%7D",
                "/",
                ".zimuzu.tv"));

            string content = CrawlerUtility.GetGeneralContent(strUrl, "http://www.zimuzu.tv/", siteCookies);

            // Wait 10 s after an empty response, 5 s after a successful one.
            int pauseMilliseconds = string.IsNullOrEmpty(content) ? 10 * 1000 : 5 * 1000;
            Thread.Sleep(pauseMilliseconds);

            return content;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Fetches a page through <c>CrawlerUtility.GetGeneralContent</c> using the gdajie.com referer
        /// and a fixed cookie set, detects the site's IP-ban notice, then pauses before returning.
        /// </summary>
        /// <param name="strUrl">URL of the page to fetch.</param>
        /// <returns>
        /// The fetched content, or an empty string when the response contains the IP-ban notice.
        /// </returns>
        private static string GetGeneralContent(string strUrl)
        {
            _requestCount++;
            string referer = "http://www.gdajie.com/";
            var    cookies = new List <Cookie>()
            {
                new Cookie("AJSTAT_ok_times", "5", "/", "verycd.gdajie.com"),
                new Cookie("Hm_lpvt_9398e7331484620894c57216bea9225e", "1468695450", "/", ".gdajie.com"),
                new Cookie("JSESSIONID", "7F0463C2223BFF2E8868FC7EFA7D0167", "/", "verycd.gdajie.com"),
                new Cookie("CNZZDATA4616656",
                           "cnzz_eid%3D1953947045-1464628831-http%253A%252F%252Fwww.verycd.gdajie.com%252F%26ntime%3D1467845452",
                           "/", "verycd.gdajie.com"),
                new Cookie("CNZZDATA1254524697", "78693709-1464629094-%7C1468690207", "/", "verycd.gdajie.com")
            };
            string html = CrawlerUtility.GetGeneralContent(strUrl, referer, cookies);

            // Guard against a null/empty response before searching it; the check below already
            // treats that case as a failed fetch, so it must not throw here.
            if (!string.IsNullOrEmpty(html) &&
                html.Contains("对不起,我们怀疑您正使用采集软件对我们的网站进行采集,所以我们采取了封您ip的决定"))
            {
                Console.WriteLine("url:" + strUrl + ",ip被封,_requestCount:" + _requestCount);
                Info("url:" + strUrl + ",ip被封,requestCount:" + _requestCount);
                html = "";
            }

            // Back off longer (10 s) after a failed or banned request than after a successful one (5 s).
            if (string.IsNullOrEmpty(html))
            {
                Thread.Sleep(10 * 1000);
            }
            else
            {
                Thread.Sleep(5 * 1000);
            }
            return(html);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Fetches a page through <c>CrawlerUtility.GetGeneralContent</c> using the mp4ba.com referer
        /// and cookie, detects the site's maintenance notice, then pauses before returning.
        /// </summary>
        /// <param name="strUrl">URL of the page to fetch.</param>
        /// <returns>
        /// The fetched content, or an empty string when the response contains the maintenance notice.
        /// </returns>
        private static string GetGeneralContent(string strUrl)
        {
            _requestCount++;
            string referer = "http://www.mp4ba.com/";
            var    cookies = new List <Cookie>()
            {
                new Cookie("CNZZDATA5925857", "cnzz_eid%3D921376316-1462025230-%26ntime%3D1467044423", "/",
                           "www.mp4ba.com")
            };

            string html = CrawlerUtility.GetGeneralContent(strUrl, referer, cookies);

            // Guard against a null/empty response before searching it; the check below already
            // treats that case as a failed fetch, so it must not throw here.
            if (!string.IsNullOrEmpty(html) && html.Contains("服务器维护中,请稍后访问"))
            {
                Info("服务器维护中,请稍后访问,60秒后继续操作");
                html = "";
            }

            // Wait a full minute after a failed/maintenance response, 5 s after a successful one.
            if (string.IsNullOrEmpty(html))
            {
                Thread.Sleep(60 * 1000);
            }
            else
            {
                Thread.Sleep(5 * 1000);
            }
            return(html);
        }
Ejemplo n.º 7
0
            /// <summary>
            /// Fetches a page through <c>CrawlerUtility.GetGeneralContent</c> with a fixed cookie set,
            /// bumping the request counter first. No delay is applied after the request.
            /// </summary>
            /// <param name="strUrl">URL of the page to fetch.</param>
            /// <returns>The fetched content exactly as returned by the crawler utility.</returns>
            private static string GetGeneralContent(string strUrl)
            {
                _requestCount++;

                // The page counter cookie mirrors the number of requests issued so far.
                var requestCookies = new List<Cookie>();
                requestCookies.Add(new Cookie("AJSTAT_ok_pages", _requestCount.ToString(), "/", "www.ed2kers.com"));
                requestCookies.Add(new Cookie("AJSTAT_ok_times", "1", "/", "www.ed2kers.com"));
                requestCookies.Add(new Cookie("Hm_lpvt_9398e7331484620894c57216bea9225e", "1468695450", "/", ".gdajie.com"));
                requestCookies.Add(new Cookie("PHPSESSID", "0hbum6a055q5sfg5oi03a7ug35", "/", "www.ed2kers.com"));
                requestCookies.Add(new Cookie("Hm_lvt_9ad7d25789ce8adb7b225bc58a1b8525", "1470154733", "/", "ed2kers.com"));
                requestCookies.Add(new Cookie("Hm_lpvt_9ad7d25789ce8adb7b225bc58a1b8525", "1470156513", "/", "ed2kers.com"));

                // NOTE(review): the referer points at gdajie.com although the cookies target ed2kers.com —
                // possibly a copy-paste leftover; confirm before changing.
                // Throttling (10 s after an empty response, 5 s otherwise) is intentionally disabled here.
                return CrawlerUtility.GetGeneralContent(strUrl, "http://www.gdajie.com/", requestCookies);
            }
Ejemplo n.º 8
0
        /// <summary>
        /// Crawls one list page: for every table row not yet recorded, fetches the detail page,
        /// saves the blog and its cover recommendation, and records the resource as processed.
        /// </summary>
        /// <param name="url">URL of the list page to crawl.</param>
        private static void GetListPage(string url)
        {
            string html = GetGeneralContent(url);

            // Nothing to parse when the fetch produced no content.
            if (string.IsNullOrEmpty(html))
            {
                return;
            }
            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(html);
            HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("//tbody[@id='data_list']");

            // SelectSingleNode returns null when the page has no data table (e.g. an error page).
            if (htmlNode == null)
            {
                return;
            }
            foreach (HtmlNode node in htmlNode.Elements("tr"))
            {
                try
                {
                    var elements = node.Elements("td").ToArray();
                    if (elements.Length < 3)
                    {
                        continue;
                    }
                    // Column 1 holds the category text, column 2 the link to the detail page.
                    var category = elements[1].InnerText.Trim();
                    Console.WriteLine(category);
                    var detailTd   = elements[2];
                    var detailLink = detailTd.Element("a");
                    if (detailLink == null)
                    {
                        continue;
                    }
                    var detailUrl = detailLink.GetAttributeValue("href", "").Trim();
                    Console.WriteLine(detailUrl);
                    var title = detailLink.InnerText.Trim().Replace(".Mp4Ba", "");
                    Console.WriteLine(title);
                    string resourceId = GetResourceId(detailUrl);
                    if (CrawlerUtility.ExistRecord(resourceId, ResourceType.Mp4Ba))
                    {
                        continue;
                    }
                    // Titles are capped at 200 characters.
                    var blog = new Blog {
                        Title = title.Length > 200 ? title.Substring(0, 200) : title
                    };
                    string coverUrl;
                    var    link     = GetIntroDetail("http://www.mp4ba.com/" + detailUrl, blog, out coverUrl);
                    bool   syncFlag = SaveBlog(blog, link);
                    if (!syncFlag)
                    {
                        Console.WriteLine("Blog Sync Fail,detailUrl:{0},blogId:{1}", detailUrl, blog.Id);
                        continue;
                    }
                    syncFlag = SaveRecomment(blog, coverUrl, category);
                    if (!syncFlag)
                    {
                        Console.WriteLine("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, coverUrl);
                    }
                    // The resource is recorded even if only the recommendation sync failed.
                    CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Mp4Ba);
                    Console.WriteLine("Blog Added:" + blog.Id);
                }
                catch (Exception ex)
                {
                    // One bad row must not abort the rest of the page.
                    Logger.Error(ex);
                }
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Downloads the cover image for a blog, maps the site category onto a
        /// <c>RecommentCategory</c>, rescales and uploads the image, and syncs the recommendation entry.
        /// </summary>
        /// <param name="blog">Blog whose <c>Id</c> and <c>Title</c> are copied onto the entry.</param>
        /// <param name="imageUrl">Cover image URL; null or empty means there is no cover to store.</param>
        /// <param name="category">Category text taken from the list page.</param>
        /// <returns>
        /// <c>true</c> when there is no cover or the download produced no file; otherwise the result
        /// of <c>SyncUtility.SyncRecomment</c>.
        /// </returns>
        private static bool SaveRecomment(Blog blog, string imageUrl, string category)
        {
            // Treat null the same as empty so a missing cover never reaches the downloader.
            if (string.IsNullOrEmpty(imageUrl))
            {
                return(true);
            }
            string fileName = CrawlerUtility.GetFileContent(imageUrl, Const.CoverFileDirectory, "http://www.mp4ba.com/");

            if (string.IsNullOrEmpty(fileName))
            {
                return(true);
            }

            var recomment = new Recomment()
            {
                BlogId    = blog.Id,
                Title     = blog.Title,
                CoverName = fileName,
                TimeStamp = DateTime.Now
            };

            // The checks run from generic to specific on purpose: a later exact match
            // overrides the broader substring match assigned before it.
            if (category.Contains("电影"))
            {
                recomment.Category = RecommentCategory.电影;
            }
            if (category.Contains("电视剧"))
            {
                recomment.Category = RecommentCategory.剧集;
            }
            if (category == "欧美电视剧")
            {
                recomment.Category = RecommentCategory.美剧;
            }
            if (category == "日韩电视剧")
            {
                recomment.Category = RecommentCategory.韩剧;
            }
            if (category == "综艺娱乐")
            {
                recomment.Category = RecommentCategory.综艺;
            }
            string localFilePath = Const.CoverFileDirectory + fileName;

            // Dispose both the loaded and the rescaled image so GDI+ handles (and the file handle
            // held by Image.FromFile) are released deterministically instead of at finalization.
            using (var sourceImage = Image.FromFile(localFilePath))
            using (var image = Domain.Helper.ImgHandler.ZoomPicture(sourceImage, 200, 110))
            {
                image.Save(localFilePath);
            }

            string yunUrl = UploadUtility.UploadLocalFile(localFilePath);

            Console.WriteLine("localFile:{0} upload to yunUrl:{1}", localFilePath, yunUrl);
            // Only mark the entry as cloud-hosted when the upload returned a URL.
            if (!string.IsNullOrEmpty(yunUrl))
            {
                recomment.YunUrl      = yunUrl;
                recomment.ImageStatus = ImageStatus.Yun;
            }
            return(SyncUtility.SyncRecomment(recomment));
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Downloads the cover image for a blog, rescales it in place, uploads it and syncs the
        /// resulting recommendation entry. The site's default placeholder cover is skipped.
        /// </summary>
        /// <param name="blog">Blog whose <c>Id</c> and <c>Title</c> are copied onto the entry.</param>
        /// <param name="imageUrl">Cover image URL; null, empty or the default cover means nothing is stored.</param>
        /// <param name="category">Raw category text, mapped through <c>SetCategory</c>.</param>
        /// <returns>
        /// <c>true</c> when there is no usable cover, the download produced no file, or the image
        /// could not be processed; otherwise the result of <c>SyncUtility.SyncRecomment</c>.
        /// </returns>
        private static bool SaveRecomment(Blog blog, string imageUrl, string category)
        {
            if (string.IsNullOrEmpty(imageUrl) || imageUrl == "http://verycd.gdajie.com/img/default_cover.jpg")
            {
                return(true);
            }
            string fileName = CrawlerUtility.GetFileContent(imageUrl, Const.CoverFileDirectory, "http://www.gdajie.com/");

            if (string.IsNullOrEmpty(fileName))
            {
                return(true);
            }
            string localFilePath = Const.CoverFileDirectory + fileName;

            try
            {
                // Dispose the rescaled image too; it is a separate GDI+ object from the loaded one.
                using (var localImage = Image.FromFile(localFilePath))
                using (var image = ImgHandler.ZoomPicture(localImage, 200, 110))
                {
                    image.Save(localFilePath);
                }
            }
            catch (Exception ex)
            {
                // A broken image is logged and skipped; the blog itself is still treated as saved.
                Logger.Error(ex, "localFilePath:{0}", localFilePath);
                return(true);
            }

            string yunUrl = UploadUtility.UploadLocalFile(localFilePath);

            Info("localFile:{0} upload to yunUrl:{1}", localFilePath, yunUrl);
            var recomment = new Recomment()
            {
                BlogId    = blog.Id,
                CoverName = fileName,
                Title     = blog.Title,
                TimeStamp = DateTime.Now,
                Category  = SetCategory(category)
            };

            // Only mark the entry as cloud-hosted when the upload returned a URL.
            if (!string.IsNullOrEmpty(yunUrl))
            {
                recomment.YunUrl      = yunUrl;
                recomment.ImageStatus = ImageStatus.Yun;
            }
            return(SyncUtility.SyncRecomment(recomment));
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Downloads the cover image for a blog, rescales it in place, uploads it and syncs the
        /// resulting recommendation entry. The site's default placeholder cover is skipped.
        /// </summary>
        /// <param name="blog">Blog whose <c>Id</c> and <c>Title</c> are copied onto the entry.</param>
        /// <param name="imageUrl">Cover image URL; null, empty or the default cover means nothing is stored.</param>
        /// <param name="category">Raw category text, mapped through <c>SetCategory</c>.</param>
        /// <returns>
        /// <c>true</c> when there is no usable cover or the download produced no file; otherwise the
        /// result of <c>SyncUtility.SyncRecomment</c>. Image-processing errors propagate to the caller.
        /// </returns>
        private static bool SaveRecomment(Blog blog, string imageUrl, string category)
        {
            if (string.IsNullOrEmpty(imageUrl) || imageUrl == "http://verycd.gdajie.com/img/default_cover.jpg")
            {
                return(true);
            }
            string fileName = CrawlerUtility.GetFileContent(imageUrl, Const.CoverFileDirectory, "http://www.gdajie.com/");

            if (string.IsNullOrEmpty(fileName))
            {
                return(true);
            }
            string localFilePath = Const.CoverFileDirectory + fileName;

            // Dispose both the loaded and the rescaled image so GDI+ handles (and the file handle
            // held by Image.FromFile) are released deterministically instead of at finalization.
            using (var sourceImage = Image.FromFile(localFilePath))
            using (var image = Domain.Helper.ImgHandler.ZoomPicture(sourceImage, 200, 110))
            {
                image.Save(localFilePath);
            }

            string yunUrl = UploadUtility.UploadLocalFile(localFilePath);

            Console.WriteLine("localFile:{0} upload to yunUrl:{1}", localFilePath, yunUrl);
            var recomment = new Recomment()
            {
                BlogId    = blog.Id,
                CoverName = fileName,
                Title     = blog.Title,
                TimeStamp = DateTime.Now,
                Category  = SetCategory(category)
            };

            // Only mark the entry as cloud-hosted when the upload returned a URL.
            if (!string.IsNullOrEmpty(yunUrl))
            {
                recomment.YunUrl      = yunUrl;
                recomment.ImageStatus = ImageStatus.Yun;
            }
            return(SyncUtility.SyncRecomment(recomment));
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Fetches a page through <c>CrawlerUtility.GetGeneralContent</c> using the ed2000.com referer
        /// and a fixed cookie set, bumping the request counter first and pausing before returning.
        /// </summary>
        /// <param name="strUrl">URL of the page to fetch.</param>
        /// <returns>The fetched content exactly as returned by the crawler utility.</returns>
        private static string GetGeneralContent(string strUrl)
        {
            _requestCount++;

            // NOTE(review): the UserID/UserPassword cookies look like account credentials kept in source — confirm.
            var siteCookies = new List<Cookie>();
            siteCookies.Add(new Cookie("ASPSESSIONIDQCSQTDQD", "OPOIBEHBPMHCPNAMCBJHFPDD", "/", "www.ed2000.com"));
            siteCookies.Add(new Cookie("UserID", "30602", "/", "www.ed2000.com"));
            siteCookies.Add(new Cookie("UserPassword", "A9E485F632124349EDFCEC448E649702", "/", "www.ed2000.com"));
            siteCookies.Add(new Cookie("CNZZDATA947842", "cnzz_eid%3D1459071056-1467470818-null%26ntime%3D1469246688", "/", "www.ed2000.com"));
            // The visit counter cookie mirrors the number of requests issued so far.
            siteCookies.Add(new Cookie("VisitsNumber", _requestCount.ToString(), "/", "www.ed2000.com"));

            string content = CrawlerUtility.GetGeneralContent(strUrl, "http://www.ed2000.com/", siteCookies);

            // Wait 60 s after an empty response, 2 s after a successful one.
            int pauseMilliseconds = string.IsNullOrEmpty(content) ? 60 * 1000 : 2 * 1000;
            Thread.Sleep(pauseMilliseconds);

            return content;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Crawls one list page: for every list item not yet recorded, collects title, cover URL and
        /// summary paragraphs, fetches the detail page, saves the blog and its cover recommendation,
        /// and records the resource as processed.
        /// </summary>
        /// <param name="url">URL of the list page to crawl.</param>
        /// <param name="category">Category text passed on to <c>SaveRecomment</c>.</param>
        private static void GetListPage(string url, string category)
        {
            string html = GetGeneralContent(url);

            // Null is handled like empty: LoadHtml would throw on a null document.
            if (string.IsNullOrEmpty(html))
            {
                return;
            }
            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(html);
            HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("//div[@id='main']//ul");

            if (htmlNode == null)
            {
                return;
            }
            foreach (HtmlNode node in htmlNode.Elements("li"))
            {
                try
                {
                    var blog  = new Blog();
                    var thumb = node.Element("div");
                    if (thumb == null)
                    {
                        continue;
                    }
                    var link = thumb.Element("a");
                    if (link == null)
                    {
                        continue;
                    }
                    var detailUrl = link.GetAttributeValue("href", "");
                    if (detailUrl == "")
                    {
                        continue;
                    }
                    Trace(detailUrl);
                    string resourceId = GetResourceId(detailUrl);
                    if (CrawlerUtility.ExistRecord(resourceId, ResourceType.Verycd))
                    {
                        Info("resourceId:{0} exist", resourceId);
                        _existResource++;
                        continue;
                    }
                    var title = link.GetAttributeValue("title", "");
                    if (title == "")
                    {
                        continue;
                    }
                    // Titles are capped at 200 characters.
                    blog.Title = title.Length > 200 ? title.Substring(0, 200) : title;
                    Trace(title);
                    // The cover URL is embedded in the anchor's inline style as background-image:url(...).
                    var style    = link.GetAttributeValue("style", "");
                    var imageUrl = style.Replace("background-image:", "").Replace("url(", "").Replace(")", "").Trim();
                    Trace("imageUrl:" + imageUrl);
                    // The second <div> of the item carries the textual info paragraphs.
                    var info = node.Elements("div").Skip(1).FirstOrDefault();
                    if (info == null)
                    {
                        continue;
                    }
                    var           infoParas   = info.Elements("p");
                    StringBuilder htmlBuilder = new StringBuilder();
                    StringBuilder textBuilder = new StringBuilder();
                    // Skip the first paragraph and use at most the next four as the blog summary.
                    var           paraArray   = infoParas.Skip(1).Take(4).ToArray();
                    foreach (HtmlNode para in paraArray)
                    {
                        htmlBuilder.Append(para.OuterHtml);
                        textBuilder.Append(para.InnerText);
                    }
                    blog.HtmlContent = htmlBuilder.ToString();
                    blog.Content     = textBuilder.ToString();

                    var urlList = GetIntroDetail(detailUrl, blog);
                    if (urlList == null || urlList.Count == 0)
                    {
                        continue;
                    }
                    bool syncFlag = SaveBlog(blog, urlList);
                    if (!syncFlag)
                    {
                        Info("Blog Sync Fail,url:{0},blogId:{1}", url, blog.Id);
                        continue;
                    }
                    syncFlag = SaveRecomment(blog, imageUrl, category);
                    if (!syncFlag)
                    {
                        Info("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, imageUrl);
                    }
                    // The resource is recorded even if only the recommendation sync failed.
                    CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Verycd);
                    Info("Blog Added,blogId:{0},resourceId:{1}", blog.Id, resourceId);
                    _fetchCount++;
                }
                catch (Exception ex)
                {
                    // One bad item must not abort the rest of the page.
                    Logger.Error(ex);
                }
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Crawls one list page. Resources already recorded are updated with new links when they are
        /// tracked as ongoing; new resources get their content, image, links and cover recommendation
        /// saved and are then recorded as processed.
        /// </summary>
        /// <param name="url">URL of the list page to crawl.</param>
        private static void GetListPage(string url)
        {
            string html = GetGeneralContent(url);

            // Null is handled like empty: LoadHtml would throw on a null document.
            if (string.IsNullOrEmpty(html))
            {
                return;
            }
            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(html);
            HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("//div[@class='resource-showlist']//ul");

            if (htmlNode == null)
            {
                return;
            }
            foreach (HtmlNode node in htmlNode.Elements("li"))
            {
                try
                {
                    bool   continuted    = false;
                    string continuteType = "";
                    var    nodeArray     = node.Elements("div").ToArray();
                    // The info block is the second <div>; skip items that do not have one.
                    if (nodeArray.Length < 2)
                    {
                        continue;
                    }
                    var infoNode = nodeArray[1];
                    var dlNode   = infoNode.Element("dl");
                    var dtNode   = dlNode == null ? null : dlNode.Element("dt");
                    if (dtNode == null)
                    {
                        continue;
                    }
                    var strongNode          = dtNode.Element("strong");
                    var detailNode          = strongNode == null ? null : strongNode.Element("a");
                    var detailContinuedNode = dtNode.Element("font");
                    if (detailNode == null)
                    {
                        continue;
                    }
                    string title     = detailNode.InnerText;
                    string detailUrl = detailNode.GetAttributeValue("href", "");
                    if (detailUrl == "")
                    {
                        continue;
                    }
                    Console.WriteLine(title);
                    if (detailContinuedNode != null)
                    {
                        continuteType = detailContinuedNode.InnerText;
                        Console.WriteLine(continuteType);
                    }
                    // "[尚未开播]" marks a show that has not aired yet: nothing to crawl.
                    if (continuteType.Contains("[尚未开播]"))
                    {
                        continue;
                    }
                    // Shows still airing or with a finished season are tracked so later runs can append links.
                    if (continuteType.Contains("连载中]") || continuteType.Contains("季完结]"))
                    {
                        continuted = true;
                    }

                    Console.WriteLine(detailUrl);
                    string resourceId = GetResourceId(detailUrl);
                    if (CrawlerUtility.ExistRecord(resourceId, ResourceType.Zimuzu))
                    {
                        int blogId = CrawlerUtility.ExistContinutedRecord(resourceId, ResourceType.Zimuzu);
                        if (blogId > 0)
                        {
                            Console.WriteLine("resourceId:{0} exist,to be updated blogId:{1}", resourceId, blogId);
                            if (AppendBlogLinks(resourceId, blogId))
                            {
                                Console.WriteLine("Blog Updated:" + blogId);
                            }
                            else
                            {
                                Console.WriteLine("Blog Sync Fail,resourceId:{0},blogId:{1}", resourceId, blogId);
                            }
                            // Once the page no longer marks the show as ongoing, close the record.
                            if (!continuted)
                            {
                                CrawlerUtility.UpdateRecordOver(blogId);
                            }
                        }
                        else
                        {
                            Console.WriteLine("resourceId:{0} exist", resourceId);
                        }
                        continue;
                    }

                    // Titles are capped at 200 characters.
                    var blog = new Blog {
                        Title = title.Length > 200 ? title.Substring(0, 200) : title
                    };
                    string blogImgUrl;
                    string coverImgUrl;
                    GetBlogContent(detailUrl, blog, out blogImgUrl, out coverImgUrl);
                    string      linkUrl  = detailUrl.Replace("resource", "resource/list");
                    List <Link> linkList = GetBlogLink(linkUrl);
                    if (string.IsNullOrEmpty(blog.Content) || linkList == null || linkList.Count == 0)
                    {
                        continue;
                    }
                    ImageUrl imageUrl = DownloadBlogImgToLocal(blogImgUrl);
                    bool     syncFlag = SaveBlog(blog, imageUrl, linkList);
                    if (!syncFlag)
                    {
                        Console.WriteLine("Blog Sync Fail,detailUrl:{0},blogId:{1}", detailUrl, blog.Id);
                        continue;
                    }
                    syncFlag = SaveRecomment(blog, coverImgUrl);
                    if (!syncFlag)
                    {
                        // Log the cover URL that failed, not the blog image object.
                        Console.WriteLine("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, coverImgUrl);
                    }
                    // The resource is recorded even if only the recommendation sync failed.
                    CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Zimuzu, continuted);
                    Console.WriteLine("Blog Added:" + blog.Id);
                }
                catch (Exception ex)
                {
                    // One bad item must not abort the rest of the page.
                    Logger.Error(ex);
                }
            }
        }
Ejemplo n.º 15
0
            /// <summary>
            /// Crawls one list page: for every entry not yet recorded, collects the cover URL and title,
            /// fetches the detail page, saves the blog and its cover recommendation, and records the
            /// resource as processed.
            /// </summary>
            /// <param name="url">URL of the list page to crawl.</param>
            /// <param name="category">Category text passed on to <c>SaveRecomment</c>.</param>
            private static void GetListPage(string url, string category)
            {
                string html = GetGeneralContent(url);

                // Null is handled like empty: LoadHtml would throw on a null document.
                if (string.IsNullOrEmpty(html))
                {
                    return;
                }
                HtmlDocument document = new HtmlDocument();

                document.LoadHtml(html);
                HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("//div[@id='tech']");

                if (htmlNode == null)
                {
                    return;
                }
                foreach (HtmlNode node in htmlNode.Elements("div"))
                {
                    try
                    {
                        // Each entry is expected to contain at least three child <div>s;
                        // the first holds the thumbnail link, the second the title list.
                        var childrenDiv = node.Elements("div").ToArray();
                        if (childrenDiv.Length < 3)
                        {
                            continue;
                        }
                        var blog  = new Blog();
                        var thumb = childrenDiv[0];
                        var link  = thumb.Element("a");
                        if (link == null)
                        {
                            continue;
                        }
                        var detailUrl = link.GetAttributeValue("href", "");
                        if (detailUrl == "")
                        {
                            continue;
                        }
                        Trace(detailUrl);
                        string resourceId = GetResourceId(detailUrl);
                        if (CrawlerUtility.ExistRecord(resourceId, ResourceType.Ed2Kers))
                        {
                            Info("resourceId:{0} exist", resourceId);
                            continue;
                        }
                        var    imageNode = link.Element("img");
                        string imageUrl  = "";
                        if (imageNode != null)
                        {
                            // The real image address is read from the data-original attribute.
                            imageUrl = imageNode.GetAttributeValue("data-original", "");
                            Trace("imageUrl:" + imageUrl);
                        }

                        // Walk ul > li step by step so a missing level skips the entry instead of throwing.
                        var listNode  = childrenDiv[1].Element("ul");
                        var itemNode  = listNode == null ? null : listNode.Element("li");
                        var titleNode = itemNode == null ? null : itemNode.Elements("a").LastOrDefault();
                        if (titleNode == null)
                        {
                            continue;
                        }
                        var title = titleNode.InnerText;
                        if (string.IsNullOrEmpty(title))
                        {
                            continue;
                        }
                        // Titles are capped at 200 characters.
                        blog.Title = title.Length > 200 ? title.Substring(0, 200) : title;
                        Trace(title);

                        var urlList = GetIntroDetail("http://www.ed2kers.com/" + detailUrl, blog);
                        if (urlList == null || urlList.Count == 0)
                        {
                            continue;
                        }
                        bool syncFlag = SaveBlog(blog, urlList);
                        if (!syncFlag)
                        {
                            Info("Blog Sync Fail,url:{0},blogId:{1}", url, blog.Id);
                            continue;
                        }
                        syncFlag = SaveRecomment(blog, imageUrl, category);
                        if (!syncFlag)
                        {
                            Info("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, imageUrl);
                        }
                        // The resource is recorded even if only the recommendation sync failed.
                        CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Ed2Kers);
                        Info("Blog Added,blogId:{0},resourceId:{1}", blog.Id, resourceId);
                        _fetchCount++;
                    }
                    catch (Exception ex)
                    {
                        // One bad entry must not abort the rest of the page.
                        Logger.Error(ex);
                    }
                }
            }
Ejemplo n.º 16
0
        /// <summary>
        /// Endless worker loop that uploads pending local images and syncs the resulting URLs.
        /// Each pass first processes the blog images returned by <c>ImageUrlDao.GetToUploadList</c>
        /// and then the cover images returned by <c>RecommentDao.GetToUploadList</c>.
        /// A pending record is deleted only after its uploaded URL has been synced successfully.
        /// The loop exits only when a <see cref="ThreadAbortException"/> is caught.
        /// </summary>
        private void UpdateServiceProcess()
        {
            while (true)
            {
                try
                {
                    // ---- Pass 1: blog images ----
                    var blogImageList  = ImageUrlDao.GetToUploadList();
                    int blogImageCount = blogImageList.Count;
                    MessageLog("updating blogImageCount:" + blogImageCount);
                    int successCount = 0;
                    foreach (ImageUrl image in blogImageList)
                    {
                        string localPath = Const.BlogFileDirectory + image.Url;
                        string yunUrl    = UploadUtility.UploadLocalFile(localPath);
                        // Treat null like "" so a failed upload is never recorded as uploaded.
                        if (string.IsNullOrEmpty(yunUrl))
                        {
                            // Upload failed: back off for 5 minutes before trying the next image.
                            Thread.Sleep(5 * 60 * 1000);
                            continue;
                        }
                        image.YunUrl      = yunUrl;
                        image.ImageStatus = ImageStatus.Yun;
                        if (SyncUtility.SyncImageUrl(image))
                        {
                            MessageLog("sync success,localPath:{0} uploaded yunUrl:{1}", localPath, yunUrl);
                            ImageUrlDao.DeleteRecord(image.Id);
                            successCount++;
                        }
                        else
                        {
                            MessageLog("sync fail,localPath:{0} uploaded yunUrl:{1}", localPath, yunUrl);
                        }
                        // Pause 15 seconds between uploads.
                        Thread.Sleep(15 * 1000);
                    }
                    MessageLog("updated blogImageCount:" + successCount);

                    // ---- Pass 2: cover images ----
                    var coverImageList  = RecommentDao.GetToUploadList();
                    int coverImageCount = coverImageList.Count;
                    MessageLog("updating coverImageCount:" + coverImageCount);
                    successCount = 0;
                    foreach (Recomment image in coverImageList)
                    {
                        // A cover name that is still a remote URL has to be fetched into the
                        // cover directory first; GetFileContent yields the local file name.
                        string trimmedCoverName = image.CoverName.Trim();
                        if (trimmedCoverName.StartsWith("http", StringComparison.Ordinal))
                        {
                            image.CoverName = CrawlerUtility.GetFileContent(trimmedCoverName,
                                                                            Const.CoverFileDirectory, "");
                            if (string.IsNullOrEmpty(image.CoverName))
                            {
                                continue;
                            }
                        }
                        string localPath = Const.CoverFileDirectory + image.CoverName;
                        string yunUrl    = UploadUtility.UploadLocalFile(localPath);
                        // Treat null like "" so a failed upload is never recorded as uploaded.
                        if (string.IsNullOrEmpty(yunUrl))
                        {
                            // Unlike the blog-image pass, no back-off sleep is applied here.
                            continue;
                        }
                        image.YunUrl      = yunUrl;
                        image.ImageStatus = ImageStatus.Yun;
                        if (SyncUtility.SyncRecomment(image))
                        {
                            MessageLog("sync success,localPath:{0} uploaded yunUrl:{1}", localPath, yunUrl);
                            RecommentDao.DeleteRecord(image.Id);
                            successCount++;
                        }
                        else
                        {
                            MessageLog("sync fail,localPath:{0} uploaded yunUrl:{1}", localPath, yunUrl);
                        }
                        // Pause 15 seconds between uploads.
                        Thread.Sleep(15 * 1000);
                    }
                    // Report the cover-image total (previously mislabelled as blogImageCount).
                    MessageLog("updated coverImageCount:" + successCount);

                    // Nothing was pending in either list: idle before polling again.
                    if (blogImageCount == 0 && coverImageCount == 0)
                    {
                        MessageLog("record empty,sleeping 10 minutes");
                        Thread.Sleep(10 * 60 * 1000);
                    }
                }
                catch (ThreadAbortException threadAbortException)
                {
                    // The worker thread is being aborted: log and leave the loop.
                    Logger.Error(threadAbortException);
                    break;
                }
                catch (Exception ex)
                {
                    // Any other failure: log, wait 10 minutes, then start a new pass.
                    Logger.Error(ex);
                    Thread.Sleep(10 * 60 * 1000);
                }
            }
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Processes one resource list page: every data row of the <c>CommonListArea</c> table
        /// is either added as a new blog (with its recommendation record) or, when the resource
        /// is already recorded but its update-time cell differs from its distribute-date cell,
        /// has links appended to the existing blog.
        /// </summary>
        /// <param name="url">Address of the list page to fetch.</param>
        /// <param name="category">Category passed on to <c>SaveRecomment</c> for new blogs.</param>
        private static void GetListPage(string url, string category)
        {
            string html = GetGeneralContent(url);

            // Null is treated like "" so LoadHtml is never called without content.
            if (string.IsNullOrEmpty(html))
            {
                return;
            }
            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(html);
            HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("//table[@class='CommonListArea']");

            if (htmlNode == null)
            {
                return;
            }
            int    rowIndex  = 0;
            // Declared outside the loop so the catch block can include it in the error log.
            string detailUrl = "";

            foreach (HtmlNode node in htmlNode.Elements("tr"))
            {
                // Reset per row so a failure is never logged with the previous row's URL.
                detailUrl = "";
                try
                {
                    // The first two rows are skipped.
                    if (++rowIndex <= 2)
                    {
                        continue;
                    }
                    var tdArray = node.Elements("td").ToArray();
                    if (tdArray.Length < 4)
                    {
                        continue;
                    }
                    var blog      = new Blog();
                    var titleNode = tdArray[0];

                    var title = titleNode.InnerText;
                    if (title == "")
                    {
                        continue;
                    }
                    // Titles are capped at 200 characters.
                    blog.Title = title.Length > 200 ? title.Substring(0, 200) : title;
                    Trace(title);
                    // The detail link is the second anchor inside the title cell.
                    var link = titleNode.Elements("a").Skip(1).FirstOrDefault();
                    if (link == null)
                    {
                        continue;
                    }
                    detailUrl = link.GetAttributeValue("href", "");
                    if (detailUrl == "")
                    {
                        continue;
                    }
                    Trace(detailUrl);
                    string resourceId = GetResourceId(detailUrl);
                    if (CrawlerUtility.ExistRecord(resourceId, ResourceType.Ed2000))
                    {
                        _existResource++;
                        var distributeDate = tdArray[1].InnerText;
                        var updateTime     = tdArray[2].InnerText;
                        // Update time containing the distribute date is treated as "unchanged".
                        if (updateTime.Contains(distributeDate))
                        {
                            Info("Resource Existed,resourceId:{0}", resourceId);
                        }
                        else
                        {
                            _updateResource++;
                            int blogId = CrawlerUtility.ExistContinutedRecord(resourceId, ResourceType.Ed2000);
                            Info("resourceId:{0} exist,to be updated blogId:{1}", resourceId, blogId);
                            if (AppendBlogLinks(resourceId, blogId))
                            {
                                Info("Blog Updated:" + blogId);
                            }
                            else
                            {
                                // Log the id of the blog being updated; the local `blog` object
                                // was never saved on this path, so its Id is not the right one.
                                Info("Blog Sync Fail,resourceId:{0},blogId:{1}", resourceId, blogId);
                            }
                            _fetchCount++;
                        }
                        continue;
                    }
                    string imageUrl = "";
                    var    urlList  = GetIntroDetail("http://www.ed2000.com" + detailUrl, blog, out imageUrl);
                    if (urlList.Count == 0)
                    {
                        continue;
                    }
                    bool syncFlag = SaveBlog(blog, urlList);
                    if (!syncFlag)
                    {
                        Info("Blog Sync Fail,detailUrl:{0},blogId:{1}", detailUrl, blog.Id);
                        continue;
                    }
                    // A failed recommendation sync is only logged; the resource is still recorded.
                    syncFlag = SaveRecomment(blog, imageUrl, category);
                    if (!syncFlag)
                    {
                        Info("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, imageUrl);
                    }
                    CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Ed2000, true);
                    Info("Blog Added,blogId:{0},resourceId:{1}", blog.Id, resourceId);
                    _fetchCount++;
                }
                catch (Exception ex)
                {
                    // One bad row must not stop the rest of the page.
                    Logger.Error(ex, detailUrl);
                }
            }
        }