/// <summary>
/// Builds a <c>Recomment</c> entry for a blog's cover image and hands it to
/// <c>SyncUtility.SyncRecomment</c>.
/// </summary>
/// <param name="blog">Blog whose <c>Id</c> and <c>Title</c> are copied onto the entry.</param>
/// <param name="imageUrl">Cover image URL; an empty string means there is nothing to record.</param>
/// <param name="category">Category text passed through <c>SetCategory</c>.</param>
/// <returns><c>true</c> when <paramref name="imageUrl"/> is empty, otherwise the result of the sync call.</returns>
private static bool SaveRecomment(Blog blog, string imageUrl, string category)
{
    if (imageUrl == "")
    {
        return true;
    }

    string coverName = CrawlerUtility.GetFileContent(imageUrl, Const.CoverFileDirectory, "http://www.ed2kers.com/");
    if (string.IsNullOrEmpty(coverName))
    {
        // No file name came back: keep the remote URL as the cover name.
        // NOTE(review): presumably a later job retries covers whose name is a URL - confirm.
        coverName = imageUrl;
    }

    string coverPath = Const.CoverFileDirectory + coverName;

    // The upload call is disabled in this variant, so the uploaded URL is always empty.
    string uploadedUrl = "";
    Info("localFile:{0} upload to yunUrl:{1}", coverPath, uploadedUrl);

    var entry = new Recomment();
    entry.BlogId = blog.Id;
    entry.CoverName = coverName;
    entry.Title = blog.Title;
    entry.TimeStamp = DateTime.Now;
    entry.Category = SetCategory(category);

    // Only reachable once the upload above is re-enabled.
    bool hasUploadedUrl = !string.IsNullOrEmpty(uploadedUrl);
    if (hasUploadedUrl)
    {
        entry.YunUrl = uploadedUrl;
        entry.ImageStatus = ImageStatus.Yun;
    }

    return SyncUtility.SyncRecomment(entry);
}
/// <summary>
/// Fetches a cover image through <c>CrawlerUtility.GetFileContent</c>, rewrites the local
/// file with the output of <c>ImgHandler.ZoomPicture</c>, uploads it, and syncs a
/// <c>Recomment</c> entry for the blog.
/// </summary>
/// <param name="blog">Blog whose <c>Id</c> and <c>Title</c> are copied onto the entry.</param>
/// <param name="coverImgUrl">Cover image URL; null or empty means there is nothing to record.</param>
/// <returns>
/// <c>true</c> when there is no cover URL or no file name was returned for it,
/// otherwise the result of <c>SyncUtility.SyncRecomment</c>.
/// </returns>
private static bool SaveRecomment(Blog blog, string coverImgUrl)
{
    if (string.IsNullOrEmpty(coverImgUrl))
    {
        return true;
    }

    string fileName = CrawlerUtility.GetFileContent(coverImgUrl, Const.CoverFileDirectory, "http://www.zimuzu.tv/");
    if (string.IsNullOrEmpty(fileName))
    {
        return true;
    }

    string localFilePath = Const.CoverFileDirectory + fileName;

    // Dispose both the loaded image and the zoomed result so GDI handles and the
    // file lock taken by Image.FromFile are released deterministically.
    // 200 and 110 are passed to ZoomPicture (presumably target width/height - confirm).
    using (Image sourceImage = Image.FromFile(localFilePath))
    using (var zoomedImage = Domain.Helper.ImgHandler.ZoomPicture(sourceImage, 200, 110))
    {
        zoomedImage.Save(localFilePath);
    }

    string yunUrl = UploadUtility.UploadLocalFile(localFilePath);
    Console.WriteLine("localFile:{0} upload to yunUrl:{1}", localFilePath, yunUrl);

    var recomment = new Recomment()
    {
        BlogId = blog.Id,
        CoverName = fileName,
        Title = blog.Title,
        TimeStamp = DateTime.Now,
        Category = SetCategoryByTitle(blog.Title)
    };

    // Mark the entry as uploaded only when the upload returned a URL.
    if (!string.IsNullOrEmpty(yunUrl))
    {
        recomment.YunUrl = yunUrl;
        recomment.ImageStatus = ImageStatus.Yun;
    }

    return SyncUtility.SyncRecomment(recomment);
}
/// <summary>
/// Fetches a blog image through <c>CrawlerUtility.GetFileContent</c>, shrinks it when it is
/// wider than 500 pixels, uploads the local file, and describes the result as an <c>ImageUrl</c>.
/// </summary>
/// <param name="blogImgUrl">Remote URL of the blog image.</param>
/// <returns>
/// An <c>ImageUrl</c> whose status is <c>ImageStatus.Yun</c> when the upload returned a URL and
/// <c>ImageStatus.Local</c> otherwise; <c>null</c> when no file name was returned for the image.
/// </returns>
private static ImageUrl DownloadBlogImgToLocal(string blogImgUrl)
{
    string fileName = CrawlerUtility.GetFileContent(blogImgUrl, Const.BlogFileDirectory, "http://www.zimuzu.tv/");

    // Treat null the same as empty so a missing file name never reaches Image.FromFile.
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    string localFilePath = Const.BlogFileDirectory + fileName;
    using (Image downloadImage = Image.FromFile(localFilePath))
    {
        if (downloadImage.Size.Width > 500)
        {
            // Integer arithmetic: height scaled by the same ratio that brings the width to 500.
            int scaledHeight = 500 * downloadImage.Size.Height / downloadImage.Size.Width;

            // Dispose the resized image once it has been written back.
            using (var resizedImage = Domain.Helper.ImgHandler.ZoomPictureProportionately(downloadImage, 500, scaledHeight))
            {
                resizedImage.Save(localFilePath);
            }
        }
    }

    string yunUrl = UploadUtility.UploadLocalFile(localFilePath);
    Console.WriteLine("localFile:{0} upload to yunUrl:{1}", localFilePath, yunUrl);

    ImageUrl imageUrl = new ImageUrl();
    imageUrl.Url = fileName;
    imageUrl.ImageStatus = ImageStatus.Local;
    imageUrl.TimeStamp = DateTime.Now;

    // Upgrade the status only when the upload returned a URL.
    if (!string.IsNullOrEmpty(yunUrl))
    {
        imageUrl.YunUrl = yunUrl;
        imageUrl.ImageStatus = ImageStatus.Yun;
    }

    return imageUrl;
}
/// <summary>
/// Requests <paramref name="strUrl"/> through <c>CrawlerUtility.GetGeneralContent</c> with a
/// fixed referer and cookie set for zimuzu.tv, then pauses before returning.
/// </summary>
/// <param name="strUrl">URL to request.</param>
/// <returns>Whatever <c>CrawlerUtility.GetGeneralContent</c> returned.</returns>
private static string GetGeneralContent(string strUrl)
{
    // NOTE(review): these cookie values are hard-coded session data kept in source.
    var requestCookies = new List<Cookie>();
    requestCookies.Add(new Cookie("CNZZDATA1254180690", "176448891-1468518959-%7C1468679132", "/", "www.zimuzu.tv"));
    requestCookies.Add(new Cookie("PHPSESSID", "ht8gtggpqjlth57ef41b7g03k0", "/", "www.zimuzu.tv"));
    requestCookies.Add(new Cookie("GINFO", "uid%3D3667676%26nickname%3Dkinshine%26group_id%3D1%26avatar_t%3D%26main_group_id%3D0%26common_group_id%3D54", "/", ".zimuzu.tv"));
    requestCookies.Add(new Cookie("GKEY", "e4b20dc7275fac6a7f984afcb5938ff3", "/", ".zimuzu.tv"));
    requestCookies.Add(new Cookie("mykeywords", "%3A1%3A%7Bi%3A0%3Bs%3A21%3A%22%E8%82%A5%E7%91%9E%E7%9A%84%E7%96%AF%E7%8B%82%E6%97%A5%E8%AE%B0%22%3B%7D", "/", ".zimuzu.tv"));

    string pageHtml = CrawlerUtility.GetGeneralContent(strUrl, "http://www.zimuzu.tv/", requestCookies);

    // 10 seconds after an empty response, 5 seconds otherwise.
    int pauseMilliseconds = string.IsNullOrEmpty(pageHtml) ? 10 * 1000 : 5 * 1000;
    Thread.Sleep(pauseMilliseconds);

    return pageHtml;
}
/// <summary>
/// Requests <paramref name="strUrl"/> through <c>CrawlerUtility.GetGeneralContent</c> with a
/// fixed referer and cookie set for gdajie.com, detects the site's "IP banned" page, and
/// pauses before returning.
/// </summary>
/// <param name="strUrl">URL to request.</param>
/// <returns>
/// The response returned by <c>CrawlerUtility.GetGeneralContent</c>, or an empty string when
/// the response contains the ban notice.
/// </returns>
private static string GetGeneralContent(string strUrl)
{
    _requestCount++;
    string referer = "http://www.gdajie.com/";

    // NOTE(review): these cookie values are hard-coded session data kept in source.
    var cookies = new List<Cookie>()
    {
        new Cookie("AJSTAT_ok_times", "5", "/", "verycd.gdajie.com"),
        new Cookie("Hm_lpvt_9398e7331484620894c57216bea9225e", "1468695450", "/", ".gdajie.com"),
        new Cookie("JSESSIONID", "7F0463C2223BFF2E8868FC7EFA7D0167", "/", "verycd.gdajie.com"),
        new Cookie("CNZZDATA4616656", "cnzz_eid%3D1953947045-1464628831-http%253A%252F%252Fwww.verycd.gdajie.com%252F%26ntime%3D1467845452", "/", "verycd.gdajie.com"),
        new Cookie("CNZZDATA1254524697", "78693709-1464629094-%7C1468690207", "/", "verycd.gdajie.com")
    };

    string html = CrawlerUtility.GetGeneralContent(strUrl, referer, cookies);

    // Check for null/empty first: calling Contains on a null response would throw
    // before the empty-response handling below could run.
    if (!string.IsNullOrEmpty(html)
        && html.Contains("对不起,我们怀疑您正使用采集软件对我们的网站进行采集,所以我们采取了封您ip的决定"))
    {
        Console.WriteLine("url:" + strUrl + ",ip被封,_requestCount:" + _requestCount);
        Info("url:" + strUrl + ",ip被封,requestCount:" + _requestCount);

        // Report a banned response to callers as an empty page.
        html = "";
    }

    // 10 seconds after an empty (or banned) response, 5 seconds otherwise.
    if (string.IsNullOrEmpty(html))
    {
        Thread.Sleep(10 * 1000);
    }
    else
    {
        Thread.Sleep(5 * 1000);
    }

    return html;
}
/// <summary>
/// Requests <paramref name="strUrl"/> through <c>CrawlerUtility.GetGeneralContent</c> with a
/// fixed referer and cookie for mp4ba.com, detects the site's maintenance page, and pauses
/// before returning.
/// </summary>
/// <param name="strUrl">URL to request.</param>
/// <returns>
/// The response returned by <c>CrawlerUtility.GetGeneralContent</c>, or an empty string when
/// the response contains the maintenance notice.
/// </returns>
private static string GetGeneralContent(string strUrl)
{
    _requestCount++;
    string referer = "http://www.mp4ba.com/";
    var cookies = new List<Cookie>()
    {
        new Cookie("CNZZDATA5925857", "cnzz_eid%3D921376316-1462025230-%26ntime%3D1467044423", "/", "www.mp4ba.com")
    };

    string html = CrawlerUtility.GetGeneralContent(strUrl, referer, cookies);

    // Check for null/empty first: calling Contains on a null response would throw
    // before the empty-response handling below could run.
    if (!string.IsNullOrEmpty(html) && html.Contains("服务器维护中,请稍后访问"))
    {
        Info("服务器维护中,请稍后访问,60秒后继续操作");

        // Report the maintenance page to callers as an empty page.
        html = "";
    }

    // 60 seconds after an empty (or maintenance) response, 5 seconds otherwise.
    if (string.IsNullOrEmpty(html))
    {
        Thread.Sleep(60 * 1000);
    }
    else
    {
        Thread.Sleep(5 * 1000);
    }

    return html;
}
/// <summary>
/// Requests <paramref name="strUrl"/> through <c>CrawlerUtility.GetGeneralContent</c> with a
/// fixed referer and a cookie set mostly scoped to ed2kers.com, counting the request in
/// <c>_requestCount</c>.
/// </summary>
/// <param name="strUrl">URL to request.</param>
/// <returns>Whatever <c>CrawlerUtility.GetGeneralContent</c> returned.</returns>
private static string GetGeneralContent(string strUrl)
{
    _requestCount++;

    // The running request count is sent as the AJSTAT_ok_pages cookie value.
    // NOTE(review): the referer and one cookie point at gdajie.com while the rest target
    // ed2kers.com - confirm this is intentional. Cookie values are hard-coded session data.
    var requestCookies = new List<Cookie>();
    requestCookies.Add(new Cookie("AJSTAT_ok_pages", _requestCount.ToString(), "/", "www.ed2kers.com"));
    requestCookies.Add(new Cookie("AJSTAT_ok_times", "1", "/", "www.ed2kers.com"));
    requestCookies.Add(new Cookie("Hm_lpvt_9398e7331484620894c57216bea9225e", "1468695450", "/", ".gdajie.com"));
    requestCookies.Add(new Cookie("PHPSESSID", "0hbum6a055q5sfg5oi03a7ug35", "/", "www.ed2kers.com"));
    requestCookies.Add(new Cookie("Hm_lvt_9ad7d25789ce8adb7b225bc58a1b8525", "1470154733", "/", "ed2kers.com"));
    requestCookies.Add(new Cookie("Hm_lpvt_9ad7d25789ce8adb7b225bc58a1b8525", "1470156513", "/", "ed2kers.com"));

    // Throttling is disabled here: the sleeps after the request (10 s on an empty
    // response, 5 s otherwise) are commented out, so the response is returned immediately.
    return CrawlerUtility.GetGeneralContent(strUrl, "http://www.gdajie.com/", requestCookies);
}
/// <summary>
/// Parses one mp4ba list page and, for every row that is not already recorded, saves the
/// blog, its cover recommendation, and a resource record.
/// </summary>
/// <param name="url">URL of the list page to fetch and parse.</param>
private static void GetListPage(string url)
{
    string html = GetGeneralContent(url);

    // GetGeneralContent can hand back an empty page (e.g. during maintenance); there is
    // nothing to parse in that case.
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(html);
    HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("//tbody[@id='data_list']");

    // The page layout did not match: bail out instead of dereferencing null.
    if (htmlNode == null)
    {
        return;
    }

    foreach (HtmlNode node in htmlNode.Elements("tr"))
    {
        try
        {
            var elements = node.Elements("td").ToArray();
            if (elements.Length < 3)
            {
                continue;
            }

            // Second cell holds the category text, third cell holds the detail link.
            var category = elements[1].InnerText.Trim();
            Console.WriteLine(category);

            var detailLink = elements[2].Element("a");
            if (detailLink == null)
            {
                continue;
            }

            var detailUrl = detailLink.GetAttributeValue("href", "").Trim();
            Console.WriteLine(detailUrl);
            var title = detailLink.InnerText.Trim().Replace(".Mp4Ba", "");
            Console.WriteLine(title);

            string resourceId = GetResourceId(detailUrl);
            if (CrawlerUtility.ExistRecord(resourceId, ResourceType.Mp4Ba))
            {
                continue;
            }

            // Titles are capped at 200 characters.
            var blog = new Blog { Title = title.Length > 200 ? title.Substring(0, 200) : title };
            string coverUrl = "";
            var link = GetIntroDetail("http://www.mp4ba.com/" + detailUrl, blog, out coverUrl);

            bool syncFlag = SaveBlog(blog, link);
            if (!syncFlag)
            {
                Console.WriteLine("Blog Sync Fail,detailUrl:{0},blogId:{1}", detailUrl, blog.Id);
                continue;
            }

            // A failed recommendation sync is logged but does not stop the resource
            // from being recorded.
            syncFlag = SaveRecomment(blog, coverUrl, category);
            if (!syncFlag)
            {
                Console.WriteLine("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, coverUrl);
            }

            CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Mp4Ba);
            Console.WriteLine("Blog Added:" + blog.Id);
        }
        catch (Exception ex)
        {
            // One bad row must not abort the rest of the page.
            Logger.Error(ex);
        }
    }
}
/// <summary>
/// Fetches a cover image through <c>CrawlerUtility.GetFileContent</c>, maps the mp4ba
/// category text to a <c>RecommentCategory</c>, rewrites the local file with the output of
/// <c>ImgHandler.ZoomPicture</c>, uploads it, and syncs a <c>Recomment</c> entry.
/// </summary>
/// <param name="blog">Blog whose <c>Id</c> and <c>Title</c> are copied onto the entry.</param>
/// <param name="imageUrl">Cover image URL; null or empty means there is nothing to record.</param>
/// <param name="category">Category text taken from the list page.</param>
/// <returns>
/// <c>true</c> when there is no cover URL or no file name was returned for it,
/// otherwise the result of <c>SyncUtility.SyncRecomment</c>.
/// </returns>
private static bool SaveRecomment(Blog blog, string imageUrl, string category)
{
    if (string.IsNullOrEmpty(imageUrl))
    {
        return true;
    }

    string fileName = CrawlerUtility.GetFileContent(imageUrl, Const.CoverFileDirectory, "http://www.mp4ba.com/");

    // Treat null the same as empty so a missing file name never reaches Image.FromFile.
    if (string.IsNullOrEmpty(fileName))
    {
        return true;
    }

    var recomment = new Recomment()
    {
        BlogId = blog.Id,
        Title = blog.Title,
        CoverName = fileName,
        TimeStamp = DateTime.Now
    };

    // Order matters: the broad substring matches run first and the exact matches
    // below override them with a more specific category.
    if (category.Contains("电影"))
    {
        recomment.Category = RecommentCategory.电影;
    }
    if (category.Contains("电视剧"))
    {
        recomment.Category = RecommentCategory.剧集;
    }
    if (category == "欧美电视剧")
    {
        recomment.Category = RecommentCategory.美剧;
    }
    if (category == "日韩电视剧")
    {
        recomment.Category = RecommentCategory.韩剧;
    }
    if (category == "综艺娱乐")
    {
        recomment.Category = RecommentCategory.综艺;
    }

    string localFilePath = Const.CoverFileDirectory + fileName;

    // Dispose both the loaded image and the zoomed result so GDI handles and the
    // file lock taken by Image.FromFile are released deterministically.
    using (Image sourceImage = Image.FromFile(localFilePath))
    using (var zoomedImage = Domain.Helper.ImgHandler.ZoomPicture(sourceImage, 200, 110))
    {
        zoomedImage.Save(localFilePath);
    }

    string yunUrl = UploadUtility.UploadLocalFile(localFilePath);
    Console.WriteLine("localFile:{0} upload to yunUrl:{1}", localFilePath, yunUrl);

    // Mark the entry as uploaded only when the upload returned a URL.
    if (!string.IsNullOrEmpty(yunUrl))
    {
        recomment.YunUrl = yunUrl;
        recomment.ImageStatus = ImageStatus.Yun;
    }

    return SyncUtility.SyncRecomment(recomment);
}
/// <summary>
/// Fetches a cover image through <c>CrawlerUtility.GetFileContent</c>, rewrites the local
/// file with the output of <c>ImgHandler.ZoomPicture</c>, uploads it, and syncs a
/// <c>Recomment</c> entry for the blog.
/// </summary>
/// <param name="blog">Blog whose <c>Id</c> and <c>Title</c> are copied onto the entry.</param>
/// <param name="imageUrl">Cover image URL; the site's default cover and the empty string are skipped.</param>
/// <param name="category">Category text passed through <c>SetCategory</c>.</param>
/// <returns>
/// <c>true</c> when the cover is skipped, no file name was returned, or image processing
/// threw (the error is logged); otherwise the result of <c>SyncUtility.SyncRecomment</c>.
/// </returns>
private static bool SaveRecomment(Blog blog, string imageUrl, string category)
{
    // The site's placeholder cover carries no information worth recording.
    if (imageUrl == "http://verycd.gdajie.com/img/default_cover.jpg" || imageUrl == "")
    {
        return true;
    }

    string fileName = CrawlerUtility.GetFileContent(imageUrl, Const.CoverFileDirectory, "http://www.gdajie.com/");
    if (string.IsNullOrEmpty(fileName))
    {
        return true;
    }

    string localFilePath = Const.CoverFileDirectory + fileName;
    try
    {
        // Dispose the zoomed result as well as the loaded image so no GDI handle leaks.
        using (var localImage = Image.FromFile(localFilePath))
        using (var zoomedImage = ImgHandler.ZoomPicture(localImage, 200, 110))
        {
            zoomedImage.Save(localFilePath);
        }
    }
    catch (Exception ex)
    {
        // An unreadable image is logged and treated as "no cover" rather than a failure.
        Logger.Error(ex, "localFilePath:{0}", localFilePath);
        return true;
    }

    string yunUrl = UploadUtility.UploadLocalFile(localFilePath);
    Info("localFile:{0} upload to yunUrl:{1}", localFilePath, yunUrl);

    var recomment = new Recomment()
    {
        BlogId = blog.Id,
        CoverName = fileName,
        Title = blog.Title,
        TimeStamp = DateTime.Now,
        Category = SetCategory(category)
    };

    // Mark the entry as uploaded only when the upload returned a URL.
    if (!string.IsNullOrEmpty(yunUrl))
    {
        recomment.YunUrl = yunUrl;
        recomment.ImageStatus = ImageStatus.Yun;
    }

    return SyncUtility.SyncRecomment(recomment);
}
/// <summary>
/// Fetches a cover image through <c>CrawlerUtility.GetFileContent</c>, rewrites the local
/// file with the output of <c>ImgHandler.ZoomPicture</c>, uploads it, and syncs a
/// <c>Recomment</c> entry for the blog.
/// </summary>
/// <param name="blog">Blog whose <c>Id</c> and <c>Title</c> are copied onto the entry.</param>
/// <param name="imageUrl">Cover image URL; the site's default cover and the empty string are skipped.</param>
/// <param name="category">Category text passed through <c>SetCategory</c>.</param>
/// <returns>
/// <c>true</c> when the cover is skipped or no file name was returned,
/// otherwise the result of <c>SyncUtility.SyncRecomment</c>.
/// </returns>
private static bool SaveRecomment(Blog blog, string imageUrl, string category)
{
    // The site's placeholder cover carries no information worth recording.
    if (imageUrl == "http://verycd.gdajie.com/img/default_cover.jpg" || imageUrl == "")
    {
        return true;
    }

    string fileName = CrawlerUtility.GetFileContent(imageUrl, Const.CoverFileDirectory, "http://www.gdajie.com/");
    if (string.IsNullOrEmpty(fileName))
    {
        return true;
    }

    string localFilePath = Const.CoverFileDirectory + fileName;

    // Dispose both the loaded image and the zoomed result so GDI handles and the
    // file lock taken by Image.FromFile are released deterministically.
    using (Image sourceImage = Image.FromFile(localFilePath))
    using (var zoomedImage = Domain.Helper.ImgHandler.ZoomPicture(sourceImage, 200, 110))
    {
        zoomedImage.Save(localFilePath);
    }

    string yunUrl = UploadUtility.UploadLocalFile(localFilePath);
    Console.WriteLine("localFile:{0} upload to yunUrl:{1}", localFilePath, yunUrl);

    var recomment = new Recomment()
    {
        BlogId = blog.Id,
        CoverName = fileName,
        Title = blog.Title,
        TimeStamp = DateTime.Now,
        Category = SetCategory(category)
    };

    // Mark the entry as uploaded only when the upload returned a URL.
    if (!string.IsNullOrEmpty(yunUrl))
    {
        recomment.YunUrl = yunUrl;
        recomment.ImageStatus = ImageStatus.Yun;
    }

    return SyncUtility.SyncRecomment(recomment);
}
/// <summary>
/// Requests <paramref name="strUrl"/> through <c>CrawlerUtility.GetGeneralContent</c> with a
/// fixed referer and cookie set for ed2000.com, counting the request in
/// <c>_requestCount</c>, then pauses before returning.
/// </summary>
/// <param name="strUrl">URL to request.</param>
/// <returns>Whatever <c>CrawlerUtility.GetGeneralContent</c> returned.</returns>
private static string GetGeneralContent(string strUrl)
{
    _requestCount++;

    // The running request count is sent as the VisitsNumber cookie value.
    // NOTE(review): UserID / UserPassword are hard-coded account data kept in source.
    var requestCookies = new List<Cookie>();
    requestCookies.Add(new Cookie("ASPSESSIONIDQCSQTDQD", "OPOIBEHBPMHCPNAMCBJHFPDD", "/", "www.ed2000.com"));
    requestCookies.Add(new Cookie("UserID", "30602", "/", "www.ed2000.com"));
    requestCookies.Add(new Cookie("UserPassword", "A9E485F632124349EDFCEC448E649702", "/", "www.ed2000.com"));
    requestCookies.Add(new Cookie("CNZZDATA947842", "cnzz_eid%3D1459071056-1467470818-null%26ntime%3D1469246688", "/", "www.ed2000.com"));
    requestCookies.Add(new Cookie("VisitsNumber", _requestCount.ToString(), "/", "www.ed2000.com"));

    string pageHtml = CrawlerUtility.GetGeneralContent(strUrl, "http://www.ed2000.com/", requestCookies);

    // 60 seconds after an empty response, 2 seconds otherwise.
    int pauseMilliseconds = string.IsNullOrEmpty(pageHtml) ? 60 * 1000 : 2 * 1000;
    Thread.Sleep(pauseMilliseconds);

    return pageHtml;
}
/// <summary>
/// Parses one verycd list page and, for every list item that is not already recorded,
/// saves the blog, its cover recommendation, and a resource record.
/// </summary>
/// <param name="url">URL of the list page to fetch and parse.</param>
/// <param name="category">Category text forwarded to <c>SaveRecomment</c>.</param>
private static void GetListPage(string url, string category)
{
    string pageHtml = GetGeneralContent(url);
    if (pageHtml == "")
    {
        return;
    }

    HtmlDocument page = new HtmlDocument();
    page.LoadHtml(pageHtml);

    HtmlNode listRoot = page.DocumentNode.SelectSingleNode("//div[@id='main']//ul");
    if (listRoot == null)
    {
        return;
    }

    foreach (HtmlNode item in listRoot.Elements("li"))
    {
        try
        {
            var blog = new Blog();

            // The first div of the item wraps the anchor carrying href, title and cover style.
            var thumbNode = item.Element("div");
            if (thumbNode == null)
            {
                continue;
            }

            var anchor = thumbNode.Element("a");
            if (anchor == null)
            {
                continue;
            }

            var detailUrl = anchor.GetAttributeValue("href", "");
            if (detailUrl == "")
            {
                continue;
            }
            Trace(detailUrl);

            string resourceId = GetResourceId(detailUrl);
            bool alreadyRecorded = CrawlerUtility.ExistRecord(resourceId, ResourceType.Verycd);
            if (alreadyRecorded)
            {
                Info("resourceId:{0} exist", resourceId);
                _existResource++;
                continue;
            }

            var rawTitle = anchor.GetAttributeValue("title", "");
            if (rawTitle == "")
            {
                continue;
            }

            // Titles are capped at 200 characters.
            blog.Title = rawTitle.Length <= 200 ? rawTitle : rawTitle.Substring(0, 200);
            Trace(rawTitle);

            // The cover URL is embedded in the anchor's inline background-image style.
            var inlineStyle = anchor.GetAttributeValue("style", "");
            var imageUrl = inlineStyle
                .Replace("background-image:", "")
                .Replace("url(", "")
                .Replace(")", "")
                .Trim();
            Trace("imageUrl:" + imageUrl);

            // The second div of the item holds the descriptive paragraphs.
            var infoNode = item.Elements("div").Skip(1).FirstOrDefault();
            if (infoNode == null)
            {
                continue;
            }

            // Skip the first paragraph and keep at most the next four, both as HTML and as text.
            var markup = new StringBuilder();
            var plainText = new StringBuilder();
            foreach (HtmlNode paragraph in infoNode.Elements("p").Skip(1).Take(4))
            {
                markup.Append(paragraph.OuterHtml);
                plainText.Append(paragraph.InnerText);
            }
            blog.HtmlContent = markup.ToString();
            blog.Content = plainText.ToString();

            var urlList = GetIntroDetail(detailUrl, blog);
            if (urlList.Count == 0)
            {
                continue;
            }

            if (!SaveBlog(blog, urlList))
            {
                Info("Blog Sync Fail,url:{0},blogId:{1}", url, blog.Id);
                continue;
            }

            // A failed recommendation sync is logged but does not stop the resource
            // from being recorded.
            if (!SaveRecomment(blog, imageUrl, category))
            {
                Info("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, imageUrl);
            }

            CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Verycd);
            Info("Blog Added,blogId:{0},resourceId:{1}", blog.Id, resourceId);
            _fetchCount++;
        }
        catch (Exception ex)
        {
            // One bad item must not abort the rest of the page.
            Logger.Error(ex);
        }
    }
}
/// <summary>
/// Parses one zimuzu list page. Resources that are already recorded get their links
/// appended when <c>CrawlerUtility.ExistContinutedRecord</c> returns a positive blog id;
/// new resources are saved as a blog, a cover recommendation, and a resource record.
/// </summary>
/// <param name="url">URL of the list page to fetch and parse.</param>
private static void GetListPage(string url)
{
    string html = GetGeneralContent(url);
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(html);
    HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("//div[@class='resource-showlist']//ul");
    if (htmlNode == null)
    {
        return;
    }

    foreach (HtmlNode node in htmlNode.Elements("li"))
    {
        try
        {
            bool continuted = false;
            string continuteType = "";

            // The second div of the item holds the title block; skip items that lack it
            // instead of failing on the index.
            var nodeArray = node.Elements("div").ToArray();
            if (nodeArray.Length < 2)
            {
                continue;
            }
            var infoNode = nodeArray[1];

            // Walk dl > dt > strong > a step by step so an unexpected layout is skipped
            // rather than raising a NullReferenceException.
            var listNode = infoNode.Element("dl");
            var headerNode = listNode == null ? null : listNode.Element("dt");
            if (headerNode == null)
            {
                continue;
            }
            var strongNode = headerNode.Element("strong");
            var detailNode = strongNode == null ? null : strongNode.Element("a");
            if (detailNode == null)
            {
                continue;
            }
            var detailContinuedNode = headerNode.Element("font");

            string title = detailNode.InnerText;
            string detailUrl = detailNode.GetAttributeValue("href", "");
            if (detailUrl == "")
            {
                continue;
            }
            Console.WriteLine(title);

            if (detailContinuedNode != null)
            {
                continuteType = detailContinuedNode.InnerText;
                Console.WriteLine(continuteType);
            }

            // Items tagged as not yet aired are skipped entirely.
            if (continuteType.Contains("[尚未开播]"))
            {
                continue;
            }

            // Both the "airing" and the "season finished" tags mark the resource as continued.
            if (continuteType.Contains("连载中]") || continuteType.Contains("季完结]"))
            {
                continuted = true;
            }
            Console.WriteLine(detailUrl);

            string resourceId = GetResourceId(detailUrl);
            if (CrawlerUtility.ExistRecord(resourceId, ResourceType.Zimuzu))
            {
                int blogId = CrawlerUtility.ExistContinutedRecord(resourceId, ResourceType.Zimuzu);
                if (blogId > 0)
                {
                    Console.WriteLine("resourceId:{0} exist,to be updated blogId:{1}", resourceId, blogId);
                    if (AppendBlogLinks(resourceId, blogId))
                    {
                        Console.WriteLine("Blog Updated:" + blogId);
                    }
                    else
                    {
                        Console.WriteLine("Blog Sync Fail,resourceId:{0},blogId:{1}", resourceId, blogId);
                    }

                    // The list no longer tags the resource as continued: close the record.
                    if (!continuted)
                    {
                        CrawlerUtility.UpdateRecordOver(blogId);
                    }
                }
                else
                {
                    Console.WriteLine("resourceId:{0} exist", resourceId);
                }
                continue;
            }

            // Titles are capped at 200 characters.
            var blog = new Blog { Title = title.Length > 200 ? title.Substring(0, 200) : title };
            string blogImgUrl = "";
            string coverImgUrl = "";
            GetBlogContent(detailUrl, blog, out blogImgUrl, out coverImgUrl);

            string linkUrl = detailUrl.Replace("resource", "resource/list");
            List<Link> linkList = GetBlogLink(linkUrl);
            if (string.IsNullOrEmpty(blog.Content) || linkList.Count == 0)
            {
                continue;
            }

            ImageUrl imageUrl = DownloadBlogImgToLocal(blogImgUrl);
            bool syncFlag = SaveBlog(blog, imageUrl, linkList);
            if (!syncFlag)
            {
                Console.WriteLine("Blog Sync Fail,detailUrl:{0},blogId:{1}", detailUrl, blog.Id);
                continue;
            }

            syncFlag = SaveRecomment(blog, coverImgUrl);
            if (!syncFlag)
            {
                // Log the cover URL that was actually passed to SaveRecomment, not the
                // blog image object.
                Console.WriteLine("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, coverImgUrl);
            }

            CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Zimuzu, continuted);
            Console.WriteLine("Blog Added:" + blog.Id);
        }
        catch (Exception ex)
        {
            // One bad item must not abort the rest of the page.
            Logger.Error(ex);
        }
    }
}
/// <summary>
/// Parses one ed2kers list page and, for every entry that is not already recorded,
/// saves the blog, its cover recommendation, and a resource record.
/// </summary>
/// <param name="url">URL of the list page to fetch and parse.</param>
/// <param name="category">Category text forwarded to <c>SaveRecomment</c>.</param>
private static void GetListPage(string url, string category)
{
    string pageHtml = GetGeneralContent(url);
    if (pageHtml == "")
    {
        return;
    }

    HtmlDocument page = new HtmlDocument();
    page.LoadHtml(pageHtml);

    HtmlNode listRoot = page.DocumentNode.SelectSingleNode("//div[@id='tech']");
    if (listRoot == null)
    {
        return;
    }

    foreach (HtmlNode entry in listRoot.Elements("div"))
    {
        try
        {
            // An entry needs at least three child divs: the first holds the thumbnail
            // link and the second holds the title list.
            var sections = entry.Elements("div").ToArray();
            if (sections.Length < 3)
            {
                continue;
            }

            var blog = new Blog();

            var anchor = sections[0].Element("a");
            if (anchor == null)
            {
                continue;
            }

            var detailUrl = anchor.GetAttributeValue("href", "");
            if (detailUrl == "")
            {
                continue;
            }
            Trace(detailUrl);

            string resourceId = GetResourceId(detailUrl);
            bool alreadyRecorded = CrawlerUtility.ExistRecord(resourceId, ResourceType.Ed2Kers);
            if (alreadyRecorded)
            {
                Info("resourceId:{0} exist", resourceId);
                continue;
            }

            // The cover URL is read from the image's data-original attribute.
            string imageUrl = "";
            var coverNode = anchor.Element("img");
            if (coverNode != null)
            {
                imageUrl = coverNode.GetAttributeValue("data-original", "");
                Trace("imageUrl:" + imageUrl);
            }

            // The title is the text of the last anchor in the first list item.
            var titleNode = sections[1].Element("ul").Element("li").Elements("a").LastOrDefault();
            if (titleNode == null)
            {
                continue;
            }

            var rawTitle = titleNode.InnerText;
            if (rawTitle == "")
            {
                continue;
            }

            // Titles are capped at 200 characters.
            blog.Title = rawTitle.Length <= 200 ? rawTitle : rawTitle.Substring(0, 200);
            Trace(rawTitle);

            var urlList = GetIntroDetail("http://www.ed2kers.com/" + detailUrl, blog);
            if (urlList.Count == 0)
            {
                continue;
            }

            if (!SaveBlog(blog, urlList))
            {
                Info("Blog Sync Fail,url:{0},blogId:{1}", url, blog.Id);
                continue;
            }

            // A failed recommendation sync is logged but does not stop the resource
            // from being recorded.
            if (!SaveRecomment(blog, imageUrl, category))
            {
                Info("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, imageUrl);
            }

            CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Ed2Kers);
            Info("Blog Added,blogId:{0},resourceId:{1}", blog.Id, resourceId);
            _fetchCount++;
        }
        catch (Exception ex)
        {
            // One bad entry must not abort the rest of the page.
            Logger.Error(ex);
        }
    }
}
/// <summary>
/// Endless worker loop: uploads pending blog images and cover images, syncs each
/// successfully uploaded item, and deletes its pending record once the sync succeeds.
/// </summary>
/// <remarks>
/// The loop ends only when a <c>ThreadAbortException</c> is caught. Any other exception is
/// logged and followed by a 10-minute pause before the next pass.
/// </remarks>
private void UpdateServiceProcess()
{
    while (true)
    {
        try
        {
            // Pass 1: blog images.
            var blogImageList = ImageUrlDao.GetToUploadList();
            int blogImageCount = blogImageList.Count;
            MessageLog("updating blogImageCount:" + blogImageCount);
            int successCount = 0;
            foreach (ImageUrl image in blogImageList)
            {
                string localPath = Const.BlogFileDirectory + image.Url;
                string yunUrl = UploadUtility.UploadLocalFile(localPath);

                // No URL came back (null or empty): back off for 5 minutes and move on,
                // never marking the image as uploaded.
                if (string.IsNullOrEmpty(yunUrl))
                {
                    Thread.Sleep(5 * 60 * 1000);
                    continue;
                }

                image.YunUrl = yunUrl;
                image.ImageStatus = ImageStatus.Yun;
                if (SyncUtility.SyncImageUrl(image))
                {
                    MessageLog("sync success,localPath:{0} uploaded yunUrl:{1}", localPath, yunUrl);
                    ImageUrlDao.DeleteRecord(image.Id);
                    successCount++;
                }
                else
                {
                    MessageLog("sync fail,localPath:{0} uploaded yunUrl:{1}", localPath, yunUrl);
                }

                // Pace the uploads.
                Thread.Sleep(15 * 1000);
            }
            MessageLog("updated blogImageCount:" + successCount);

            // Pass 2: cover images.
            var coverImageList = RecommentDao.GetToUploadList();
            int coverImageCount = coverImageList.Count;
            MessageLog("updating coverImageCount:" + coverImageCount);
            successCount = 0;
            foreach (Recomment image in coverImageList)
            {
                // A cover name that is still a URL has no local file yet: fetch it first.
                if (image.CoverName.Trim().StartsWith("http"))
                {
                    image.CoverName = CrawlerUtility.GetFileContent(image.CoverName.Trim(), Const.CoverFileDirectory, "");
                    if (string.IsNullOrEmpty(image.CoverName))
                    {
                        continue;
                    }
                }

                string localPath = Const.CoverFileDirectory + image.CoverName;
                string yunUrl = UploadUtility.UploadLocalFile(localPath);

                // No URL came back: skip this cover without a back-off pause.
                if (string.IsNullOrEmpty(yunUrl))
                {
                    continue;
                }

                image.YunUrl = yunUrl;
                image.ImageStatus = ImageStatus.Yun;
                if (SyncUtility.SyncRecomment(image))
                {
                    MessageLog("sync success,localPath:{0} uploaded yunUrl:{1}", localPath, yunUrl);
                    RecommentDao.DeleteRecord(image.Id);
                    successCount++;
                }
                else
                {
                    MessageLog("sync fail,localPath:{0} uploaded yunUrl:{1}", localPath, yunUrl);
                }

                // Pace the uploads.
                Thread.Sleep(15 * 1000);
            }

            // This total belongs to the cover pass, so label it accordingly.
            MessageLog("updated coverImageCount:" + successCount);

            // Nothing was pending in either list: idle before polling again.
            if (blogImageCount == 0 && coverImageCount == 0)
            {
                MessageLog("record empty,sleeping 10 minutes");
                Thread.Sleep(10 * 60 * 1000);
            }
        }
        catch (ThreadAbortException threadAbortException)
        {
            Logger.Error(threadAbortException);
            break;
        }
        catch (Exception ex)
        {
            Logger.Error(ex);
            Thread.Sleep(10 * 60 * 1000);
        }
    }
}
/// <summary>
/// Parses one ed2000 list page. Resources that are already recorded get their links
/// appended when the row's update-time text does not contain its publish-date text;
/// new resources are saved as a blog, a cover recommendation, and a resource record.
/// </summary>
/// <param name="url">URL of the list page to fetch and parse.</param>
/// <param name="category">Category text forwarded to <c>SaveRecomment</c>.</param>
private static void GetListPage(string url, string category)
{
    string html = GetGeneralContent(url);
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(html);
    HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("//table[@class='CommonListArea']");
    if (htmlNode == null)
    {
        return;
    }

    int rowIndex = 0;

    // Declared outside the loop so the catch block can log the URL being processed.
    string detailUrl = "";
    foreach (HtmlNode node in htmlNode.Elements("tr"))
    {
        try
        {
            // The first two rows are skipped (presumably header rows - confirm against the page).
            if (++rowIndex <= 2)
            {
                continue;
            }

            var tdArray = node.Elements("td").ToArray();
            if (tdArray.Length < 4)
            {
                continue;
            }

            var blog = new Blog();
            var titleNode = tdArray[0];
            var title = titleNode.InnerText;
            if (title == "")
            {
                continue;
            }

            // Titles are capped at 200 characters.
            blog.Title = title.Length > 200 ? title.Substring(0, 200) : title;
            Trace(title);

            // The detail link is the second anchor in the title cell.
            var link = titleNode.Elements("a").Skip(1).FirstOrDefault();
            if (link == null)
            {
                continue;
            }

            detailUrl = link.GetAttributeValue("href", "");
            if (detailUrl == "")
            {
                continue;
            }
            Trace(detailUrl);

            string resourceId = GetResourceId(detailUrl);
            if (CrawlerUtility.ExistRecord(resourceId, ResourceType.Ed2000))
            {
                _existResource++;
                var distributeDate = tdArray[1].InnerText;
                var updateTime = tdArray[2].InnerText;

                // Update time still within the publish date: nothing new to append.
                if (updateTime.Contains(distributeDate))
                {
                    Info("Resource Existed,resourceId:{0}", resourceId);
                }
                else
                {
                    _updateResource++;
                    int blogId = CrawlerUtility.ExistContinutedRecord(resourceId, ResourceType.Ed2000);
                    Info("resourceId:{0} exist,to be updated blogId:{1}", resourceId, blogId);
                    if (AppendBlogLinks(resourceId, blogId))
                    {
                        Info("Blog Updated:" + blogId);
                    }
                    else
                    {
                        // Log the id of the existing blog being updated, not the id of
                        // the unsaved placeholder created for this row.
                        Info("Blog Sync Fail,resourceId:{0},blogId:{1}", resourceId, blogId);
                    }
                    _fetchCount++;
                }
                continue;
            }

            string imageUrl = "";
            var urlList = GetIntroDetail("http://www.ed2000.com" + detailUrl, blog, out imageUrl);
            if (urlList.Count == 0)
            {
                continue;
            }

            bool syncFlag = SaveBlog(blog, urlList);
            if (!syncFlag)
            {
                Info("Blog Sync Fail,detailUrl:{0},blogId:{1}", detailUrl, blog.Id);
                continue;
            }

            // A failed recommendation sync is logged but does not stop the resource
            // from being recorded.
            syncFlag = SaveRecomment(blog, imageUrl, category);
            if (!syncFlag)
            {
                Info("Recomment Sync Fail,blogId:{0},imageUrl:{1}", blog.Id, imageUrl);
            }

            // New resources are always recorded with the trailing flag set to true.
            CrawlerUtility.AddResourceRecord(blog.Id, resourceId, ResourceType.Ed2000, true);
            Info("Blog Added,blogId:{0},resourceId:{1}", blog.Id, resourceId);
            _fetchCount++;
        }
        catch (Exception ex)
        {
            // One bad row must not abort the rest of the page.
            Logger.Error(ex, detailUrl);
        }
    }
}