/// <summary>
/// Probes the URL in <c>img.linkimg</c> with an HTTP HEAD request and reports the outcome
/// through <c>BOLService.UpdateBadURL</c>.
/// </summary>
/// <remarks>
/// A response whose Content-Type starts with "image/" is reported with <c>false</c>; any other
/// Content-Type is reported with <c>true</c>. When anything throws (unsupported or malformed
/// URI, network failure, service error) the message is printed and <c>UpdateBadURL</c> is
/// called with the ID only.
/// </remarks>
/// <param name="img">Record whose <c>linkimg</c> is probed and whose <c>ID</c> is passed to the service.</param>
private void ThreadProcessData(ImgLink img)
{
    Console.WriteLine("Start Check: " + img.linkimg);
    var bolService = new BOLService.BOLService();
    try
    {
        var req = (HttpWebRequest)WebRequest.Create(img.linkimg);
        // HEAD returns the headers only, which is all the Content-Type check needs.
        req.Method = "HEAD";
        using (var resp = req.GetResponse())
        {
            // Media types are ASCII protocol tokens, so compare ordinally and ignore case
            // instead of lower-casing and running a culture-sensitive StartsWith.
            string contentType = resp.ContentType ?? string.Empty;
            bool isImage = contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase);
            if (!isImage)
            {
                Console.WriteLine("Invalid: " + img.linkimg);
            }

            bolService.UpdateBadURL(img.ID, !isImage);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Error: " + ex.Message);
        bolService.UpdateBadURL(img.ID);
    }

    Console.WriteLine("End Check: " + img.linkimg);
}
/// <summary>
/// Downloads one xemlasuong.org page and saves the full-size images found in its article body.
/// </summary>
/// <remarks>
/// The page is skipped when <c>CheckExistLinkByDomain(strCounter, "xemlasuong.org")</c> returns
/// true. Only <c>img</c> elements inside the <c>blog-single-para</c> div whose class contains
/// "size-full" are kept. Every exception is written to the console and swallowed.
/// </remarks>
/// <param name="strPage">URL of the page to load.</param>
/// <param name="strCounter">Stored as <c>Counter</c> on every saved link and used for the existence check.</param>
/// <param name="category">Stored as <c>Category</c> on every saved link.</param>
/// <param name="strTitle">Stored as <c>GroupName</c> on every saved link.</param>
public static void TestImageOnePage(string strPage, string strCounter, string category, string strTitle)
{
    const string SiteDomain = "xemlasuong.org";
    try
    {
        var bolService = new BOLService.BOLService();
        if (bolService.CheckExistLinkByDomain(strCounter, SiteDomain))
        {
            return;
        }

        var web = new HtmlWeb();
        var doc = web.Load(strPage);
        var divContainer = doc.DocumentNode.SelectSingleNode("//div[@class = 'blog-single-para']");
        if (divContainer == null)
        {
            return;
        }

        // The leading '.' keeps the query inside the container; a bare "//img" is evaluated
        // from the document root even when it is called on a child node.
        HtmlNodeCollection nodeImagesCollection = divContainer.SelectNodes(".//img");
        if (nodeImagesCollection == null)
        {
            // SelectNodes returns null, not an empty collection, when nothing matches.
            return;
        }

        var lst = new List<ImgLink>();
        foreach (var node in nodeImagesCollection)
        {
            try
            {
                var srcAttribute = node.Attributes["src"];
                if (srcAttribute == null)
                {
                    // An <img> without src has nothing to store; skip it instead of throwing.
                    continue;
                }

                var classAttribute = node.Attributes["class"];
                string strClass = classAttribute != null ? classAttribute.Value : string.Empty;
                if (!strClass.Contains("size-full"))
                {
                    continue;
                }

                lst.Add(new ImgLink()
                {
                    Category = category,
                    Counter = strCounter,
                    CreateDate = DateTime.Now,
                    Domain = SiteDomain,
                    GroupName = strTitle,
                    linkimg = srcAttribute.Value
                });
            }
            catch (Exception ex)
            {
                // Best effort per image: one bad node must not abort the whole page.
                Console.WriteLine(ex.ToString());
            }
        }

        if (lst.Count > 0)
        {
            bolService.SaveImg(lst);
            Console.WriteLine(strPage);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
}
/// <summary>
/// Loads one page and saves, for each anchor directly under the <c>photo-container</c> div,
/// the first usable image it contains, tagged with the domain "xinh.hay.la".
/// </summary>
/// <remarks>
/// An image is skipped when its <c>src</c> contains the placeholder path
/// "Content/themes/noimg_big.jpg" or the host "http://api.xinhvl.tekreds.com". Only direct
/// children are inspected at both levels. Every exception is written to the console and swallowed.
/// </remarks>
/// <param name="strPage">URL of the page to load.</param>
/// <param name="strCounter">Stored as <c>Counter</c> on every saved link.</param>
/// <param name="category">Stored as <c>Category</c> on every saved link.</param>
/// <param name="strTitle">Stored as <c>GroupName</c> on every saved link.</param>
public static void TestImageOnePage(string strPage, string strCounter, string category, string strTitle)
{
    try
    {
        var service = new BOLService.BOLService();
        var collected = new List<ImgLink>();
        var loader = new HtmlWeb();
        var page = loader.Load(strPage);

        var container = page.DocumentNode.SelectSingleNode("//div[@class = 'photo-container']");
        if (container == null)
        {
            // Nothing was collected, so there is nothing to save or report.
            return;
        }

        foreach (HtmlNode anchor in container.ChildNodes)
        {
            try
            {
                if (!anchor.Name.Equals("a"))
                {
                    continue;
                }

                foreach (HtmlNode child in anchor.ChildNodes)
                {
                    if (!child.Name.Equals("img"))
                    {
                        continue;
                    }

                    string source = child.Attributes["src"].Value;
                    if (source.Contains("Content/themes/noimg_big.jpg"))
                    {
                        continue;
                    }

                    if (source.Contains("http://api.xinhvl.tekreds.com"))
                    {
                        continue;
                    }

                    var link = new ImgLink();
                    link.CreateDate = DateTime.UtcNow;
                    link.Domain = "xinh.hay.la";
                    link.Counter = strCounter;
                    link.linkimg = source;
                    link.Category = category;
                    link.GroupName = strTitle;
                    collected.Add(link);

                    // Only the first accepted image of each anchor is kept.
                    break;
                }
            }
            catch (Exception nodeError)
            {
                Console.WriteLine(nodeError.ToString());
            }
        }

        if (collected.Count > 0)
        {
            service.SaveImg(collected);
            Console.WriteLine(strPage);
        }
    }
    catch (Exception pageError)
    {
        Console.WriteLine(pageError.ToString());
    }
}
/// <summary>
/// Downloads one phunuvn.net page and saves the images embedded in its message body.
/// </summary>
/// <remarks>
/// The page is skipped when <c>CheckExistLinkByDomain(strCounter, "phunuvn.net")</c> returns
/// true. Images are the <c>img</c> elements with class "bbCodeImage LbImage" inside the first
/// <c>blockquote</c> with class "messageText ugc baseHtml". Every exception is written to the
/// console and swallowed.
/// </remarks>
/// <param name="strPage">URL of the page to load.</param>
/// <param name="strCounter">Stored as <c>Counter</c> on every saved link and used for the existence check.</param>
/// <param name="category">Stored as <c>Category</c> on every saved link.</param>
/// <param name="strTitle">Stored as <c>GroupName</c> on every saved link.</param>
public static void TestImageOnePage(string strPage, string strCounter, string category, string strTitle)
{
    const string SiteDomain = "phunuvn.net";
    try
    {
        var bolService = new BOLService.BOLService();
        if (bolService.CheckExistLinkByDomain(strCounter, SiteDomain))
        {
            return;
        }

        var web = new HtmlWeb();
        var doc = web.Load(strPage);
        var divContainer = doc.DocumentNode.SelectSingleNode("//blockquote[@class = 'messageText ugc baseHtml']");
        if (divContainer == null)
        {
            return;
        }

        // The leading '.' keeps the query inside the blockquote; a bare "//img[...]" is
        // evaluated from the document root even when it is called on a child node.
        HtmlNodeCollection nodeImagesCollection = divContainer.SelectNodes(".//img[@class = 'bbCodeImage LbImage']");
        if (nodeImagesCollection == null)
        {
            // SelectNodes returns null, not an empty collection, when nothing matches.
            return;
        }

        var lst = new List<ImgLink>();
        foreach (var node in nodeImagesCollection)
        {
            try
            {
                var srcAttribute = node.Attributes["src"];
                if (srcAttribute == null)
                {
                    // An <img> without src has nothing to store; skip it instead of throwing.
                    continue;
                }

                lst.Add(new ImgLink()
                {
                    Category = category,
                    Counter = strCounter,
                    CreateDate = DateTime.Now,
                    Domain = SiteDomain,
                    GroupName = strTitle,
                    linkimg = srcAttribute.Value
                });
            }
            catch (Exception ex)
            {
                // Best effort per image: one bad node must not abort the whole page.
                Console.WriteLine(ex.ToString());
            }
        }

        if (lst.Count > 0)
        {
            bolService.SaveImg(lst);
            Console.WriteLine(strPage);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
}
/// <summary>
/// Walks the numbered pages http://chandai.tv/photo/{n} and saves the image found on each one.
/// </summary>
/// <remarks>
/// The scan starts at the <c>Counter</c> of the record returned by
/// <c>GetLastestChanDaiImage()</c> (or at 1 when that returns null) and is bounded by
/// start * 1000. It stops early once 300 placeholder images ("/Content/images/notfound.jpg")
/// have been seen without a real image in between. Links are saved in batches of 10, with a
/// final flush at the end. A failure on one page is logged and the scan continues.
/// </remarks>
private static void GetChanDaiPhoto()
{
    const int BatchSize = 10;
    const int MaxPlaceholderRun = 300;
    const string PlaceholderPath = "/Content/images/notfound.jpg";

    var bolService = new BOLService.BOLService();
    var lst = new List<ImgLink>();
    var web = new HtmlWeb();
    int error = 0;
    int start = 1;
    bool loggingConfigured = false;

    var vStartObj = bolService.GetLastestChanDaiImage();
    if (vStartObj != null && !int.TryParse(vStartObj.Counter, out start))
    {
        Console.WriteLine("Not Start ^_^");
        return;
    }

    // Multiply in 64 bits and clamp: a plain int multiplication wraps silently for large
    // counters and would turn the bound negative, skipping the scan altogether.
    int end = (int)Math.Min((long)start * 1000, int.MaxValue);

    for (int i = start; i < end; i++)
    {
        try
        {
            var doc = web.Load("http://chandai.tv/photo/" + i);
            var vImg = doc.DocumentNode.SelectSingleNode("//img[@class = 'img-responsive']");
            if (vImg == null || vImg.Attributes.Count <= 1)
            {
                continue;
            }

            // NOTE(review): the image path is read from the element's second attribute by
            // position; presumably that is "src" on this site - confirm against the markup.
            var vResult = vImg.Attributes[1];
            if (vResult.Value.Equals(PlaceholderPath))
            {
                error += 1;
                if (error == MaxPlaceholderRun)
                {
                    break;
                }

                continue;
            }

            // A real image resets the placeholder run.
            error = 0;
            lst.Add(new ImgLink()
            {
                linkimg = string.Format("http://chandai.tv{0}", vResult.Value),
                Counter = i.ToString(),
                CreateDate = DateTime.Now,
                Domain = "chandai.tv"
            });

            // ">=" rather than "==": if a save throws, the list is not cleared and keeps
            // growing, so an equality test would never trigger another batch save.
            if (lst.Count >= BatchSize)
            {
                bolService.SaveImg(lst);
                lst.Clear();
                Console.WriteLine("Save link " + i);
            }
        }
        catch (Exception exception)
        {
            // Configure log4net on the first failure only; reloading the configuration on
            // every caught exception is wasted work.
            if (!loggingConfigured)
            {
                log4net.Config.XmlConfigurator.Configure();
                loggingConfigured = true;
            }

            log.Info("Error: " + exception);
            Console.WriteLine(exception.ToString());
        }
    }

    if (lst.Count > 0)
    {
        bolService.SaveImg(lst);
        lst.Clear();
    }

    Console.WriteLine("Finish ^_^");
}
/// <summary>
/// Loads one page and saves the image links found on it, tagged with the domain
/// "photo.hoanvu.net".
/// </summary>
/// <remarks>
/// Two layouts are handled. If the page has anchors with <c>imageanchor="1"</c>, each anchor's
/// <c>href</c> is saved. Otherwise, for every direct child <c>div</c> of the
/// <c>post-content</c> div styled "padding:10px", image <c>src</c> values are saved when they
/// match the accepted hosts and are not a thumbnail or the theme logo. Only <c>CreateDate</c>,
/// <c>Domain</c> and <c>linkimg</c> are set on the saved records. <c>SaveImg</c> is called
/// even when nothing was collected. Every exception is written to the console and swallowed.
/// </remarks>
/// <param name="strPage">URL of the page to load.</param>
public static void TestImageOnePage(string strPage)
{
    try
    {
        var bolService = new BOLService.BOLService();
        var lst = new List<ImgLink>();
        var web = new HtmlWeb();
        var doc = web.Load(strPage);

        var anchors = doc.DocumentNode.SelectNodes("//a[@imageanchor = '1']");
        if (anchors != null)
        {
            foreach (HtmlNode node in anchors)
            {
                try
                {
                    lst.Add(new ImgLink()
                    {
                        CreateDate = DateTime.UtcNow,
                        Domain = "photo.hoanvu.net",
                        linkimg = node.Attributes["href"].Value
                    });
                }
                catch (Exception exception)
                {
                    Console.WriteLine(exception.ToString());
                }
            }
        }
        else
        {
            var v = doc.DocumentNode.SelectSingleNode("//div[@class = 'post-content']");
            if (v != null && v.HasChildNodes)
            {
                foreach (HtmlNode htmlNode in v.ChildNodes)
                {
                    if (!htmlNode.Name.Equals("div"))
                    {
                        continue;
                    }

                    // A div without a style attribute used to throw here, outside any inner
                    // try, which aborted the whole page; now it is simply skipped.
                    var styleAttribute = htmlNode.Attributes["style"];
                    if (styleAttribute == null || styleAttribute.Value != "padding:10px")
                    {
                        continue;
                    }

                    // NOTE(review): in HtmlAgilityPack an XPath starting with "//" is
                    // evaluated from the document root even when called on a child node, so
                    // the two queries below see the whole page. Left as-is because the src
                    // filters appear to compensate for that - confirm before narrowing to ".//".
                    var vTagP = htmlNode.SelectSingleNode("//p");
                    if (vTagP == null || vTagP.HasAttributes)
                    {
                        continue;
                    }

                    var images = vTagP.SelectNodes("//img");
                    if (images == null)
                    {
                        // SelectNodes returns null, not an empty collection, when nothing matches.
                        continue;
                    }

                    foreach (HtmlNode node in images)
                    {
                        try
                        {
                            string str = node.Attributes["src"].Value;
                            if (str.Contains("photo.hoanvu.net/wp-content/uploads/thumbnail/")
                                || str.Contains("photo.hoanvu.net/wp-content/themes/iphoto/images/logo.png"))
                            {
                                continue;
                            }

                            if (str.Contains("blogspot.com")
                                || str.Contains("photo.hoanvu.net/wp-content/")
                                || str.Contains("data.hoanvu.net/images"))
                            {
                                lst.Add(new ImgLink()
                                {
                                    CreateDate = DateTime.UtcNow,
                                    Domain = "photo.hoanvu.net",
                                    linkimg = str
                                });
                                Console.WriteLine(str);
                            }
                        }
                        catch (Exception exception)
                        {
                            // Best effort per image: one bad node must not abort the page.
                            Console.WriteLine(exception.ToString());
                        }
                    }
                }
            }
        }

        bolService.SaveImg(lst);
        Console.WriteLine(strPage);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
}
/// <summary>
/// Downloads one xemanh.net page and saves one image per qualifying paragraph of its article.
/// </summary>
/// <remarks>
/// The page is skipped when <c>CheckExistLinkByDomain(strCounter, "xemanh.net")</c> returns
/// true. Paragraphs are the direct <c>p</c> children of the first <c>section</c> inside the
/// first <c>article</c> inside <c>div#main</c>; only paragraphs that have attributes and child
/// nodes are inspected. The image is either a direct <c>img</c> child or an <c>img</c> nested
/// in a direct <c>a</c> child. Every exception is written to the console and swallowed.
/// </remarks>
/// <param name="strPage">URL of the page to load.</param>
/// <param name="strCounter">Stored as <c>Counter</c> on every saved link and used for the existence check.</param>
/// <param name="category">Stored as <c>Category</c> on every saved link.</param>
/// <param name="strTitle">Stored as <c>GroupName</c> on every saved link.</param>
public static void TestImageOnePage(string strPage, string strCounter, string category, string strTitle)
{
    const string SiteDomain = "xemanh.net";
    try
    {
        var bolService = new BOLService.BOLService();
        if (bolService.CheckExistLinkByDomain(strCounter, SiteDomain))
        {
            return;
        }

        var web = new HtmlWeb();
        var doc = web.Load(strPage);
        var divContainer = doc.DocumentNode.SelectSingleNode("//div[@id = 'main']");
        if (divContainer == null)
        {
            return;
        }

        // The leading '.' keeps each query relative to its parent node; a bare "//x" is
        // evaluated from the document root. Both lookups can also come back null.
        HtmlNode articleNode = divContainer.SelectSingleNode(".//article");
        if (articleNode == null)
        {
            return;
        }

        HtmlNode sectionNode = articleNode.SelectSingleNode(".//section");
        if (sectionNode == null)
        {
            return;
        }

        var lst = new List<ImgLink>();
        foreach (var node in sectionNode.ChildNodes)
        {
            try
            {
                if (!node.Name.Equals("p") || !node.HasAttributes || !node.HasChildNodes)
                {
                    continue;
                }

                // Candidate: the first <a>/<img> child when there are several children,
                // otherwise (or when none is found) the paragraph's first child.
                HtmlNode nodeA = node.FirstChild;
                if (node.ChildNodes.Count > 1)
                {
                    foreach (HtmlNode f1Node in node.ChildNodes)
                    {
                        if (f1Node.Name.Equals("a") || f1Node.Name.Equals("img"))
                        {
                            nodeA = f1Node;
                            break;
                        }
                    }
                }

                // When the candidate is a link, use the first <img> directly inside it.
                HtmlNode nodeImg = nodeA;
                if (nodeA.Name.Equals("a") && nodeA.HasChildNodes)
                {
                    foreach (HtmlNode fNode in nodeA.ChildNodes)
                    {
                        if (fNode.Name.Equals("img"))
                        {
                            nodeImg = fNode;
                            break;
                        }
                    }
                }

                if (nodeImg != null && nodeImg.Name.Equals("img") && nodeImg.Attributes["src"] != null)
                {
                    lst.Add(new ImgLink()
                    {
                        Category = category,
                        Counter = strCounter,
                        CreateDate = DateTime.Now,
                        Domain = SiteDomain,
                        GroupName = strTitle,
                        linkimg = nodeImg.Attributes["src"].Value
                    });
                }
            }
            catch (Exception ex)
            {
                // Best effort per paragraph: one bad node must not abort the whole page.
                Console.WriteLine(ex.ToString());
            }
        }

        if (lst.Count > 0)
        {
            bolService.SaveImg(lst);
            Console.WriteLine(strPage);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
}
/// <summary>
/// Downloads one tructiepso.com page and saves the externally hosted images found in its content.
/// </summary>
/// <remarks>
/// The page is skipped when <c>CheckExistLinkByDomain(strCounter, "tructiepso.com")</c>
/// returns true. The content container is the div with class "entry-content rich-content" or,
/// failing that, the div with id "content_document". Images whose <c>src</c> contains
/// "tructiepso.com/wp-content/uploads" or "gamer.gif" are ignored. Every exception is written
/// to the console and swallowed.
/// </remarks>
/// <param name="strPage">URL of the page to load.</param>
/// <param name="strCounter">Stored as <c>Counter</c> on every saved link and used for the existence check.</param>
/// <param name="category">Stored as <c>Category</c> on every saved link.</param>
/// <param name="strTitle">Stored as <c>GroupName</c> on every saved link.</param>
public static void TestImageOnePage(string strPage, string strCounter, string category, string strTitle)
{
    const string SiteDomain = "tructiepso.com";
    try
    {
        var bolService = new BOLService.BOLService();
        if (bolService.CheckExistLinkByDomain(strCounter, SiteDomain))
        {
            return;
        }

        var web = new HtmlWeb();
        var doc = web.Load(strPage);
        var divContainer = doc.DocumentNode.SelectSingleNode("//div[@class = 'entry-content rich-content']")
                           ?? doc.DocumentNode.SelectSingleNode("//div[@id = 'content_document']");
        if (divContainer == null)
        {
            return;
        }

        // The leading '.' keeps the query inside the container; a bare "//img" is evaluated
        // from the document root even when it is called on a child node.
        HtmlNodeCollection nodeImagesCollection = divContainer.SelectNodes(".//img");
        if (nodeImagesCollection == null)
        {
            // SelectNodes returns null, not an empty collection, when nothing matches.
            return;
        }

        var lst = new List<ImgLink>();
        foreach (var node in nodeImagesCollection)
        {
            try
            {
                var srcAttribute = node.Attributes["src"];
                if (srcAttribute == null)
                {
                    // An <img> without src has nothing to store; skip it instead of throwing.
                    continue;
                }

                string strLink = srcAttribute.Value;
                if (strLink.Contains("tructiepso.com/wp-content/uploads") || strLink.Contains("gamer.gif"))
                {
                    continue;
                }

                lst.Add(new ImgLink()
                {
                    Category = category,
                    Counter = strCounter,
                    CreateDate = DateTime.Now,
                    Domain = SiteDomain,
                    GroupName = strTitle,
                    linkimg = strLink
                });
            }
            catch (Exception ex)
            {
                // Best effort per image: one bad node must not abort the whole page.
                Console.WriteLine(ex.ToString());
            }
        }

        if (lst.Count > 0)
        {
            bolService.SaveImg(lst);
            Console.WriteLine(strPage);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
}