/// <summary>
/// Checks that an HTML comment placed directly inside a table row is kept in place
/// when the fragment is parsed and serialised again.
/// </summary>
public void handlesCommentsInTable()
{
    const string expected = "<html><head></head><body><table><tbody><tr><td>text</td><!-- Comment --></tr></tbody></table></body></html>";

    Document parsed = NSoupClient.ParseBodyFragment("<table><tr><td>text</td><!-- Comment --></tr></table>");

    Assert.AreEqual(expected, TextUtil.StripNewLines(parsed.OuterHtml()));
}
} // end function

/// <summary>
/// Downloads the page at <paramref name="url"/> and collects the page addresses linked
/// from the element whose id is "gallery".
/// </summary>
/// <param name="url">Address of the page to download; it is always the first entry of the result.</param>
/// <returns>
/// <paramref name="url"/> followed by <c>url + "/" + href</c> for every qualifying child node.
/// </returns>
private List<string> linkTongRaGa(string url)
{
    string html;
    WebRequest request = WebRequest.Create(url);
    // Dispose the response and reader deterministically so the connection is released
    // even when reading fails.
    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        html = reader.ReadToEnd();
    }

    Document page = NSoupClient.ParseBodyFragment(html);
    Element gallery = page.GetElementById("gallery");
    // The candidate links are the children of: gallery -> child 1 -> child 0.
    Node pager = gallery.GetChildNode(1).GetChildNode(0);

    List<string> pages = new List<string>();
    pages.Add(url);
    foreach (Node candidate in pager.ChildNodes)
    {
        // Keep nodes with class "link3" whose markup does not contain "next" past position 0.
        if (candidate.Attr("class") == "link3" && candidate.ToString().IndexOf("next") <= 0)
        {
            pages.Add(url + "/" + candidate.Attr("href"));
        }
    }
    return pages;
}
/// <summary>
/// Checks that a body fragment parsed with a base URI keeps its leading comment and
/// that a relative href resolves against that base URI.
/// </summary>
public void parsesBodyFragment()
{
    const string fragment = "<!-- comment --><p><a href='foo'>One</a></p>";
    const string baseUri = "http://example.com";

    Document parsed = NSoupClient.ParseBodyFragment(fragment, baseUri);

    // Serialised body: comment first, attribute quotes normalised to double quotes.
    Assert.AreEqual(
        "<body><!-- comment --><p><a href=\"foo\">One</a></p></body>",
        TextUtil.StripNewLines(parsed.Body.OuterHtml()));

    // The relative link is resolved against the base URI.
    Assert.AreEqual(
        "http://example.com/foo",
        parsed.Select("a").First.AbsUrl("href"));
}
/// <summary>
/// Checks that unknown tags nested inside paragraphs round-trip unchanged
/// through parsing and body serialisation.
/// </summary>
public void handlesUnknownInlineTags()
{
    const string markup = "<p><cust>Test</cust></p><p><cust><cust>Test</cust></cust></p>";

    Document parsed = NSoupClient.ParseBodyFragment(markup);

    Assert.AreEqual(markup, TextUtil.StripNewLines(parsed.Body.Html()));
}
/// <summary>
/// Reads the HTML file whose path is held in <c>url</c>, collects every anchor that points at
/// "https://www.pixhost.org", rewrites the "show" address to the "images" host and writes one
/// line per image to "data.txt" in the form <c>index#targetPath#link#0#waiting</c>.
/// </summary>
/// <remarks>
/// The target folder is <c>savepath</c> plus the part of <c>url</c> after its last backslash.
/// "data.txt" is truncated as soon as the method starts.
/// </remarks>
public void GetImagesLinkFromUrl4()
{
    // The using block closes data.txt even when reading or parsing throws.
    using (StreamWriter sw = new StreamWriter("data.txt", false))
    {
        // Create the destination folder
        string foldername = url.Substring(url.LastIndexOf("\\") + 1);
        string targetfolder = string.Format("{0}\\{1}", savepath, foldername);
        Directory.CreateDirectory(targetfolder);

        // Collect the image links from the saved page
        List<string> imageLinks = new List<string>();
        Document doc = NSoupClient.ParseBodyFragment(File.ReadAllText(url));
        foreach (Element anchor in doc.Select("a"))
        {
            string href = anchor.Attr("href");
            if (href != "" && href != "/" && href.IndexOf("https://www.pixhost.org") >= 0)
            {
                imageLinks.Add(href.Replace("https://www.pixhost.org/show/", "https://img1.pixhost.org/images/"));
            }
        }

        // File names are the zero-padded index: 000.jpg, 001.jpg, ... 100.jpg.
        // A failed write now surfaces to the caller instead of being swallowed silently.
        for (int i = 0; i < imageLinks.Count; i++)
        {
            string directory = string.Format("{0}\\{1}.jpg", targetfolder, i.ToString("D3"));
            sw.WriteLine(i + "#" + directory + "#" + imageLinks[i] + "#" + 0 + "#waiting");
        }
    }
}
/// <summary>
/// Checks that a script body containing an HTML comment and a split closing-tag string
/// is returned unchanged by the body serialiser.
/// </summary>
public void handlesQuotesInCommentsInScripts()
{
    // Input and expected output are the same text.
    const string script =
        "<script>\n" +
        " <!--\n" +
        " document.write('</scr' + 'ipt>');\n" +
        " // -->\n" +
        "</script>";

    Document parsed = NSoupClient.ParseBodyFragment(script);

    Assert.AreEqual(script, parsed.Body.Html());
}
// From the top-level link, derive the gallery page links and then the image links.
/// <summary>
/// Walks every page returned by <c>linkTongRaGa(url)</c>, follows each image whose parent link
/// contains "photo", extracts the image address from the text between "contentUrl" and
/// "datePublished" on that page, and writes one <c>index#targetPath#link#0#waiting</c> line
/// per image to "data.txt".
/// </summary>
/// <remarks>
/// A photo page that cannot be downloaded or parsed is skipped. The index restarts at 0 for
/// every gallery page.
/// </remarks>
public void GetImagesLinkFromUrl2()
{
    // The using block closes data.txt even when a request outside the inner try throws.
    using (StreamWriter sw = new StreamWriter("data.txt", false))
    {
        // Create the destination folder; any query-string tail is cut from the folder name.
        string foldername = CreateFolderName(this.url);
        int queryStart = foldername.LastIndexOf("?");
        if (queryStart > 0)
        {
            foldername = foldername.Substring(0, queryStart);
        }
        string targetfolder = savepath + "\\" + foldername;
        System.IO.Directory.CreateDirectory(targetfolder);

        foreach (string linkGall in linkTongRaGa(url))
        {
            string htmlstring;
            using (WebResponse webresponse = WebRequest.Create(linkGall).GetResponse())
            using (StreamReader inStream = new StreamReader(webresponse.GetResponseStream()))
            {
                htmlstring = inStream.ReadToEnd();
            }

            // Collect the photo-page links: the href of each image's parent element.
            List<string> photoPages = new List<string>();
            Document doc = NSoupClient.ParseBodyFragment(htmlstring);
            foreach (Element image in doc.Select("img"))
            {
                string href = image.Parent.Attr("href");
                if (href != "" && href != "/" && href.IndexOf("photo") > 0)
                {
                    photoPages.Add("http://www.imagefap.com" + href);
                }
            }

            int i = 0;
            foreach (string galerylink in photoPages)
            {
                // Get the image link from each photo page
                try
                {
                    string htmlstring2;
                    using (WebResponse webresponse2 = WebRequest.Create(galerylink).GetResponse())
                    using (StreamReader inStream2 = new StreamReader(webresponse2.GetResponseStream()))
                    {
                        htmlstring2 = inStream2.ReadToEnd();
                    }

                    // The address sits 14 characters after "contentUrl" and ends 7 characters
                    // before "datePublished".
                    int count1 = htmlstring2.IndexOf("contentUrl") + 14;
                    int count2 = htmlstring2.IndexOf("datePublished") - 7;
                    string imagelink = htmlstring2.Substring(count1, count2 - count1);
                    string savename = imagelink.Substring(imagelink.LastIndexOf("/") + 1);
                    string directory = targetfolder + "\\" + savename;
                    sw.WriteLine(i + "#" + directory + "#" + imagelink + "#" + 0 + "#waiting");
                    i++;
                }
                catch (Exception)
                {
                    // Best effort: a photo page that fails to download or parse is skipped.
                }
            } // end of photo-page loop
        } // end of gallery-page loop
    }
} // end function
/// <summary>
/// Builds the download list for a paginated gallery: finds the last "page-numbers" link on the
/// page at <c>url</c>, visits pages 1..N, follows every gallery thumbnail to its image page and
/// records each link found inside a "pic" element.
/// </summary>
/// <returns>
/// One <c>ItemDown</c> per download link, with status "waiting". The item index restarts at 1
/// for every image page.
/// </returns>
public override List<ItemDown> GetImagesLinkFromUrl()
{
    List<ItemDown> listDown = new List<ItemDown>();

    // Create the destination folder
    string targetfolder = savepath + "\\" + CreateFolderName(url);
    System.IO.Directory.CreateDirectory(targetfolder);

    // Download the landing page; responses and readers are disposed deterministically.
    string htmlstring;
    using (WebResponse webresponse = WebRequest.Create(url).GetResponse())
    using (StreamReader inStream = new StreamReader(webresponse.GetResponseStream()))
    {
        htmlstring = inStream.ReadToEnd();
    }
    Document doc = NSoupClient.ParseBodyFragment(htmlstring);

    // Keep the last "page-numbers" link on the page
    string lastListLink = "";
    foreach (Element link in doc.Select("a"))
    {
        if ("page-numbers".Equals(link.ClassName()))
        {
            lastListLink = "http://www.bcfakes.com" + link.Attr("href");
        }
    }

    // Expand it into one address per page, or fall back to the landing page alone.
    List<string> listTemp = new List<string>();
    if (!lastListLink.Equals(""))
    {
        int valueStart = lastListLink.IndexOf("?nggpage=") + 9;
        string pagePrefix = lastListLink.Substring(0, valueStart);
        int lastListIndex = Int32.Parse(lastListLink.Substring(valueStart));
        for (int i = 1; i <= lastListIndex; i++)
        {
            listTemp.Add(pagePrefix + i);
        }
    }
    else
    {
        listTemp.Add(url);
    }

    foreach (string item in listTemp)
    {
        string htmlstring2;
        using (WebResponse webresponse2 = WebRequest.Create(item).GetResponse())
        using (StreamReader inStream2 = new StreamReader(webresponse2.GetResponseStream()))
        {
            htmlstring2 = inStream2.ReadToEnd();
        }
        Document doc2 = NSoupClient.ParseBodyFragment(htmlstring2);

        // Each thumbnail links to a page that holds the real image link.
        foreach (Element link in doc2.Select("img"))
        {
            // Walk up null-safely: an image close to the document root has fewer than four
            // ancestors, which used to raise NullReferenceException.
            Element thumbHolder = link.Parent == null ? null : link.Parent.Parent;
            Element galleryHolder = (thumbHolder == null || thumbHolder.Parent == null)
                ? null
                : thumbHolder.Parent.Parent;

            bool inGallery = galleryHolder != null
                && galleryHolder.ClassName().Equals("ngg-galleryoverview")
                && galleryHolder.Id.Equals("ngg-gallery-43-267");
            bool isThumbnail = thumbHolder != null
                && thumbHolder.ClassName().Equals("ngg-gallery-thumbnail");
            if (!inGallery && !isThumbnail)
            {
                continue;
            }

            string imagelink = "http://www.bcfakes.com" + link.Parent.Attr("href");
            string htmlstring3;
            using (WebResponse webresponse3 = WebRequest.Create(imagelink).GetResponse())
            using (StreamReader inStream3 = new StreamReader(webresponse3.GetResponseStream()))
            {
                htmlstring3 = inStream3.ReadToEnd();
            }
            Document doc3 = NSoupClient.ParseBodyFragment(htmlstring3);

            int countItem = 0;
            foreach (Element linkdownFinal in doc3.Select("a"))
            {
                if (!linkdownFinal.Parent.ClassName().Equals("pic"))
                {
                    continue;
                }

                countItem++;
                string imagedownloadlink = linkdownFinal.Attr("href");
                // File name = text between the last "/" and the last ".", saved as .jpg
                int int1 = imagedownloadlink.LastIndexOf("/") + 1;
                int int2 = imagedownloadlink.LastIndexOf(".");
                string directory = targetfolder + @"\" + imagedownloadlink.Substring(int1, int2 - int1) + ".jpg";
                listDown.Add(new ItemDown(countItem, directory, imagedownloadlink, 0, "waiting"));
            }
        }
    }
    return listDown;
}
/// <summary>
/// Downloads the page at <c>url</c> and returns one <c>ItemDown</c> per <c>img</c> element,
/// using the image's "abs:src" attribute as the link and
/// <c>targetfolder\savename + three-digit index + ".jpg"</c> as the destination.
/// </summary>
/// <returns>
/// The list of items with status "waiting", or <c>null</c> when downloading or parsing throws;
/// in that case the exception is appended to "log.txt" in the current directory.
/// </returns>
public virtual List<ItemDown> GetImagesLinkFromUrl()
{
    List<ItemDown> listImage = new List<ItemDown>();

    // Create the destination folder; fall back to "Test" when the derived name is rejected.
    string targetfolder = "";
    string foldername = CreateFolderName(this.url);
    try
    {
        targetfolder = savepath + "\\" + foldername;
        System.IO.Directory.CreateDirectory(targetfolder);
    }
    catch (Exception)
    {
        targetfolder = savepath + "\\Test";
        System.IO.Directory.CreateDirectory(targetfolder);
    }

    try
    {
        HttpWebRequest myWebRequest = (HttpWebRequest)HttpWebRequest.Create(url);
        myWebRequest.Method = "GET";
        myWebRequest.UserAgent = "Foo";
        myWebRequest.Accept = "text/html";

        // Response and reader are disposed even when reading fails.
        string htmlstring;
        using (HttpWebResponse myWebResponse = (HttpWebResponse)myWebRequest.GetResponse())
        using (StreamReader myWebSource = new StreamReader(myWebResponse.GetResponseStream()))
        {
            htmlstring = myWebSource.ReadToEnd();
        }

        Document doc = NSoupClient.ParseBodyFragment(htmlstring);

        // Take each image link and queue it for download
        int i = 0;
        foreach (Element link in doc.Select("img"))
        {
            string imagelink = link.Attr("abs:src");
            // "D3" pads the index to three digits (000, 010, 100).
            string directory = targetfolder + "\\" + savename + i.ToString("D3") + ".jpg";
            listImage.Add(new ItemDown(i, directory, imagelink, 0, "waiting"));
            i++;
        }
        return listImage;
    }
    catch (Exception _ex)
    {
        // Path.Combine inserts the separator that plain concatenation left out, so the log
        // lands inside the current directory rather than next to it.
        string logPath = Path.Combine(Directory.GetCurrentDirectory(), "log.txt");
        using (StreamWriter sw = new StreamWriter(logPath, true))
        {
            sw.WriteLine("Lỗi khi lấy link ảnh:");
            sw.Write(_ex.ToString());
        }
        return null;
    }
}
/// <summary>
/// Reads the HTML file whose path is held in <c>url</c>, matches every image <c>src</c> against
/// the filters defined in "Filter.xml" and writes one <c>index#targetPath#link#0#waiting</c>
/// line per match to "data.txt".
/// </summary>
/// <returns>
/// "Done" when at least one link was collected (the target folder is then created),
/// otherwise "Fail".
/// </returns>
/// <remarks>
/// A link is added once per matching filter, with the filter's source text replaced by its
/// target text. <c>listFilters</c> is assigned the first time an image with a non-empty
/// <c>src</c> is seen.
/// </remarks>
public string GetImagesLinkFromUrl5()
{
    string targetfolder;
    List<string> listtemp = new List<string>();

    // The using block closes data.txt even when reading, parsing or filter loading throws.
    using (StreamWriter sw = new StreamWriter("data.txt", false))
    {
        string foldername = url.Substring(url.LastIndexOf("\\") + 1);
        if (foldername.IndexOf(".html") > 0)
        {
            foldername = foldername.Replace(".html", "");
        }
        targetfolder = string.Format("{0}\\{1}", savepath, foldername);

        // Collect the image links from the saved page
        Document doc = NSoupClient.ParseBodyFragment(File.ReadAllText(url));
        bool filtersLoaded = false;
        foreach (Element link in doc.Select("img"))
        {
            string imagelink = link.Attr("src");
            if (imagelink == "")
            {
                // NOTE(review): the original read "href" here but never used the value, so
                // images without a src are effectively skipped. Confirm whether the href was
                // meant to go through the filters as well.
                continue;
            }

            // Load the filter file once, on first need, instead of once per image.
            if (!filtersLoaded)
            {
                XElement xDoc = XElement.Load("Filter.xml");
                listFilters = (from q in xDoc.Elements("Filter")
                               select new filterOb(q.Element("Link").Value, q.Element("Source").Value, q.Element("Target").Value)
                               ).ToList();
                filtersLoaded = true;
            }

            foreach (filterOb filter in listFilters)
            {
                if (imagelink != "/" && imagelink.IndexOf(filter.link) >= 0)
                {
                    listtemp.Add(imagelink.Replace(filter.source, filter.target));
                }
            }
        }

        // File names are "<folder name>_<three-digit index>.jpg".
        // A failed write now surfaces to the caller instead of being swallowed silently.
        for (int i = 0; i < listtemp.Count; i++)
        {
            string directory = string.Format("{0}\\{1}_{2}.jpg", targetfolder, foldername, i.ToString("D3"));
            sw.WriteLine(i + "#" + directory + "#" + listtemp[i] + "#" + 0 + "#waiting");
        }
    }

    if (listtemp.Count > 0)
    {
        Directory.CreateDirectory(targetfolder);
        return "Done";
    }
    return "Fail";
}
/// <summary>
/// Reads the HTML file whose path is held in <c>url</c> and writes one
/// <c>index#targetPath#link#0#waiting</c> line per <c>img</c> element to "data.txt", using the
/// image's "abs:src" attribute as the link.
/// </summary>
/// <remarks>
/// When reading, parsing or writing throws, the exception is appended to "log.txt" in the
/// current directory and the method returns normally.
/// </remarks>
public virtual void GetImagesLinkFromUrl3()
{
    // The using block closes data.txt on every path, including a failure while logging.
    using (StreamWriter sw = new StreamWriter("data.txt", false))
    {
        // Create the destination folder; fall back to "Test" when the derived name is rejected.
        string targetfolder = "";
        string foldername = CreateFolderName(this.url);
        try
        {
            targetfolder = savepath + "\\" + foldername;
            System.IO.Directory.CreateDirectory(targetfolder);
        }
        catch (Exception)
        {
            targetfolder = savepath + "\\Test";
            System.IO.Directory.CreateDirectory(targetfolder);
        }

        try
        {
            Document doc = NSoupClient.ParseBodyFragment(File.ReadAllText(url));

            // Take each image link and queue it for download
            int i = 0;
            foreach (Element link in doc.Select("img"))
            {
                string imagelink = link.Attr("abs:src");
                // "D3" pads the index to three digits (000, 010, 100).
                string directory = targetfolder + "\\" + savename + i.ToString("D3") + ".jpg";
                sw.WriteLine(i + "#" + directory + "#" + imagelink + "#" + 0 + "#waiting");
                i++;
            }
            // Flush inside the try so a failed write is still logged below.
            sw.Flush();
        }
        catch (Exception _ex)
        {
            // Path.Combine inserts the separator that plain concatenation left out, so the
            // log lands inside the current directory rather than next to it.
            string logPath = Path.Combine(Directory.GetCurrentDirectory(), "log.txt");
            using (StreamWriter sw2 = new StreamWriter(logPath, true))
            {
                sw2.WriteLine("Lỗi khi lấy link ảnh:");
                sw2.Write(_ex.ToString());
            }
        }
    }
}
/// <summary>
/// Builds the download list for a manga: collects the chapter links from the page at
/// <c>url</c>, loads every chapter in a <c>WebBrowser</c> control and records the <c>src</c>
/// of each image whose parent has class "viewer".
/// </summary>
/// <returns>
/// One <c>ItemDown</c> per image with status "waiting"; the index restarts at 0 for every
/// chapter. Files go to <c>savepath\mangaName\chapterName</c>.
/// </returns>
/// <remarks>
/// NOTE(review): <c>WebBrowser</c> normally has to be used from an STA thread with a message
/// loop — confirm how this method is called.
/// </remarks>
public override List<ItemDown> GetImagesLinkFromUrl()
{
    List<ItemDown> listDown = new List<ItemDown>();

    // Manga name = last path segment of url, after dropping the final character
    // (assumes url ends with "/" — TODO confirm).
    string mangaName = url.Substring(0, url.Length - 1);
    mangaName = mangaName.Substring(mangaName.LastIndexOf("/") + 1);

    string htmlstring;
    using (WebResponse webresponse = WebRequest.Create(url).GetResponse())
    using (StreamReader inStream = new StreamReader(webresponse.GetResponseStream()))
    {
        htmlstring = inStream.ReadToEnd();
    }
    Document doc = NSoupClient.ParseBodyFragment(htmlstring);

    // Step 1: from the manga page, collect "<sibling text>###<href>" for every chapter link
    List<string> listTemp = new List<string>();
    foreach (Element link in doc.Select("a"))
    {
        if ("chapter-name".Equals(link.Parent.ClassName()))
        {
            listTemp.Add(link.SiblingElements.Text + "###" + link.Attr("href"));
        }
    }

    // Step 2: from each chapter link, collect the image links of that chapter
    string[] forbidden = new string[] { "\\", "/", ":", "*", "?", "\"", "<", ">", "|" };
    const int loadTimeoutSeconds = 30;
    foreach (string item in listTemp)
    {
        // Build the save location; characters not allowed in Windows file names are removed.
        string linkChap = item.Substring(item.LastIndexOf("###") + 3);
        string countChap = item.Substring(0, item.IndexOf("###"));
        foreach (string bad in forbidden)
        {
            countChap = countChap.Replace(bad, "");
        }

        // NOTE(review): the result was never used in the original either; the call is kept
        // in case CreateFolderName has side effects.
        CreateFolderName(linkChap);

        string targetfolder = savepath + "\\" + mangaName + "\\" + countChap;
        System.IO.Directory.CreateDirectory(targetfolder);

        // Load the chapter page. The original navigated to the manga page (url) for every
        // chapter and read DocumentText before navigation could finish.
        string htmlstring2;
        using (System.Windows.Forms.WebBrowser wb = new System.Windows.Forms.WebBrowser())
        {
            wb.Navigate(linkChap);

            // Navigate is asynchronous: pump messages until the document is complete,
            // giving up after a fixed timeout so a dead page cannot hang the loop.
            DateTime deadline = DateTime.UtcNow.AddSeconds(loadTimeoutSeconds);
            while (wb.ReadyState != System.Windows.Forms.WebBrowserReadyState.Complete
                   && DateTime.UtcNow < deadline)
            {
                System.Windows.Forms.Application.DoEvents();
                System.Threading.Thread.Sleep(20);
            }
            htmlstring2 = wb.DocumentText;
        }

        Document doc2 = NSoupClient.ParseBodyFragment(htmlstring2);
        int i = 0;
        foreach (Element link in doc2.Select("img"))
        {
            if (!link.Parent.ClassName().Equals("viewer"))
            {
                continue;
            }

            string imagelink = link.Attr("src");
            // "D3" pads the index to three digits (000, 010, 100).
            string directory = targetfolder + "\\" + savename + i.ToString("D3") + ".jpg";
            listDown.Add(new ItemDown(i, directory, imagelink, 0, "waiting"));
            i++;
        }
    }
    return listDown;
}
/// <summary>
/// Downloads the page at <c>url</c>, follows every anchor whose sixth ancestor has class
/// "gallerybody", and for each <c>iframe</c> with id "thepic" on the linked page writes one
/// <c>index#targetPath#link#0#waiting</c> line to "data.txt".
/// </summary>
/// <remarks>
/// The index starts at 1 and restarts for every linked page. The target file name is the text
/// between the last "/" and the last "." of the image link, saved as ".jpg".
/// </remarks>
public override void GetImagesLinkFromUrl2()
{
    // The using block closes data.txt even when a request throws.
    using (StreamWriter sw = new StreamWriter("data.txt", false))
    {
        // Build the save location; strip characters that must not appear in the folder name.
        string foldername = CreateFolderName(url);
        string[] stripped = new string[] { "\\", "/", ":", "*", "?", "\"", "~", "<", ">", "|", ".", "+" };
        foreach (string bad in stripped)
        {
            foldername = foldername.Replace(bad, "");
        }
        string targetfolder = savepath + "\\" + foldername;
        System.IO.Directory.CreateDirectory(targetfolder);

        string htmlstring;
        using (WebResponse webresponse = WebRequest.Create(url).GetResponse())
        using (StreamReader inStream = new StreamReader(webresponse.GetResponseStream()))
        {
            htmlstring = inStream.ReadToEnd();
        }
        Document doc = NSoupClient.ParseBodyFragment(htmlstring);

        List<string> listTemp = new List<string>();
        foreach (Element link in doc.Select("a"))
        {
            // Climb six levels null-safely: an anchor with fewer ancestors used to raise
            // NullReferenceException and abort the whole run.
            Element ancestor = link;
            for (int depth = 0; depth < 6 && ancestor != null; depth++)
            {
                ancestor = ancestor.Parent;
            }

            if (ancestor != null && "gallerybody".Equals(ancestor.ClassName()))
            {
                listTemp.Add("http://litugirls.urlgalleries.net" + link.Attr("href"));
            }
        }

        foreach (string item in listTemp)
        {
            string htmlstring2;
            using (WebResponse webresponse2 = WebRequest.Create(item).GetResponse())
            using (StreamReader inStream2 = new StreamReader(webresponse2.GetResponseStream()))
            {
                htmlstring2 = inStream2.ReadToEnd();
            }
            Document doc2 = NSoupClient.ParseBodyFragment(htmlstring2);

            // The linked page embeds the picture in an iframe; its src gives the image link.
            int i = 0;
            foreach (Element frame in doc2.Select("iframe"))
            {
                if (!frame.Id.Equals("thepic"))
                {
                    continue;
                }

                i++;
                string imagelink = "http://img227.imagevenue.com/" + frame.Attr("src");
                int int1 = imagelink.LastIndexOf("/") + 1;
                int int2 = imagelink.LastIndexOf(".");
                string directory = targetfolder + @"\" + imagelink.Substring(int1, int2 - int1) + ".jpg";
                sw.WriteLine(i + "#" + directory + "#" + imagelink + "#" + 0 + "#waiting");
            }
        }
    }
}
/// <summary>
/// Downloads the page at <c>url</c>, follows the parent link of every image to a gallery page
/// and returns one <c>ItemDown</c> per image found there.
/// </summary>
/// <returns>
/// The collected items with status "waiting"; the index restarts at 0 for every gallery page.
/// A gallery page that fails to download or parse is skipped.
/// </returns>
public override List<ItemDown> GetImagesLinkFromUrl()
{
    List<ItemDown> listDown = new List<ItemDown>();

    // Create the destination folder
    string targetfolder = savepath + "\\" + CreateFolderName(this.url);
    System.IO.Directory.CreateDirectory(targetfolder);

    // Get the gallery links from the top-level page
    string htmlstring;
    using (WebResponse webresponse = WebRequest.Create(url).GetResponse())
    using (StreamReader inStream = new StreamReader(webresponse.GetResponseStream()))
    {
        htmlstring = inStream.ReadToEnd();
    }

    List<string> listtemp = new List<string>();
    Document doc = NSoupClient.ParseBodyFragment(htmlstring);
    foreach (Element link in doc.Select("img"))
    {
        string href = link.Parent.Attr("href");
        if (href != "" && href != "/")
        {
            listtemp.Add("https://fuskator.com" + href);
        }
    }

    foreach (string galerylink in listtemp)
    {
        // Get the image links from each gallery link
        try
        {
            string htmlstring2;
            // Read the gallery's own response. The original built this reader on the first
            // response (webresponse), whose stream had already been consumed.
            using (WebResponse webresponse2 = WebRequest.Create(galerylink).GetResponse())
            using (StreamReader inStream2 = new StreamReader(webresponse2.GetResponseStream()))
            {
                htmlstring2 = inStream2.ReadToEnd();
            }

            Document doc2 = NSoupClient.ParseBodyFragment(htmlstring2);
            int i = 0;
            foreach (Element link in doc2.Select("img"))
            {
                // NOTE(review): the src is read from the image's parent, not the image
                // itself — confirm this matches the page markup.
                string imagelink = "https:" + link.Parent.Attr("src");
                string savename = imagelink.Substring(imagelink.LastIndexOf("/") + 1);
                string directory = targetfolder + "\\" + savename;
                listDown.Add(new ItemDown(i, directory, imagelink, 0, "waiting"));
                i++;
            }
        }
        catch (Exception)
        {
            // Best effort: a gallery that fails to download or parse is skipped.
        }
    }
    return listDown;
}
/// <summary>
/// Builds the download list for a manga: collects the chapter links from the page at
/// <c>url</c>, downloads every chapter page and splits the "|"-separated image list found
/// between <c>data='</c> and <c>images = new Array();</c>.
/// </summary>
/// <returns>
/// One <c>ItemDown</c> per image with status "waiting"; the index restarts at 0 for every
/// chapter. Files go to <c>savepath\mangaName\chapterName</c>.
/// </returns>
public override List<ItemDown> GetImagesLinkFromUrl()
{
    List<ItemDown> listDown = new List<ItemDown>();

    // Manga name = last path segment of url without ".html"
    string mangaName = url.Substring(url.LastIndexOf("/") + 1).Replace(".html", "");

    // Responses and readers are disposed deterministically.
    string htmlstring;
    using (WebResponse webresponse = WebRequest.Create(url).GetResponse())
    using (StreamReader inStream = new StreamReader(webresponse.GetResponseStream()))
    {
        htmlstring = inStream.ReadToEnd();
    }
    Document doc = NSoupClient.ParseBodyFragment(htmlstring);

    // Step 1: from the manga page, collect "<sibling text>###<chapter address>" per chapter
    List<string> listTemp = new List<string>();
    foreach (Element link in doc.Select("a"))
    {
        string rowClass = link.Parent.Parent.ClassName();
        if ("item-even".Equals(rowClass) || "item-odd".Equals(rowClass))
        {
            listTemp.Add(link.SiblingElements.Text + "###" + "http://manga24h.com/" + link.Attr("href"));
        }
    }

    // Step 2: from each chapter link, collect the image links of that chapter
    string[] forbidden = new string[] { "\\", "/", ":", "*", "?", "\"", "<", ">", "|" };
    foreach (string item in listTemp)
    {
        // Build the save location; characters not allowed in Windows file names are removed.
        string linkChap = item.Substring(item.LastIndexOf("###") + 3);
        string countChap = item.Substring(0, item.IndexOf("###"));
        foreach (string bad in forbidden)
        {
            countChap = countChap.Replace(bad, "");
        }

        // NOTE(review): the result was never used in the original either; the call is kept
        // in case CreateFolderName has side effects.
        CreateFolderName(linkChap);

        string targetfolder = savepath + "\\" + mangaName + "\\" + countChap;
        System.IO.Directory.CreateDirectory(targetfolder);

        string htmlstring2;
        using (WebResponse webresponse2 = WebRequest.Create(linkChap).GetResponse())
        using (StreamReader inStream2 = new StreamReader(webresponse2.GetResponseStream()))
        {
            htmlstring2 = inStream2.ReadToEnd();
        }

        // The image list is the text after "data='" up to the last "';" that precedes
        // "images = new Array();".
        int Start = htmlstring2.IndexOf("data='") + 6;
        int Stop = htmlstring2.IndexOf("images = new Array();");
        string datacontent = htmlstring2.Substring(Start, Stop - Start);
        datacontent = datacontent.Substring(0, datacontent.LastIndexOf("';"));

        int i = 0;
        foreach (string link in datacontent.Split('|'))
        {
            // Strip separators and whitespace control characters from each entry.
            string imagelink = link.Replace(";", "").Replace("\r", "").Replace("\n", "").Replace("\t", "");
            // "D3" pads the index to three digits (000, 010, 100).
            string directory = targetfolder + "\\" + savename + i.ToString("D3") + ".jpg";
            listDown.Add(new ItemDown(i, directory, imagelink, 0, "waiting"));
            i++;
        }
    }
    return listDown;
}
/// <summary>
/// Downloads the page at <c>url</c>, follows every image whose parent link contains
/// "coreimg.net" and writes one <c>index#targetPath#link#0#waiting</c> line to "data.txt" for
/// each <c>img</c> found on the linked page.
/// </summary>
/// <remarks>
/// Target files are named by the three-digit index, which restarts at 0 for every linked page.
/// A linked page that fails to download or parse is skipped.
/// </remarks>
public void GetImagesLinkFromUrl2()
{
    // The using block closes data.txt even when the first request throws.
    using (StreamWriter sw = new StreamWriter("data.txt", false))
    {
        // Create the destination folder, named after the last path segment of url
        string foldername = url.Substring(url.LastIndexOf("/") + 1);
        string targetfolder = savepath + "\\" + foldername;
        System.IO.Directory.CreateDirectory(targetfolder);

        // Get the gallery links from the top-level page
        string htmlstring;
        using (WebResponse webresponse = WebRequest.Create(url).GetResponse())
        using (StreamReader inStream = new StreamReader(webresponse.GetResponseStream()))
        {
            htmlstring = inStream.ReadToEnd();
        }

        List<string> listtemp = new List<string>();
        Document doc = NSoupClient.ParseBodyFragment(htmlstring);
        foreach (Element link in doc.Select("img"))
        {
            string href = link.Parent.Attr("href");
            if (href != "" && href != "/" && href.IndexOf("coreimg.net") > 0)
            {
                listtemp.Add(href);
            }
        }

        foreach (string galerylink in listtemp)
        {
            // Get the image links from each gallery link
            try
            {
                string htmlstring2;
                using (WebResponse webresponse2 = WebRequest.Create(galerylink).GetResponse())
                using (StreamReader inStream2 = new StreamReader(webresponse2.GetResponseStream()))
                {
                    htmlstring2 = inStream2.ReadToEnd();
                }

                Document doc2 = NSoupClient.ParseBodyFragment(htmlstring2);
                int i = 0;
                foreach (Element link in doc2.Select("img"))
                {
                    string imagelink = link.Attr("src");
                    // "D3" pads the index to three digits (000, 010, 100).
                    string directory = targetfolder + "\\" + i.ToString("D3") + ".jpg";
                    sw.WriteLine(i + "#" + directory + "#" + imagelink + "#" + 0 + "#waiting");
                    i++;
                }
            }
            catch (Exception)
            {
                // Best effort: a gallery that fails to download or parse is skipped.
            }
        }
    }
}
/// <summary>
/// Finds the first full-size image link reachable from the page at <c>url</c> and generates
/// the whole numbered series from it, writing one <c>index#targetPath#link#0#waiting</c> line
/// per image to "data.txt".
/// </summary>
/// <remarks>
/// The number between the last "_" and the last "." of the discovered link is used as the
/// highest index; indexes 1..N are zero-padded to the same width and given a ".jpg" suffix.
/// When no link is found, "data.txt" is left empty.
/// </remarks>
public void GetImagesLinkFromUrl2()
{
    // The using block closes data.txt even when a request or the parsing throws.
    using (StreamWriter sw = new StreamWriter("data.txt", false))
    {
        // Create the destination folder
        string targetfolder = savepath + "\\" + CreateFolderName(url);
        System.IO.Directory.CreateDirectory(targetfolder);

        // Download the gallery page
        string htmlstring;
        using (WebResponse webresponse = WebRequest.Create(url).GetResponse())
        using (StreamReader inStream = new StreamReader(webresponse.GetResponseStream()))
        {
            htmlstring = inStream.ReadToEnd();
        }
        Document doc = NSoupClient.ParseBodyFragment(htmlstring);

        // Follow thumbnails until one of the image pages yields a link inside a "pic" element.
        string imagedownloadlink = "";
        foreach (Element link in doc.Select("a"))
        {
            if (link.Parent.ClassName().Equals("ngg-gallery-thumbnail"))
            {
                string imagelink = "http://www.bcfakes.com" + link.Attr("href");
                string htmlstring3;
                using (WebResponse webresponse3 = WebRequest.Create(imagelink).GetResponse())
                using (StreamReader inStream3 = new StreamReader(webresponse3.GetResponseStream()))
                {
                    htmlstring3 = inStream3.ReadToEnd();
                }

                Document doc3 = NSoupClient.ParseBodyFragment(htmlstring3);
                foreach (Element linkdownFinal in doc3.Select("a"))
                {
                    if (linkdownFinal.Parent.ClassName().Equals("pic"))
                    {
                        imagedownloadlink = linkdownFinal.Attr("href");
                        break;
                    }
                }
            }

            if (!imagedownloadlink.Equals(""))
            {
                break;
            }
        }

        // Nothing found: the Substring below would throw ArgumentOutOfRangeException.
        if (imagedownloadlink.Equals(""))
        {
            return;
        }

        // Split "<prefix>_<number>.<ext>" into the prefix and the highest index.
        int underscore = imagedownloadlink.LastIndexOf("_");
        int dot = imagedownloadlink.LastIndexOf(".");
        string temptemp = imagedownloadlink.Substring(underscore + 1, dot - underscore - 1);
        int SoChuSo0 = temptemp.Length; // digit count used for zero padding
        int lastindex = int.Parse(temptemp);
        string LinkdataDown = imagedownloadlink.Substring(0, underscore + 1);

        for (int i = 1; i <= lastindex; i++)
        {
            // Pad the index with leading zeros to the width of the discovered number.
            string generatedLink = LinkdataDown + i.ToString().PadLeft(SoChuSo0, '0') + ".jpg";
            int int1 = generatedLink.LastIndexOf("/") + 1;
            int int2 = generatedLink.LastIndexOf(".");
            string directory = targetfolder + @"\" + generatedLink.Substring(int1, int2 - int1) + ".jpg";
            sw.WriteLine(i + "#" + directory + "#" + generatedLink + "#" + 0 + "#waiting");
        }
    }
}