/// <summary>
/// Downloads the paginated chapter list behind <paramref name="line"/> and records the chapter
/// links and chapter names it finds. Both lists are reversed relative to scrape order and then
/// appended, as arrays, to <c>ToonChapters</c> and <c>ToonChapterNames</c>;
/// <c>absoluteChapterNR</c> is increased by the number of links collected.
/// </summary>
/// <param name="line">
/// URL of the comic's chapter list. Everything from the first '&amp;' onward is dropped before
/// "&amp;page=N" is appended. A null or empty value makes the method return without doing anything.
/// </param>
/// <returns>A task that completes once every list page has been processed.</returns>
/// <exception cref="InvalidOperationException">
/// A downloaded page contains no element with the id "_listUl".
/// </exception>
private async Task GetChapterAsync(string line)
{
    if (string.IsNullOrEmpty(line))
    {
        return;
    }

    // Both scratch lists must start empty; otherwise names left over from an earlier call
    // would end up in this comic's name array and no longer line up with its links.
    chapterLinks.Clear();
    chapterNames.Clear();

    int ampersandIndex = line.IndexOf('&');
    string baseUrl = ampersandIndex == -1 ? line : line.Substring(0, ampersandIndex);

    using (WebClient client = new WebClient())
    {
        // Sentinel for "no page seen yet"; it only has to differ from every real href.
        string firstLink = "Thanks for looking through my source code lol";
        int page = 0;

        while (true)
        {
            page++;
            string pageUrl = baseUrl + "&page=" + page;
            string html = await client.DownloadStringTaskAsync(pageUrl);

            var doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            var listNode = doc.GetElementbyId("_listUl");
            if (listNode == null)
            {
                throw new InvalidOperationException(
                    "Chapter list element '_listUl' was not found on " + pageUrl);
            }

            HtmlNodeCollection entries = listNode.ChildNodes;
            bool sawEntry = false;
            bool foundEnd = false;

            for (int j = 0; j < entries.Count; j++)
            {
                HtmlNode entry = entries[j];
                if (!entry.HasChildNodes)
                {
                    continue;
                }

                // NOTE(review): the fixed child indices below depend on the site's current markup.
                HtmlNode anchor = entry.ChildNodes[1];
                string href = anchor.Attributes["href"].Value;

                if (!sawEntry)
                {
                    // Only the first entry of each page is compared: when it equals the first
                    // entry of the previous page, no further page exists.
                    if (firstLink == href)
                    {
                        foundEnd = true;
                        break;
                    }

                    firstLink = href;
                    sawEntry = true;
                }

                chapterLinks.Add(href); // link of the chapter
                chapterNames.Add(anchor.ChildNodes[3].ChildNodes[0].InnerHtml); // name of the chapter
            }

            // A page without any entry also ends the scan; without this check such a page
            // would keep the loop requesting further pages forever.
            if (foundEnd || !sawEntry)
            {
                break;
            }
        }
    }

    // Add all chapter links and chapter names of the just scraped site to the full lists of the comic.
    chapterLinks.Reverse();
    absoluteChapterNR += chapterLinks.Count;
    ToonChapters.Add(chapterLinks.ToArray());

    chapterNames.Reverse();
    ToonChapterNames.Add(chapterNames.ToArray());
}
/// <summary>
/// Downloads the paginated chapter list behind <paramref name="line"/> and records the chapter
/// links and chapter names it finds, reporting the current page through <c>processInfo</c>.
/// Both lists are reversed relative to scrape order; the names are stripped of characters that
/// are invalid in file names or paths. The results are appended, as arrays, to
/// <c>ToonChapters</c> and <c>ToonChapterNames</c>, and <c>absoluteChapterNR</c> is increased
/// by the number of links collected.
/// </summary>
/// <param name="line">
/// URL of the comic's chapter list. Everything from the first '&amp;' onward is dropped before
/// "&amp;page=N" is appended. A null or empty value makes the method return without doing anything.
/// </param>
/// <returns>A task that completes once every list page has been processed.</returns>
/// <exception cref="InvalidOperationException">
/// A downloaded page contains no element with the id "_listUl".
/// </exception>
private async Task GetChapterAsync(string line)
{
    if (string.IsNullOrEmpty(line))
    {
        return;
    }

    // Both scratch lists must start empty; otherwise names left over from an earlier call
    // would end up in this comic's name array and no longer line up with its links.
    chapterLinks.Clear();
    chapterNames.Clear();

    int ampersandIndex = line.IndexOf('&');
    string baseUrl = ampersandIndex == -1 ? line : line.Substring(0, ampersandIndex);

    using (WebClient client = new WebClient())
    {
        // The proxy does not change between pages, so it is configured once.
        client.Proxy = WebRequest.DefaultWebProxy;

        // Sentinel for "no page seen yet"; it only has to differ from every real href.
        string firstLink = "Thanks for looking through my source code lol";
        int page = 0;

        while (true)
        {
            page++;
            processInfo.Invoke((MethodInvoker)delegate { processInfo.Text = $"scoping tab {page}"; }); // run on the UI thread

            // Cookie to bypass age verification. Assigned (not Add-ed) so the value is not
            // appended to the same header again on every page request.
            client.Headers["Cookie"] = "pagGDPR=true;";

            string pageUrl = baseUrl + "&page=" + page;
            string html = await client.DownloadStringTaskAsync(pageUrl);

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            HtmlNode listNode = doc.GetElementbyId("_listUl");
            if (listNode == null)
            {
                throw new InvalidOperationException(
                    "Chapter list element '_listUl' was not found on " + pageUrl);
            }

            HtmlNodeCollection entries = listNode.ChildNodes;
            bool sawEntry = false;
            bool foundEnd = false;

            for (int j = 0; j < entries.Count; j++)
            {
                HtmlNode entry = entries[j];
                if (!entry.HasChildNodes)
                {
                    continue;
                }

                // NOTE(review): the fixed child indices below depend on the site's current markup.
                HtmlNode anchor = entry.ChildNodes[1];
                string href = anchor.Attributes["href"].Value;

                if (!sawEntry)
                {
                    // Only the first entry of each page is compared: when it equals the first
                    // entry of the previous page, no further page exists.
                    if (firstLink == href)
                    {
                        foundEnd = true;
                        break;
                    }

                    firstLink = href;
                    sawEntry = true;
                }

                chapterLinks.Add(href); // link of the chapter
                chapterNames.Add(anchor.ChildNodes[3].ChildNodes[0].InnerHtml); // name of the chapter
            }

            // A page without any entry also ends the scan; without this check such a page
            // would keep the loop requesting further pages forever.
            if (foundEnd || !sawEntry)
            {
                break;
            }
        }
    }

    // Add all chapter links and chapter names of the just scraped site to the full lists of the comic.
    chapterLinks.Reverse();
    absoluteChapterNR += chapterLinks.Count;
    ToonChapters.Add(chapterLinks.ToArray());

    chapterNames.Reverse();

    // Sanitize the chapter names: remove every character that is invalid in a file name or path.
    string invalidChars = new string(Path.GetInvalidFileNameChars()) + new string(Path.GetInvalidPathChars());
    for (int k = 0; k < chapterNames.Count; k++)
    {
        foreach (char c in invalidChars)
        {
            chapterNames[k] = chapterNames[k].Replace(c.ToString(), "");
        }
    }

    ToonChapterNames.Add(chapterNames.ToArray());
}