/// <summary>
/// Downloads the chapter page and returns the inner text of the first
/// element carrying the <c>chp_raw</c> class.
/// </summary>
/// <param name="chp">Chapter whose <c>chapterLink</c> is downloaded.</param>
/// <param name="use">Document instance that receives the downloaded markup (its previous content is replaced).</param>
/// <param name="wc">Web client used for the request; its headers are overwritten.</param>
/// <returns>The <c>InnerText</c> of the first <c>chp_raw</c> element.</returns>
public override string GetText(Chapter chp, HtmlDocument use, WebClient wc)
{
    // Headers are generated per host before the request is issued.
    wc.Headers = IAppBase.GenerateHeaders(chp.chapterLink.Host);

    string pageHtml = wc.DownloadString(chp.chapterLink);
    use.LoadHtml(pageHtml);

    // No forced GC.Collect() here: an induced full collection per chapter only
    // stalls the caller; the runtime reclaims the temporary strings on its own.
    // NOTE(review): assumes GetFirstElementByClassNameA returns a non-null node
    // when the class is present - confirm its behavior when nothing matches.
    return use.DocumentNode.FindAllNodes().GetFirstElementByClassNameA("chp_raw").InnerText;
}
/// <summary>
/// Downloads the chapter page, isolates the first <c>div</c> whose class
/// contains <c>chapter-c</c>, empties every script-related node inside it and
/// returns the remaining non-blank text, HTML-decoded.
/// </summary>
/// <param name="chp">Chapter whose <c>chapterLink</c> is downloaded.</param>
/// <param name="use">Document instance used as scratch space; it is reloaded twice and ends up holding only the content container's markup.</param>
/// <param name="wc">Web client used for the request; its headers are overwritten.</param>
/// <returns>
/// Each non-blank text node followed by a blank line, HTML-decoded; an empty
/// string when the container holds no non-blank text.
/// </returns>
/// <exception cref="WebException">The download still fails after the last retry.</exception>
/// <exception cref="InvalidOperationException">The page has no <c>chapter-c</c> container.</exception>
public override string GetText(Chapter chp, HtmlDocument use, WebClient wc)
{
    // Transient network failures are retried, but only a bounded number of
    // times: an unbounded retry never returns for a dead or blocked link.
    const int maxDownloadAttempts = 5;

    wc.Headers = IAppBase.GenerateHeaders(chp.chapterLink.Host);

    string dwnld = null;
    for (int attempt = 1; attempt <= maxDownloadAttempts; attempt++)
    {
        try
        {
            dwnld = wc.DownloadString(chp.chapterLink);
            break;
        }
        catch (WebException)
        {
            // Only network errors are retried; anything else (e.g. a null
            // link) propagates immediately instead of looping.
            if (attempt == maxDownloadAttempts)
            {
                throw;
            }
        }
    }

    use.LoadHtml(dwnld);

    // SelectNodes yields null (not an empty collection) when nothing matches.
    HtmlNodeCollection containers = use.DocumentNode.SelectNodes("//div[contains(@class, 'chapter-c')]");
    if (containers == null || containers.Count == 0)
    {
        throw new InvalidOperationException("Chapter content container ('chapter-c') was not found in the downloaded page.");
    }

    HtmlNode content = containers[0];

    // Snapshot the matches with ToArray() first so the tree is not mutated
    // while it is being enumerated. The predicate matches every node whose
    // XPath contains "/script", i.e. script elements and their descendants.
    HtmlNode[] scripts = content.DescendantNodes().Where(x => x.XPath.Contains("/script")).ToArray();
    foreach (HtmlNode script in scripts)
    {
        script.RemoveAll();
    }

    // Reparse just the cleaned container so the text query below cannot pick
    // up anything from the rest of the page.
    use.LoadHtml(content.InnerHtml);

    HtmlNodeCollection textNodes = use.DocumentNode.SelectNodes("//text()[normalize-space(.) != '']");
    if (textNodes == null)
    {
        return string.Empty;
    }

    StringBuilder sb = new StringBuilder();
    foreach (HtmlNode textNode in textNodes)
    {
        // AppendLine plus the explicit "\n" leaves a blank line between fragments.
        sb.AppendLine(textNode.InnerText + "\n");
    }

    return HttpUtility.HtmlDecode(sb.ToString());
}