/// <summary>
/// Builds the offline table-of-contents page for a novel and writes it to
/// <c>{details.Path}/{details.SeriesTitle}/{details.SeriesCode}.htm</c>.
/// </summary>
/// <remarks>
/// The page is assembled from nodes picked out of <paramref name="doc"/>:
/// page title, series title (syousetsu only), novel title, author and the
/// chapter index. Chapter links inside the index are rewritten to point at
/// the local chapter files and each chapter label is prefixed with its
/// running number. The matching stylesheets are handed to DownloadCss.
/// Note that <paramref name="doc"/> is modified in place (links are rewritten).
/// Any of the selected nodes may be missing; missing parts are simply left
/// out of the generated page.
/// </remarks>
/// <param name="details">Novel settings (site type, series code/title, output path, filename format, chapter list, range end).</param>
/// <param name="doc">The parsed table-of-contents page of the novel.</param>
public static void GenerateTableOfContents(Syousetsu.Constants details, HtmlDocument doc)
{
    //create novel folder if it doesn't exist
    CheckDirectory(details);

    bool isSyousetsu = details.Site() == Constants.SiteType.Syousetsu;
    HtmlNode root = doc.DocumentNode;

    HtmlNode ptitleNode = root.SelectSingleNode("//title"); // page title
    HtmlNode stitleNode;                                    // series title
    HtmlNode titleNode;                                     // novel title
    HtmlNode writerNode;                                    // author
    HtmlNode tocNode;                                       // table of contents

    if (isSyousetsu)
    {
        stitleNode = root.SelectSingleNode("//p[@class='series_title']");
        titleNode = root.SelectSingleNode("//p[@class='novel_title']");
        writerNode = root.SelectSingleNode("//div[@class='novel_writername']");
        tocNode = root.SelectSingleNode("//div[@class='index_box']");
    }
    else // kakuyomu
    {
        stitleNode = null;
        titleNode = root.SelectSingleNode("//h1[@id='workTitle']");
        writerNode = root.SelectSingleNode("//h2[@id='workAuthor']");
        tocNode = root.SelectSingleNode("//section[@class='widget-toc']");

        // remove left header (either the section or its header may be absent)
        tocNode?.ChildNodes["header"]?.Remove();
    }

    // get css links and download them
    // SelectNodes yields null (not an empty collection) when nothing matches.
    HtmlNodeCollection cssNodeList = root.SelectNodes("//link[@rel='stylesheet']");
    string[] wantedCss =
    {
        "ncout.css", "ncout2.css", "kotei.css", "reset.css", // syousetsu
        "kakuyomu.css"                                       // kakuyomu
    };

    List<string> cssink = new List<string>();
    if (cssNodeList != null)
    {
        string cssPattern = isSyousetsu
            ? "(href=\")(?<link>.+)(?=\" media)"
            : "(href=\")(?<link>.+)(?=\">)";
        Regex cssRegex = new Regex(cssPattern);

        foreach (HtmlNode linkNode in cssNodeList)
        {
            string href = linkNode.Attributes["href"]?.Value;
            if (string.IsNullOrEmpty(href) || !wantedCss.Any(name => href.Contains(name)))
            {
                continue;
            }

            // Prefer the link as written in the markup; if the tag is not laid
            // out the way the pattern expects, fall back to the attribute value
            // instead of queueing an empty link.
            Match cssMatch = cssRegex.Match(linkNode.OuterHtml);
            cssink.Add(cssMatch.Success ? cssMatch.Groups["link"].Value : href);
        }
    }
    DownloadCss(details, cssink);

    StringBuilder sb = new StringBuilder();
    sb.AppendLine("<html>");
    sb.AppendLine("<head>");
    sb.AppendLine("\t<meta charset=\"UTF-8\">");
    sb.AppendLine("\t<link rel=\"stylesheet\" type=\"text/css\" href=\"" + details.SeriesCode + ".css\" media=\"screen,print\" />");
    if (ptitleNode != null)
    {
        sb.AppendLine(ptitleNode.OuterHtml);
    }
    sb.AppendLine("</head>");

    if (isSyousetsu)
    {
        sb.AppendLine("<body>");
    }
    else // kakuyomu
    {
        // copy the opening <body ...> tag as is, for its id
        HtmlNode body = root.SelectSingleNode("//body");
        if (body != null)
        {
            string bodyHtml = body.OuterHtml;
            sb.AppendLine(bodyHtml.Substring(0, bodyHtml.IndexOf('>') + 1));
        }
        else
        {
            sb.AppendLine("<body>");
        }
    }

    // restore header links (site-relative hrefs would be broken in the local copy)
    if (stitleNode != null) // syousetsu, novel belongs to a series
    {
        PrefixHrefWithHost(stitleNode.ChildNodes["a"], "https://ncode.syosetu.com");
        sb.AppendLine(stitleNode.OuterHtml);
    }
    else // kakuyomu (on syousetsu pages these selectors simply match nothing)
    {
        PrefixHrefWithHost(
            root.SelectSingleNode("//h1[@id='workTitle']/a"),
            "https://kakuyomu.jp");
        PrefixHrefWithHost(
            root.SelectSingleNode("//h2[@id='workAuthor']/span[@id='workAuthor-activityName']/a"),
            "https://kakuyomu.jp");
    }

    if (titleNode != null)
    {
        sb.AppendLine(titleNode.OuterHtml);
    }
    if (writerNode != null)
    {
        sb.AppendLine(writerNode.OuterHtml);
    }

    //edit all href
    HtmlNodeCollection chapterNode = root.SelectNodes(isSyousetsu
        ? "//div[@class='index_box']/dl[@class='novel_sublist2']"
        : "//section[@class='widget-toc']/div[@class='widget-toc-main']/ol/li[@class='widget-toc-episode']");

    if (chapterNode != null)
    {
        // A blank End would make Convert.ToInt32 throw; treat it as "no upper
        // bound". NOTE(review): assumes blank End means "up to the last
        // chapter", as it does in AddDownloadJob - confirm with the caller.
        int lastChapter = details.End == String.Empty
            ? int.MaxValue
            : Convert.ToInt32(details.End);

        int i = 1;
        foreach (HtmlNode node in chapterNode)
        {
            // syousetsu: <dl><dd><a>title</a></dd></dl>
            // kakuyomu:  <li><a><span>title</span></a></li>
            HtmlNode anchor = isSyousetsu
                ? node.ChildNodes["dd"].ChildNodes["a"]
                : node.ChildNodes["a"];
            HtmlNode label = isSyousetsu
                ? anchor
                : anchor.ChildNodes["span"];

            //edit href so it points at the local chapter file
            string fileName = String.Format(
                details.FilenameFormat + ".htm",
                i,
                details.GetChapterByIndex(i).title,
                details.SeriesCode);
            anchor.Attributes["href"].Value = "./" + fileName;
            label.InnerHtml = "(" + i + ") " + label.InnerHtml;

            if (i <= lastChapter)
            {
                CheckDirectory(details, i);
            }
            i++;
        }
    }

    if (tocNode != null)
    {
        sb.AppendLine(tocNode.OuterHtml);
    }
    sb.AppendLine("</body>");
    sb.AppendLine("</html>");

    File.WriteAllText(
        Path.Combine(details.Path, details.SeriesTitle, details.SeriesCode + ".htm"),
        sb.ToString());
}

/// <summary>
/// Prepends <paramref name="host"/> to the <c>href</c> of <paramref name="anchor"/>.
/// Does nothing when the node, the attribute or its value is missing/empty.
/// </summary>
private static void PrefixHrefWithHost(HtmlNode anchor, string host)
{
    if (anchor == null)
    {
        return;
    }

    var hrefAttr = anchor.Attributes["href"];
    if (hrefAttr == null || string.IsNullOrEmpty(hrefAttr.Value))
    {
        return;
    }

    hrefAttr.Value = host + hrefAttr.Value;
}
/// <summary>
/// Starts a background job that downloads and saves chapters one after
/// another, updating the given progress bar and label as it goes.
/// </summary>
/// <remarks>
/// The job starts at <c>details.Start</c> (or chapter 1 when that is blank)
/// and runs up to and including <c>pb.Maximum</c>. After every chapter the
/// download history is saved and <c>pb.Value</c> is set to the chapter index.
/// When the loop ends, <c>pb.Tag</c> is set to 1, the tooltip is cleared and
/// the label is prefixed with "finished - " or "download aborted - ".
/// NOTE(review): an exception thrown while downloading ends the task without
/// running the final UI update - confirm whether callers rely on that.
/// </remarks>
/// <param name="details">Novel settings (site type, link, chapter list, requested start).</param>
/// <param name="pb">Progress bar; its <c>Maximum</c> is the last chapter to download.</param>
/// <param name="lb">Label that receives the final status prefix.</param>
/// <returns>
/// The token source controlling the job. Cancellation is checked after each
/// chapter, so the chapter in progress is still completed.
/// </returns>
public static CancellationTokenSource AddDownloadJob(Syousetsu.Constants details, ProgressBar pb, Label lb)
{
    int max = Convert.ToInt32(pb.Maximum);

    // A blank Start means "begin with the first chapter". The end of the range
    // is always pb.Maximum, which the loop condition below already enforces.
    int first = details.Start != String.Empty
        ? Convert.ToInt32(details.Start)
        : 1;

    bool isSyousetsu = details.Site() == Constants.SiteType.Syousetsu;
    CancellationTokenSource ct = new CancellationTokenSource();

    Action job = () =>
    {
        bool cancelled = false;

        for (int ctr = first; ctr <= max; ctr++)
        {
            string subLink = isSyousetsu
                ? details.Link + ctr
                : details.Link + "/episodes/" + details.GetChapterByIndex(ctr).number; // kakuyomu

            string[] chapter = Create.GenerateContents(details, GetPage(subLink, details), ctr);
            Create.SaveFile(details, chapter, ctr);

            // update downloaded history
            details.LastDownloaded = ctr;
            History.SaveNovel(details);

            int done = ctr; // stable copy for the UI callback
            pb.Dispatcher.Invoke((Action)(() => { pb.Value = done; }));

            if (ct.IsCancellationRequested)
            {
                // another thread decided to cancel
                cancelled = true;
                break;
            }
        }

        pb.Dispatcher.Invoke((Action)(() =>
        {
            pb.ToolTip = null;
            pb.Tag = 1;
            if (cancelled)
            {
                lb.Content = "download aborted - " + lb.Content;
            }
            else
            {
                pb.Value = max;
                lb.Content = "finished - " + lb.Content;
            }
        }));
    };

    // Pin the job to the default scheduler on its own thread: it blocks for the
    // whole download and must never be scheduled onto an ambient (UI) scheduler,
    // where the Dispatcher.Invoke calls above would stall the interface.
    Task.Factory.StartNew(job, ct.Token, TaskCreationOptions.LongRunning, TaskScheduler.Default);

    return ct;
}