/// <summary>
/// Reduces a Wenku chapter page to its text and appends the result to <c>NovelText</c>.
/// </summary>
/// <param name="webHtml">
/// Raw HTML of the page. It is passed through <c>ToTraditional</c> first
/// (presumably a Simplified-to-Traditional Chinese conversion; confirm against that helper).
/// </param>
/// <remarks>
/// The appended string keeps line breaks as <c>&lt;br /&gt;</c>; every other tag is
/// replaced by a single space.
/// </remarks>
public override void HtmlCapture(string webHtml)
{
    var text = ToTraditional(webHtml);

    // Cut the page header: everything from the doctype up to the content container.
    // Singleline lets '.' cross line breaks, so the cut also works on multi-line HTML.
    text = Regex.Replace(text, "(<!D).+?(<div id=\"contentmain)", "<", RegexOptions.Singleline);

    // Cut the page footer: everything from the closing list/div to the end of the document.
    text = Regex.Replace(text, "(</ul></div>).+?(html>)", "</ul>", RegexOptions.Singleline);

    // Park non-breaking spaces and line breaks behind placeholders so they survive
    // the tag stripping below. Only the &nbsp; entity and the literal U+00A0 character
    // are matched: rewriting ordinary spaces would corrupt the "<ul id=...>" and
    // "<br />" patterns on the next two lines, which contain a space themselves.
    text = Regex.Replace(text, "&nbsp;|\u00A0", "-空白-");
    text = Regex.Replace(text, "<ul id=\"contentdp\">", "-換行-");
    text = Regex.Replace(text, "<br />", "-換行-");

    // Drop every remaining <ul>...</ul> block together with its content.
    text = Regex.Replace(text, "(<ul).+?(</ul>)", string.Empty, RegexOptions.Singleline);

    // Replace any other tag with a single space.
    text = Regex.Replace(text, "<.+?>", " ");

    // Turn the placeholders back into a space and an HTML line break.
    text = Regex.Replace(text, "-空白-", " ");
    text = Regex.Replace(text, "-換行-", "<br />");

    NovelText.Add(text);
}
/// <summary>
/// Splits a Ck101 thread page into its chapter posts and appends the text of each
/// post to <c>NovelText</c>, one entry per post.
/// </summary>
/// <param name="webHtml">Raw HTML of the thread page.</param>
/// <remarks>
/// Each appended entry keeps line breaks as <c>&lt;br /&gt;</c>; every other tag is
/// replaced by a single space.
/// </remarks>
public override void HtmlCapture(string webHtml)
{
    string str = webHtml;

    // First page of a thread: cut everything from the doctype to the END marker.
    // Singleline lets '.' cross line breaks, so the cut also works on multi-line HTML.
    str = Regex.Replace(str, "(<!D).+?(<!-- END -->)", string.Empty, RegexOptions.Singleline);

    // Mark the opening tag of every chapter post. The marker is doubled on purpose:
    // the cut below consumes the leading "--頭-" and leaves "-頭--" in front of the text.
    str = Regex.Replace(str, "(<td class=\"t_f).+?(\">)", "--頭--頭--");

    // Mark the end of every chapter post. "~尾~" stays behind the text, and the
    // trailing "~--" is where the cut below starts.
    str = Regex.Replace(str, "</td></tr>", "~尾~~--");

    // Add sentinels so the text before the first post and after the last post
    // is cut the same way as the text between posts.
    str = "~--" + str + "--頭-";

    // Remove everything between the end of one post and the start of the next.
    str = Regex.Replace(str, "(~--).+?(--頭-)", "\n\n", RegexOptions.Singleline);

    // Singleline (not Multiline, which only changes '^' and '$') is what allows a
    // chapter body containing line breaks to be captured as one match.
    MatchCollection matches = Regex.Matches(
        str,
        "(?<=-頭--).+?(?=~尾~)",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    foreach (Match match in matches)
    {
        // Park line breaks behind a placeholder, strip all other tags, then restore them.
        string line = Regex.Replace(match.Value, "<br />", "-換行-");
        line = Regex.Replace(line, "<.+?>", " ");
        line = Regex.Replace(line, "-換行-", "<br />");
        NovelText.Add(line);
    }
}
/// <summary>
/// Downloads the novel.
/// </summary>
/// <remarks>
/// For every URL returned by <c>getNovelUrl</c>, the page HTML is obtained with
/// <c>getHtml</c> and the result of <c>analysisHtml</c> is collected. While
/// <c>Juge</c> is true, the result of <c>analysisHtmTitlel</c> is collected as well.
/// The collected lists are stored in <c>NovelText</c> (and in <c>Title</c> when
/// <c>Juge</c> is true) under the key <c>Ncode</c>.
/// </remarks>
public void DownloadNovel()
{
    var chapterTexts = new List<string>();
    var chapterTitles = new List<string>();

    foreach (string chapterUrl in getNovelUrl())
    {
        string page = getHtml(chapterUrl);
        chapterTexts.Add(analysisHtml(page));

        if (!Juge)
        {
            continue;
        }

        chapterTitles.Add(analysisHtmTitlel(page));
    }

    NovelText.Add(Ncode, chapterTexts);

    if (Juge)
    {
        Title.Add(Ncode, chapterTitles);
    }
}