Exemple #1
0
        /// <summary>
        /// Reduces a downloaded page to its text body and appends the result to <c>NovelText</c>.
        /// </summary>
        /// <remarks>
        /// The input is first passed through <c>ToTraditional</c> (defined elsewhere), then a fixed
        /// list of regex substitutions is applied. The order matters: non-breaking spaces and line
        /// breaks are swapped for placeholder tokens before the generic tag stripping runs, and are
        /// restored afterwards. No <c>RegexOptions</c> are supplied, so <c>.</c> does not match a
        /// line feed in any of the patterns.
        /// </remarks>
        /// <param name="webHtml">Raw HTML of the page to process.</param>
        public override void HtmlCapture(string webHtml)
        {
            // { pattern, replacement } pairs, applied strictly from top to bottom.
            string[,] rules =
            {
                { "(<!D).+?(<div id=\"contentmain)", "<" },    // cut the page head, up to the content container
                { "(</ul></div>).+?(html>)", "</ul>" },        // cut the page tail, through the closing html tag
                { "&nbsp;", "-空白-" },                         // protect non-breaking spaces with a placeholder
                { "<ul id=\"contentdp\">", "-換行-" },          // this opening tag becomes a line-break placeholder
                { "<br />", "-換行-" },                         // protect explicit line breaks with a placeholder
                { "(<ul).+?(</ul>)", string.Empty },           // fine cut: drop every remaining <ul ... </ul> span
                { "<.+?>", " " },                              // any tag still left turns into a single space
                { "-空白-", "&nbsp;" },                         // restore non-breaking spaces
                { "-換行-", "<br />" },                         // restore line breaks
            };

            var content = ToTraditional(webHtml);

            for (int i = 0; i < rules.GetLength(0); i++)
            {
                content = Regex.Replace(content, rules[i, 0], rules[i, 1]);
            }

            NovelText.Add(content);
        }
Exemple #2
0
        /// <summary>
        /// Splits a downloaded page into chapter bodies and appends each one to <c>NovelText</c>.
        /// </summary>
        /// <remarks>
        /// Chapter boundaries are marked with sentinel strings, everything lying between the end of
        /// one chapter and the start of the next is collapsed into a blank line, and each chapter
        /// body is then stripped of tags while keeping its <c>&lt;br /&gt;</c> line breaks.
        /// <c>&amp;nbsp;</c> entities are not converted here. According to the original author's
        /// notes the patterns target the 卡提諾 page layout.
        /// </remarks>
        /// <param name="webHtml">Raw HTML of the page to process.</param>
        public override void HtmlCapture(string webHtml)
        {
            // Cut the head of the first page, up to and including the "<!-- END -->" marker.
            var page = Regex.Replace(webHtml, "(<!D).+?(<!-- END -->)", string.Empty);

            // Opening tag of each chapter cell -> head sentinel.
            page = Regex.Replace(page, "(<td class=\"t_f).+?(\">)", "--頭--頭--");

            // Closing tags of each chapter cell -> tail sentinel.
            page = Regex.Replace(page, "</td></tr>", "~尾~~--");

            // Pad the text with a leading "~--" and a trailing "--頭-", then collapse every
            // "~--" ... "--頭-" span (i.e. whatever sits between a tail and the next head)
            // into a blank line. What is left around each body is "-頭--" and "~尾~".
            page = Regex.Replace("~--" + page + "--頭-", "(~--).+?(--頭-)", "\n\n");

            // Every chapter body sits between "-頭--" and "~尾~".
            foreach (Match chapter in Regex.Matches(page, "(?<=-頭--).+?(?=~尾~)", RegexOptions.IgnoreCase | RegexOptions.Multiline))
            {
                // Shield line breaks with a placeholder, turn all other tags into spaces,
                // then bring the line breaks back.
                var body = Regex.Replace(
                    Regex.Replace(
                        Regex.Replace(chapter.Value, "<br />", "-換行-"),
                        "<.+?>", " "),
                    "-換行-", "<br />");

                NovelText.Add(body);
            }
        }
        /// <summary>
        /// Downloads the novel.
        /// </summary>
        /// <remarks>
        /// Fetches every URL returned by <c>getNovelUrl</c> with <c>getHtml</c>, runs each page
        /// through <c>analysisHtml</c>, and stores the collected results in <c>NovelText</c> under
        /// <c>Ncode</c>. While <c>Juge</c> is true, the result of <c>analysisHtmTitlel</c> is also
        /// collected per page and stored in <c>Title</c> under the same key.
        /// </remarks>
        public void DownloadNovel()
        {
            var bodies   = new List<string>();
            var headings = new List<string>();

            foreach (string address in getNovelUrl())
            {
                string page = getHtml(address);

                bodies.Add(analysisHtml(page));

                // The title is only extracted when the flag is set.
                if (Juge)
                {
                    headings.Add(analysisHtmTitlel(page));
                }
            }

            NovelText.Add(Ncode, bodies);

            if (Juge)
            {
                Title.Add(Ncode, headings);
            }
        }