/// <summary>
/// Fetches the sections from <c>TaskInfo.BeginSection</c> through <c>TaskInfo.EndSection</c>
/// from www.ranwen.net, runs each chapter through the typesetting plug-ins and passes the
/// result to <c>FileWrite.TxtWrire</c>.
/// </summary>
/// <returns>
/// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals <c>TaskInfo.EndSection</c> after the
/// loop has ended; otherwise <c>false</c>.
/// </returns>
public override bool Download()
{
    CurrentParameter.IsStop = false;

    // Typesetting plug-ins, applied to every chapter in this order.
    var formatters = new Collection<ITypeSetting>
    {
        new BrRegex(),
        new HtmlDecode(),
        new UniformFormat(),
        new Traditional()
    };

    for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
    {
        // Compose the chapter URL: /files/article/{Tid / 1000}/{Tid}/{section name}.html
        string folder = (CommonTools.TryParse(TaskInfo.Tid, 0) / 1000).ToString(CultureInfo.InvariantCulture);
        var bookId = TaskInfo.Tid;
        string sectionName = SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture);
        string chapterUrl = string.Format("http://www.ranwen.net/files/article/{0}/{1}/{2}.html", folder, bookId, sectionName);

        // The fetch happens outside the try block, so only parsing/writing failures are retried.
        HtmlDocument page = GetHtmlDocument(chapterUrl);
        try
        {
            // Chapter title first, then the content node with its nested <div> elements removed.
            var titleNode = page.DocumentNode.SelectSingleNode("//*[@id=\"bgdiv\"]/table[2]/tbody/tr[1]/td/div[1]/h1");
            string title = titleNode.InnerText;

            var contentNode = page.DocumentNode.SelectSingleNode("//*[@id=\"content\"]");
            Network.RemoveSubHtmlNode(contentNode, "div");

            string chapterText = title + "\r\n" + contentNode.InnerHtml + "\r\n";
            foreach (var formatter in formatters)
            {
                formatter.Set(ref chapterText);
            }

            FileWrite.TxtWrire(chapterText, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
        }
        catch (Exception)
        {
            // On any error re-fetch the current section: step BeginSection back so the loop
            // increment lands on the same section again, and count the failure.
            // (Debug logging of the exception is disabled here.)
            TaskInfo.BeginSection--;
            TaskInfo.FailTimes++;
            continue;
        }

        TaskInfo.HasStopped = CurrentParameter.IsStop;
    }

    return TaskInfo.CurrentSection == TaskInfo.EndSection;
}
/// <summary>
/// Fetches the sections from <c>TaskInfo.BeginSection</c> through <c>TaskInfo.EndSection</c>
/// from book.sfacg.com, runs each chapter body through the typesetting plug-ins and passes
/// the result to <c>FileWrite.TxtWrire</c>.
/// </summary>
/// <returns>
/// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals <c>TaskInfo.EndSection</c> after the
/// loop has ended; otherwise <c>false</c>.
/// </returns>
public override bool Download()
{
    CurrentParameter.IsStop = false;

    string novelUrl = string.Format("http://book.sfacg.com/Novel/{0}", TaskInfo.Tid);

    // Typesetting plug-ins, applied to every chapter in this order.
    var formatters = new Collection<ITypeSetting>
    {
        new SfacgToIndent(),
        new HtmlDecode(),
        new UniformFormat(),
        new Traditional()
    };

    for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
    {
        // Compose the URL: the section name is appended directly to the novel URL.
        string sectionName = SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture);
        string chapterUrl = string.Concat(novelUrl, sectionName);

        // The fetch happens outside the try block, so only parsing/writing failures are retried.
        HtmlDocument page = GetHtmlDocument(chapterUrl);
        try
        {
            // Take the chapter body and drop its <img> elements before reading the HTML.
            var bodyNode = page.DocumentNode.SelectSingleNode(@"//*[@id=""ChapterBody""]");
            Network.RemoveSubHtmlNode(bodyNode, "img");

            string chapterText = bodyNode.InnerHtml;
            foreach (var formatter in formatters)
            {
                formatter.Set(ref chapterText);
            }

            FileWrite.TxtWrire(chapterText, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
        }
        catch (Exception)
        {
            // On any error re-fetch the current section: step BeginSection back so the loop
            // increment lands on the same section again, and count the failure.
            // (Debug logging of the exception is disabled here.)
            TaskInfo.BeginSection--;
            TaskInfo.FailTimes++;
            continue;
        }

        TaskInfo.HasStopped = CurrentParameter.IsStop;
    }

    return TaskInfo.CurrentSection == TaskInfo.EndSection;
}
/// <summary>
/// Fetches the sections from <c>TaskInfo.BeginSection</c> through <c>TaskInfo.EndSection</c>
/// from lknovel.lightnovel.cn, runs the text of each page through the typesetting plug-ins
/// and passes the result to <c>FileWrite.TxtWrire</c>.
/// </summary>
/// <returns>
/// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals <c>TaskInfo.EndSection</c> after the
/// loop has ended; otherwise <c>false</c>.
/// </returns>
public override bool Download()
{
    CurrentParameter.IsStop = false;

    string prefix = "http://lknovel.lightnovel.cn/main/view/";
    string suffix = ".html?charset=big5";

    // Typesetting plug-ins, applied to every chapter in this order.
    var formatters = new Collection<ITypeSetting>
    {
        new HtmlDecode(),
        new UniformFormat()
    };

    for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
    {
        // Compose the URL: prefix + section name + suffix.
        string sectionName = SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture);
        string chapterUrl = string.Concat(prefix, sectionName, suffix);

        // The fetch happens outside the try block, so only parsing/writing failures are retried.
        HtmlDocument page = GetHtmlDocument(chapterUrl);
        try
        {
            var viewNode = page.DocumentNode.SelectSingleNode(@"//*[@id=""J_view""]");

            string chapterText = viewNode.InnerText;
            foreach (var formatter in formatters)
            {
                formatter.Set(ref chapterText);
            }

            FileWrite.TxtWrire(chapterText, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
        }
        catch (Exception)
        {
            // On any error re-fetch the current section: step BeginSection back so the loop
            // increment lands on the same section again, and count the failure.
            // (Debug logging of the exception is disabled here.)
            TaskInfo.BeginSection--;
            TaskInfo.FailTimes++;
            continue;
        }

        TaskInfo.HasStopped = CurrentParameter.IsStop;
    }

    return TaskInfo.CurrentSection == TaskInfo.EndSection;
}
/// <summary>
/// Fetches the sections from <c>TaskInfo.BeginSection</c> through <c>TaskInfo.EndSection</c>
/// from www.biquge.com, runs each chapter (title plus content HTML) through the typesetting
/// plug-ins and passes the result to <c>FileWrite.TxtWrire</c>.
/// </summary>
/// <returns>
/// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals <c>TaskInfo.EndSection</c> after the
/// loop has ended; otherwise <c>false</c>.
/// </returns>
public override bool Download()
{
    CurrentParameter.IsStop = false;

    // Typesetting plug-ins, applied to every chapter in this order.
    var formatters = new Collection<ITypeSetting>
    {
        new BrRegex(),
        new HtmlDecode(),
        new UniformFormat(),
        new Traditional()
    };

    for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
    {
        try
        {
            // Compose the URL: /{Tid}/{section name}.html
            string chapterUrl = string.Format(
                "http://www.biquge.com/{0}/{1}.html",
                TaskInfo.Tid,
                SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture));
            HtmlDocument page = GetHtmlDocument(chapterUrl);

            // Title text first, then the raw HTML of the content node, each followed by CRLF.
            string title = page.DocumentNode.SelectSingleNode("//*[@id=\"wrapper\"]/div[3]/div[1]/div[2]/h1").InnerText;
            string contentHtml = page.DocumentNode.SelectSingleNode("//*[@id=\"content\"]").InnerHtml;
            string chapterText = title + "\r\n" + contentHtml + "\r\n";

            for (int i = 0; i < formatters.Count; i++)
            {
                formatters[i].Set(ref chapterText);
            }

            FileWrite.TxtWrire(chapterText, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
        }
        catch (Exception)
        {
            // On any error re-fetch the current section: step BeginSection back so the loop
            // increment lands on the same section again, and count the failure.
            // (Debug logging of the exception is disabled here.)
            TaskInfo.BeginSection--;
            TaskInfo.FailTimes++;
            continue;
        }

        TaskInfo.HasStopped = CurrentParameter.IsStop;
    }

    return TaskInfo.CurrentSection == TaskInfo.EndSection;
}
/// <summary>
/// Fetches the sections from <c>TaskInfo.BeginSection</c> through <c>TaskInfo.EndSection</c>
/// from tw.hjwzw.com, runs the text of each chapter through the typesetting plug-ins and
/// passes the result to <c>FileWrite.TxtWrire</c>.
/// </summary>
/// <returns>
/// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals <c>TaskInfo.EndSection</c> after the
/// loop has ended; otherwise <c>false</c>.
/// </returns>
public override bool Download()
{
    CurrentParameter.IsStop = false;

    // Typesetting plug-ins, applied to every chapter in this order.
    var formatters = new Collection<ITypeSetting>
    {
        new HtmlDecode(),
        new UniformFormat(),
        new HjwzwRegex()
    };

    for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
    {
        try
        {
            // Compose the URL: /Book/Read/{Tid},{section name}
            string chapterUrl = string.Format(
                "http://tw.hjwzw.com/Book/Read/{0},{1}",
                TaskInfo.Tid,
                SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture));
            HtmlDocument page = GetHtmlDocument(chapterUrl);

            // The chapter text is read from a fixed absolute position in the page layout.
            var chapterNode = page.DocumentNode.SelectSingleNode("/html/body/table[7]/tr/td/div[5]");
            string chapterText = chapterNode.InnerText;

            for (int i = 0; i < formatters.Count; i++)
            {
                formatters[i].Set(ref chapterText);
            }

            FileWrite.TxtWrire(chapterText, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
        }
        catch (Exception)
        {
            // On any error re-fetch the current section: step BeginSection back so the loop
            // increment lands on the same section again, and count the failure.
            // (Debug logging of the exception is disabled here.)
            TaskInfo.BeginSection--;
            TaskInfo.FailTimes++;
            continue;
        }

        TaskInfo.HasStopped = CurrentParameter.IsStop;
    }

    return TaskInfo.CurrentSection == TaskInfo.EndSection;
}
/// <summary>
/// Fetches the sections from <c>TaskInfo.BeginSection</c> through <c>TaskInfo.EndSection</c>
/// of a ck101.com thread. Each thread page holds <c>TaskInfo.PageSection</c> sections; a page
/// is fetched once and its posts (elements with class <c>t_f</c>) are reused for every
/// section on that page. Each post is stripped of unwanted sub-elements, run through the
/// typesetting plug-ins and passed to <c>FileWrite.TxtWrire</c>.
/// </summary>
/// <returns>
/// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals <c>TaskInfo.EndSection</c> after the
/// loop has ended; otherwise <c>false</c>.
/// </returns>
public override bool Download()
{
    CurrentParameter.IsStop = false;

    // Split the thread URL into the part before and the part after the page number.
    // If the URL does not match, both parts stay empty.
    Regex urlPattern = new Regex(@"(?<Head>^https?:\/\/\w*\.*ck101.com\/thread-\d+-)(?<CurrentPage>\d+)(?<Tail>-\w+\.html)");
    Match urlMatch = urlPattern.Match(TaskInfo.Url);
    string urlHead = string.Empty, urlTail = string.Empty;
    if (urlMatch.Success)
    {
        urlHead = urlMatch.Groups["Head"].Value;
        urlTail = urlMatch.Groups["Tail"].Value;
    }

    HtmlNodeCollection nodeHeaders = null;
    int lastPage = 0;

    // Typesetting plug-ins, applied to every post in this order.
    var typeSetting = new Collection<ITypeSetting>
    {
        new HtmlDecode(),
        new UniformFormat()
    };

    for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
    {
        try
        {
            // Page that contains the section to download (1-based, rounded up).
            int newCurrentPage = (TaskInfo.BeginSection + TaskInfo.PageSection - 1) / TaskInfo.PageSection;
            if (lastPage != newCurrentPage) // only fetch when the page differs from the cached one
            {
                lastPage = newCurrentPage; // remember the page so it is not fetched again

                // Drop the previous page's posts before fetching. Otherwise a null document
                // would leave the old nodes in place and the wrong page's posts would be
                // written for the new sections.
                nodeHeaders = null;

                string url = urlHead + lastPage.ToString(CultureInfo.InvariantCulture) + urlTail; // compose URL
                if (lastPage == 1) // special handling for the first page of ck101
                {
                    // The first page often comes back incomplete, so alternate between the
                    // desktop and the mobile URL depending on the failure count.
                    switch (TaskInfo.FailTimes % 2)
                    {
                        case 0:
                            url = string.Format("https://ck101.com/thread-{0}-1-1.html", TaskInfo.Tid);
                            break;
                        case 1:
                            url = string.Format("https://m.ck101.com/forum.php?mod=redirect&ptid={0}&authorid=0&postno=1", TaskInfo.Tid);
                            break;
                    }
                }

                HtmlDocument htmlRoot = GetHtmlDocumentReplaceDivToEmpty(url);
                if (htmlRoot != null)
                {
                    nodeHeaders = htmlRoot.DocumentNode.SelectNodes("//*[@class=\"t_f\"]");
                }
            }

            // Index of the wanted section within the current page.
            int partSection = TaskInfo.BeginSection - ((lastPage - 1) * TaskInfo.PageSection) - 1;
            if (nodeHeaders == null)
            {
                // Caught below, which triggers a retry of this section.
                throw new InvalidOperationException("下載資料為空的");
            }

            var post = nodeHeaders[partSection];
            Network.RemoveSubHtmlNode(post, "div");
            Network.RemoveSubHtmlNode(post, "ignore_js_op");
            Network.RemoveSubHtmlNode(post, "i");
            Network.RemoveSubHtmlNode(post, "script");

            string tempTxt = post.InnerText;
            foreach (var item in typeSetting)
            {
                item.Set(ref tempTxt);
            }

            FileWrite.TxtWrire(tempTxt, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
        }
        catch (Exception)
        {
            // On any error re-fetch the current section: step BeginSection back so the loop
            // increment lands on the same section again, count the failure, and reset
            // lastPage so the page is downloaded again.
            // (Debug logging of the exception is disabled here.)
            TaskInfo.BeginSection--;
            TaskInfo.FailTimes++;
            lastPage = 0;
            continue;
        }

        TaskInfo.HasStopped = CurrentParameter.IsStop;
    }

    return TaskInfo.CurrentSection == TaskInfo.EndSection;
}
/// <summary>
/// Fetches the sections from <c>TaskInfo.BeginSection</c> through <c>TaskInfo.EndSection</c>
/// from big5.quanben5.com. For each chapter page the arguments of the embedded
/// <c>ajax_post(...)</c> call are extracted and posted via <c>PostHtmlDocument</c>; the page
/// title plus the HTML of the post response are run through the typesetting plug-ins and
/// passed to <c>FileWrite.TxtWrire</c>.
/// </summary>
/// <returns>
/// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals <c>TaskInfo.EndSection</c> after the
/// loop has ended; otherwise <c>false</c>.
/// </returns>
public override bool Download()
{
    CurrentParameter.IsStop = false;

    // Typesetting plug-ins, applied to every chapter in this order.
    var typeSetting = new Collection<ITypeSetting>
    {
        new AnnotationRegex(),
        new BrRegex(),
        new PRegex(),
        new HtmlDecode(),
        new UniformFormat(),
    };

    // Loop-invariant pattern for the page's ajax_post(...) call; built once instead of per section.
    Regex ajaxCall = new Regex(@"ajax_post\('book','ajax_content','pinyin','(?<pinyin>\S+)','content_id','(?<content_id>\d+)','sky','(?<sky>\S+)','t','(?<t>\d+)'\)");

    for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
    {
        // Compose the URL: /n/{Tid}/{section name}.html
        string url = string.Format(
            "http://big5.quanben5.com/n/{0}/{1}.html",
            TaskInfo.Tid,
            SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture));

        // The fetch happens outside the try block, so only parsing/posting/writing failures are retried.
        HtmlDocument htmlRoot = GetHtmlDocument(url);
        try
        {
            Match m = ajaxCall.Match(htmlRoot.DocumentNode.InnerHtml);

            // NOTE(review): when the call is not found, an empty form body is still posted
            // (unchanged from the previous behavior) — confirm whether that should be
            // treated as a failure instead.
            string formData = "";
            if (m.Success)
            {
                // rndval: whole seconds since 1970-01-01 measured from UTC+8 clock time, with
                // "000" appended. Converted as Int64 because the value exceeds Int32.MaxValue
                // from January 2038 on, where Convert.ToInt32 would throw OverflowException.
                string timestamp = Convert.ToInt64(DateTime.UtcNow.AddHours(8).Subtract(new DateTime(1970, 1, 1)).TotalSeconds)
                    .ToString(CultureInfo.InvariantCulture) + "000";

                formData = string.Format(
                    "pinyin={0}&content_id={1}&sky={2}&t={3}&_type=ajax&rndval={4}",
                    m.Groups["pinyin"].Value,
                    m.Groups["content_id"].Value,
                    m.Groups["sky"].Value,
                    m.Groups["t"].Value,
                    timestamp);
            }

            HtmlDocument htmlPostResponse = PostHtmlDocument(formData);

            // Title comes from the original page, the chapter body from the post response.
            var titleNode = htmlRoot.DocumentNode.SelectSingleNode("/html/body/div[3]/div/div[2]/h1");
            string tempTextFile = titleNode.InnerText + "\r\n" + htmlPostResponse.DocumentNode.InnerHtml + "\r\n";

            foreach (var item in typeSetting)
            {
                item.Set(ref tempTextFile);
            }

            FileWrite.TxtWrire(tempTextFile, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
        }
        catch (Exception)
        {
            // On any error re-fetch the current section: step BeginSection back so the loop
            // increment lands on the same section again, and count the failure.
            // (Debug logging of the exception is disabled here.)
            TaskInfo.BeginSection--;
            TaskInfo.FailTimes++;
            continue;
        }

        TaskInfo.HasStopped = CurrentParameter.IsStop;
    }

    return TaskInfo.CurrentSection == TaskInfo.EndSection;
}
/// <summary>
/// Fetches the sections from <c>TaskInfo.BeginSection</c> through <c>TaskInfo.EndSection</c>
/// from www.wfxs.org. The chapter body is cut out of the raw page source with a regular
/// expression, re-parsed as HTML, stripped of <c>div</c> elements, run through the
/// typesetting plug-ins and passed to <c>FileWrite.TxtWrire</c>.
/// </summary>
/// <returns>
/// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals <c>TaskInfo.EndSection</c> after the
/// loop has ended; otherwise <c>false</c>.
/// </returns>
public override bool Download()
{
    CurrentParameter.IsStop = false;

    // Typesetting plug-ins, applied to every chapter in this order.
    var formatters = new Collection<ITypeSetting>
    {
        new Remove0007(),
        new BrRegex(),
        new HtmlDecode(),
        new UniformFormat(),
    };

    for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
    {
        // Compose the URL: /html/{Tid}/{section name}.html
        string chapterUrl = string.Format(
            "https://www.wfxs.org/html/{0}/{1}.html",
            TaskInfo.Tid,
            SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture));

        try
        {
            string pageSource = GetHtmlString(chapterUrl);

            // The body is whatever lies between the book link and the style_4() script tag;
            // when the pattern does not match, an empty body is used.
            Match bodyMatch = Regex.Match(
                pageSource,
                @"<a href=""\/html\/\d+\/"">.+?<\/a>(?<content>.+?)<script>style_4\(\);<\/script>",
                RegexOptions.Singleline);
            string bodyHtml = bodyMatch.Success ? bodyMatch.Groups["content"].Value : "";

            HtmlDocument fragment = Network.GetHtmlDocument(bodyHtml);
            var fragmentRoot = fragment.DocumentNode;
            Network.RemoveSubHtmlNode(fragmentRoot, "div");

            string chapterText = fragmentRoot.InnerHtml + "\r\n";
            foreach (var formatter in formatters)
            {
                formatter.Set(ref chapterText);
            }

            FileWrite.TxtWrire(chapterText, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
        }
        catch (Exception)
        {
            // On any error re-fetch the current section: step BeginSection back so the loop
            // increment lands on the same section again, and count the failure.
            // (Debug logging of the exception is disabled here.)
            TaskInfo.BeginSection--;
            TaskInfo.FailTimes++;
            continue;
        }

        TaskInfo.HasStopped = CurrentParameter.IsStop;
    }

    return TaskInfo.CurrentSection == TaskInfo.EndSection;
}
/// <summary>
/// Fetches the sections from <c>TaskInfo.BeginSection</c> through <c>TaskInfo.EndSection</c>
/// of an eyny.com thread through its archiver pages. Each archiver page holds
/// <c>TaskInfo.PageSection</c> sections; a page is fetched once, its content text is run
/// through the typesetting plug-ins, and the individual posts are then cut out of that text
/// with a regular expression keyed on the "發表於 ..." post headers. Each post is passed to
/// <c>FileWrite.TxtWrire</c>.
/// </summary>
/// <returns>
/// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals <c>TaskInfo.EndSection</c> after the
/// loop has ended; otherwise <c>false</c>.
/// </returns>
public override bool Download()
{
    // Start with the stop flag cleared so the loop condition below is not short-circuited
    // by a stale value.
    CurrentParameter.IsStop = false;

    // Archiver page URL, e.g. http://archiver.eyny.com/archiver/tid-9169460-1.html
    string urlHead = string.Format(@"http://archiver.eyny.com/archiver/tid-{0}-", TaskInfo.Tid);
    string urlTail = @".html";

    int lastPage = 0;

    // Typesetting plug-ins, applied to the whole page text in this order.
    var typeSetting = new Collection<ITypeSetting>
    {
        new HtmlDecode(),
        new EynyTag(),
        new UniformFormat()
    };

    // Loop-invariant: captures the text ("Main") between one post header and the next.
    // The trailing lookahead means a post is only matched when another header follows it.
    Regex postPattern = new Regex(@"((發表於(( [昨前]天 \d+:\d+ [PA]M)|( \d+-\d+-\d+ \d+:\d+ [PA]M)|( .+?前))))(?<Main>.+?)(?=(發表於(( [昨前]天 \d+:\d+ [PA]M)|( \d+-\d+-\d+ \d+:\d+ [PA]M)|( .+?前))))", RegexOptions.Singleline);

    string rawData = "";
    for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
    {
        try
        {
            // Page that contains the section to download (1-based, rounded up).
            int newCurrentPage = (TaskInfo.BeginSection + TaskInfo.PageSection - 1) / TaskInfo.PageSection;
            if (lastPage != newCurrentPage) // only fetch when the page differs from the cached one
            {
                lastPage = newCurrentPage; // remember the page so it is not fetched again
                string url = urlHead + lastPage + urlTail; // compose URL

                // Scoped to this fetch so nodes from an earlier page can never be reused.
                HtmlNodeCollection nodeHeaders = null;
                HtmlDocument htmlRoot = GetHtmlDocument(url);
                if (htmlRoot != null)
                {
                    nodeHeaders = htmlRoot.DocumentNode.SelectNodes("//*[@id=\"content\"]");
                }

                // Validate before the first dereference; the exception is caught below and
                // triggers a retry of this section.
                if (nodeHeaders == null)
                {
                    throw new InvalidOperationException("下載資料為空的");
                }

                var content = nodeHeaders[0];
                Network.RemoveSubHtmlNode(content, "div");
                Network.RemoveSubHtmlNode(content, "ignore_js_op");
                Network.RemoveSubHtmlNode(content, "i");
                Network.RemoveSubHtmlNode(content, "p", "strong");

                rawData = content.InnerText;
                // Append a synthetic post header so the last real post is also followed by
                // a header and can satisfy the pattern's lookahead.
                rawData += "\r\n發表於 2001-1-1 1:1 PM";
                foreach (var item in typeSetting)
                {
                    item.Set(ref rawData);
                }
            }

            // Index of the wanted section within the current page.
            int partSection = TaskInfo.BeginSection - ((lastPage - 1) * TaskInfo.PageSection) - 1;

            var posts = postPattern.Matches(rawData);
            string tempTxt = posts[partSection].Groups["Main"].Value;
            FileWrite.TxtWrire(tempTxt, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
        }
        catch (Exception)
        {
            // On any error re-fetch the current section: step BeginSection back so the loop
            // increment lands on the same section again, count the failure, and reset
            // lastPage so the page is downloaded again.
            // (Debug logging of the exception is disabled here.)
            TaskInfo.BeginSection--;
            TaskInfo.FailTimes++;
            lastPage = 0;
            continue;
        }

        TaskInfo.HasStopped = CurrentParameter.IsStop;
    }

    return TaskInfo.CurrentSection == TaskInfo.EndSection;
}