Пример #1
0
        /// <summary>
        /// Fetches every section from <c>TaskInfo.BeginSection</c> through
        /// <c>TaskInfo.EndSection</c> from www.ranwen.net, runs the typesetting
        /// plugins over the chapter text and hands it to <c>FileWrite.TxtWrire</c>.
        /// </summary>
        /// <remarks>
        /// A section that fails inside the try block is retried: no retry limit is
        /// enforced here, only <c>TaskInfo.FailTimes</c> is incremented. The loop ends
        /// when the range is exhausted or <c>CurrentParameter.IsStop</c> becomes true.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals
        /// <c>TaskInfo.EndSection</c> once the loop has ended; otherwise <c>false</c>.
        /// </returns>
        public override bool Download()
        {
            CurrentParameter.IsStop = false;

            // Typesetting plugins, applied to the chapter text in this order.
            var formatters = new Collection<ITypeSetting>
            {
                new BrRegex(),
                new HtmlDecode(),
                new UniformFormat(),
                new Traditional()
            };

            for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
            {
                // Compose the chapter URL: /files/article/{Tid / 1000}/{Tid}/{section}.html
                string folder = (CommonTools.TryParse(TaskInfo.Tid, 0) / 1000).ToString(CultureInfo.InvariantCulture);
                string url = string.Format(
                    "http://www.ranwen.net/files/article/{0}/{1}/{2}.html",
                    folder,
                    TaskInfo.Tid,
                    SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture));

                HtmlDocument page = GetHtmlDocument(url);

                try
                {
                    // Chapter title first, on its own line.
                    var titleNode = page.DocumentNode.SelectSingleNode("//*[@id=\"bgdiv\"]/table[2]/tbody/tr[1]/td/div[1]/h1");
                    string chapterText = titleNode.InnerText + "\r\n";

                    // Chapter body, with nested <div> elements removed beforehand.
                    var contentNode = page.DocumentNode.SelectSingleNode("//*[@id=\"content\"]");
                    Network.RemoveSubHtmlNode(contentNode, "div");
                    chapterText += contentNode.InnerHtml + "\r\n";

                    foreach (var formatter in formatters)
                    {
                        formatter.Set(ref chapterText);
                    }

                    FileWrite.TxtWrire(chapterText, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
                }
                catch (Exception)
                {
                    // Something went wrong: step back one so the loop increment
                    // lands on the same section again, and count the failure.
                    TaskInfo.BeginSection--;
                    TaskInfo.FailTimes++;
                    continue;
                }

                TaskInfo.HasStopped = CurrentParameter.IsStop;
            }

            return TaskInfo.CurrentSection == TaskInfo.EndSection;
        }
Пример #2
0
        /// <summary>
        /// Fetches every section from <c>TaskInfo.BeginSection</c> through
        /// <c>TaskInfo.EndSection</c> from book.sfacg.com, runs the typesetting
        /// plugins over the chapter body and hands it to <c>FileWrite.TxtWrire</c>.
        /// </summary>
        /// <remarks>
        /// A section that fails inside the try block is retried: no retry limit is
        /// enforced here, only <c>TaskInfo.FailTimes</c> is incremented.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals
        /// <c>TaskInfo.EndSection</c> once the loop has ended; otherwise <c>false</c>.
        /// </returns>
        public override bool Download()
        {
            CurrentParameter.IsStop = false;

            // Base address of the novel; the section name is appended to it unchanged.
            string novelBaseUrl = string.Format("http://book.sfacg.com/Novel/{0}", TaskInfo.Tid);

            // Typesetting plugins, applied to the chapter text in this order.
            var formatters = new Collection<ITypeSetting>
            {
                new SfacgToIndent(),
                new HtmlDecode(),
                new UniformFormat(),
                new Traditional()
            };

            for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
            {
                // Compose the chapter URL.
                string sectionName = SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture);
                string url = novelBaseUrl + sectionName;

                HtmlDocument page = GetHtmlDocument(url);

                try
                {
                    // The chapter lives in the element with id "ChapterBody";
                    // <img> children are removed before the markup is read.
                    var bodyNode = page.DocumentNode.SelectSingleNode(@"//*[@id=""ChapterBody""]");
                    Network.RemoveSubHtmlNode(bodyNode, "img");
                    string chapterText = bodyNode.InnerHtml;

                    foreach (var formatter in formatters)
                    {
                        formatter.Set(ref chapterText);
                    }

                    FileWrite.TxtWrire(chapterText, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
                }
                catch (Exception)
                {
                    // Something went wrong: step back one so the loop increment
                    // lands on the same section again, and count the failure.
                    TaskInfo.BeginSection--;
                    TaskInfo.FailTimes++;
                    continue;
                }

                TaskInfo.HasStopped = CurrentParameter.IsStop;
            }

            return TaskInfo.CurrentSection == TaskInfo.EndSection;
        }
        /// <summary>
        /// Fetches every section from <c>TaskInfo.BeginSection</c> through
        /// <c>TaskInfo.EndSection</c> from lknovel.lightnovel.cn, runs the typesetting
        /// plugins over the chapter text and hands it to <c>FileWrite.TxtWrire</c>.
        /// </summary>
        /// <remarks>
        /// A section that fails inside the try block is retried: no retry limit is
        /// enforced here, only <c>TaskInfo.FailTimes</c> is incremented.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals
        /// <c>TaskInfo.EndSection</c> once the loop has ended; otherwise <c>false</c>.
        /// </returns>
        public override bool Download()
        {
            CurrentParameter.IsStop = false;

            // The section name is placed between these two fixed URL parts.
            const string urlPrefix = "http://lknovel.lightnovel.cn/main/view/";
            const string urlSuffix = ".html?charset=big5";

            // Typesetting plugins, applied to the chapter text in this order.
            var formatters = new Collection<ITypeSetting>
            {
                new HtmlDecode(),
                new UniformFormat()
            };

            for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
            {
                // Compose the chapter URL.
                string sectionName = SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture);
                string url = urlPrefix + sectionName + urlSuffix;

                HtmlDocument page = GetHtmlDocument(url);

                try
                {
                    // The chapter is read as plain text from the element with id "J_view".
                    var viewNode = page.DocumentNode.SelectSingleNode(@"//*[@id=""J_view""]");
                    string chapterText = viewNode.InnerText;

                    foreach (var formatter in formatters)
                    {
                        formatter.Set(ref chapterText);
                    }

                    FileWrite.TxtWrire(chapterText, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
                }
                catch (Exception)
                {
                    // Something went wrong: step back one so the loop increment
                    // lands on the same section again, and count the failure.
                    TaskInfo.BeginSection--;
                    TaskInfo.FailTimes++;
                    continue;
                }

                TaskInfo.HasStopped = CurrentParameter.IsStop;
            }

            return TaskInfo.CurrentSection == TaskInfo.EndSection;
        }
Пример #4
0
        /// <summary>
        /// Fetches every section from <c>TaskInfo.BeginSection</c> through
        /// <c>TaskInfo.EndSection</c> from www.biquge.com, runs the typesetting
        /// plugins over the chapter text and hands it to <c>FileWrite.TxtWrire</c>.
        /// </summary>
        /// <remarks>
        /// Any exception raised while building the URL, fetching or processing a
        /// section causes that section to be retried: no retry limit is enforced
        /// here, only <c>TaskInfo.FailTimes</c> is incremented.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals
        /// <c>TaskInfo.EndSection</c> once the loop has ended; otherwise <c>false</c>.
        /// </returns>
        public override bool Download()
        {
            CurrentParameter.IsStop = false;

            // Typesetting plugins, applied to the chapter text in this order.
            var formatters = new Collection<ITypeSetting>
            {
                new BrRegex(),
                new HtmlDecode(),
                new UniformFormat(),
                new Traditional()
            };

            for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
            {
                try
                {
                    // Compose the chapter URL: /{Tid}/{section}.html
                    string url = string.Format(
                        "http://www.biquge.com/{0}/{1}.html",
                        TaskInfo.Tid,
                        SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture));

                    HtmlDocument page = GetHtmlDocument(url);

                    // Title line followed by the raw markup of the content element.
                    var titleNode = page.DocumentNode.SelectSingleNode("//*[@id=\"wrapper\"]/div[3]/div[1]/div[2]/h1");
                    string title = titleNode.InnerText;
                    var contentNode = page.DocumentNode.SelectSingleNode("//*[@id=\"content\"]");
                    string chapterText = title + "\r\n" + contentNode.InnerHtml + "\r\n";

                    foreach (var formatter in formatters)
                    {
                        formatter.Set(ref chapterText);
                    }

                    FileWrite.TxtWrire(chapterText, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
                }
                catch (Exception)
                {
                    // Something went wrong: step back one so the loop increment
                    // lands on the same section again, and count the failure.
                    TaskInfo.BeginSection--;
                    TaskInfo.FailTimes++;
                    continue;
                }

                TaskInfo.HasStopped = CurrentParameter.IsStop;
            }

            return TaskInfo.CurrentSection == TaskInfo.EndSection;
        }
Пример #5
0
        /// <summary>
        /// Fetches every section from <c>TaskInfo.BeginSection</c> through
        /// <c>TaskInfo.EndSection</c> from tw.hjwzw.com, runs the typesetting
        /// plugins over the chapter text and hands it to <c>FileWrite.TxtWrire</c>.
        /// </summary>
        /// <remarks>
        /// Any exception raised while building the URL, fetching or processing a
        /// section causes that section to be retried: no retry limit is enforced
        /// here, only <c>TaskInfo.FailTimes</c> is incremented.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals
        /// <c>TaskInfo.EndSection</c> once the loop has ended; otherwise <c>false</c>.
        /// </returns>
        public override bool Download()
        {
            CurrentParameter.IsStop = false;

            // Typesetting plugins, applied to the chapter text in this order.
            var formatters = new Collection<ITypeSetting>
            {
                new HtmlDecode(),
                new UniformFormat(),
                new HjwzwRegex()
            };

            for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
            {
                try
                {
                    // Compose the chapter URL: /Book/Read/{Tid},{section}
                    string url = string.Format(
                        "http://tw.hjwzw.com/Book/Read/{0},{1}",
                        TaskInfo.Tid,
                        SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture));

                    HtmlDocument page = GetHtmlDocument(url);

                    // The chapter text is located by an absolute path into the page layout.
                    var chapterNode = page.DocumentNode.SelectSingleNode("/html/body/table[7]/tr/td/div[5]");
                    string chapterText = chapterNode.InnerText;

                    foreach (var formatter in formatters)
                    {
                        formatter.Set(ref chapterText);
                    }

                    FileWrite.TxtWrire(chapterText, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
                }
                catch (Exception)
                {
                    // Something went wrong: step back one so the loop increment
                    // lands on the same section again, and count the failure.
                    TaskInfo.BeginSection--;
                    TaskInfo.FailTimes++;
                    continue;
                }

                TaskInfo.HasStopped = CurrentParameter.IsStop;
            }

            return TaskInfo.CurrentSection == TaskInfo.EndSection;
        }
Пример #6
0
        /// <summary>
        /// Downloads the posts numbered <c>TaskInfo.BeginSection</c> through
        /// <c>TaskInfo.EndSection</c> of a ck101.com thread. A thread page holds
        /// <c>TaskInfo.PageSection</c> posts, so a page is fetched only when the
        /// requested post falls on a different page than the one fetched last.
        /// </summary>
        /// <remarks>
        /// Any failure causes the current post to be retried with a fresh page
        /// download: no retry limit is enforced here, only
        /// <c>TaskInfo.FailTimes</c> is incremented.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals
        /// <c>TaskInfo.EndSection</c> once the loop has ended; otherwise <c>false</c>.
        /// </returns>
        public override bool Download()
        {
            CurrentParameter.IsStop = false;

            // Split the thread URL into the text before and after the page number
            // so other pages of the same thread can be addressed.
            Regex  r = new Regex(@"(?<Head>^https?:\/\/\w*\.*ck101.com\/thread-\d+-)(?<CurrentPage>\d+)(?<Tail>-\w+\.html)");
            Match  m = r.Match(TaskInfo.Url);
            string urlHead = string.Empty, urlTail = string.Empty;

            if (m.Success)
            {
                urlHead = m.Groups["Head"].Value;
                urlTail = m.Groups["Tail"].Value;
            }

            HtmlNodeCollection nodeHeaders = null; // post bodies of the page fetched last
            int lastPage = 0;                      // page number fetched last; 0 forces a fetch

            // Typesetting plugins, applied to the post text in this order.
            var typeSetting = new Collection <ITypeSetting>
            {
                new HtmlDecode(),
                new UniformFormat()
            };

            for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
            {
                try
                {
                    // Page that contains the requested post (ceiling division).
                    int newCurrentPage = (TaskInfo.BeginSection + TaskInfo.PageSection - 1) / TaskInfo.PageSection;

                    if (lastPage != newCurrentPage)
                    {
                        // Remember the page so following posts on it need no new download.
                        lastPage = newCurrentPage;

                        // Forget the previous page's nodes first: if the download below
                        // yields nothing, the null check further down must trigger a retry
                        // instead of silently reading a post from the wrong page.
                        nodeHeaders = null;

                        string url = urlHead + lastPage.ToString(CultureInfo.InvariantCulture) + urlTail;

                        if (lastPage == 1)
                        {
                            // Special handling for the first page: it often comes back
                            // incomplete, so alternate between two addresses per failure.
                            switch (TaskInfo.FailTimes % 2)
                            {
                                case 0:
                                    url = string.Format("https://ck101.com/thread-{0}-1-1.html", TaskInfo.Tid);
                                    break;

                                case 1:
                                    url = string.Format("https://m.ck101.com/forum.php?mod=redirect&ptid={0}&authorid=0&postno=1", TaskInfo.Tid);
                                    break;
                            }
                        }

                        HtmlDocument htmlRoot = GetHtmlDocumentReplaceDivToEmpty(url);

                        if (htmlRoot != null)
                        {
                            nodeHeaders = htmlRoot.DocumentNode.SelectNodes("//*[@class=\"t_f\"]");
                        }
                    }

                    // Zero-based position of the requested post within the current page.
                    int partSection = TaskInfo.BeginSection - ((lastPage - 1) * TaskInfo.PageSection) - 1;
                    if (nodeHeaders == null)
                    {
                        // Caught below and turned into a retry of this post.
                        throw new InvalidOperationException("下載資料為空的");
                    }

                    // Strip non-text children before reading the post's text.
                    var postNode = nodeHeaders[partSection];
                    Network.RemoveSubHtmlNode(postNode, "div");
                    Network.RemoveSubHtmlNode(postNode, "ignore_js_op");
                    Network.RemoveSubHtmlNode(postNode, "i");
                    Network.RemoveSubHtmlNode(postNode, "script");
                    string tempTxt = postNode.InnerText;

                    foreach (var item in typeSetting)
                    {
                        item.Set(ref tempTxt);
                    }
                    FileWrite.TxtWrire(tempTxt, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
                }
                catch (Exception)
                {
                    // Something went wrong: step back one so the loop increment lands on
                    // the same post again, count the failure and force a page re-download.
                    TaskInfo.BeginSection--;
                    TaskInfo.FailTimes++;
                    lastPage = 0;

                    continue;
                }

                TaskInfo.HasStopped = CurrentParameter.IsStop;
            }

            bool finish = TaskInfo.CurrentSection == TaskInfo.EndSection;

            return finish;
        }
Пример #7
0
        /// <summary>
        /// Fetches every section from <c>TaskInfo.BeginSection</c> through
        /// <c>TaskInfo.EndSection</c> from big5.quanben5.com. The chapter page only
        /// supplies the title and the arguments of an <c>ajax_post(...)</c> call;
        /// the response of <c>PostHtmlDocument</c>, called with form data built from
        /// those arguments, is used as the chapter body.
        /// </summary>
        /// <remarks>
        /// A section that fails inside the try block is retried: no retry limit is
        /// enforced here, only <c>TaskInfo.FailTimes</c> is incremented.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals
        /// <c>TaskInfo.EndSection</c> once the loop has ended; otherwise <c>false</c>.
        /// </returns>
        public override bool Download()
        {
            CurrentParameter.IsStop = false;

            // Typesetting plugins, applied to the chapter text in this order.
            var typeSetting = new Collection <ITypeSetting>
            {
                new AnnotationRegex(),
                new BrRegex(),
                new PRegex(),
                new HtmlDecode(),
                new UniformFormat(),
            };

            // Captures the arguments of the page's ajax_post(...) call. The pattern is
            // constant, so it is built once instead of once per section.
            Regex ajaxCall = new Regex(@"ajax_post\('book','ajax_content','pinyin','(?<pinyin>\S+)','content_id','(?<content_id>\d+)','sky','(?<sky>\S+)','t','(?<t>\d+)'\)");

            for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
            {
                // Compose the chapter URL: /n/{Tid}/{section}.html
                string url = string.Format("http://big5.quanben5.com/n/{0}/{1}.html",
                                           TaskInfo.Tid,
                                           SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture));

                HtmlDocument htmlRoot = GetHtmlDocument(url);

                try
                {
                    Match  m        = ajaxCall.Match(htmlRoot.DocumentNode.InnerHtml);
                    string formData = "";
                    if (m.Success)
                    {
                        // Seconds since 1970-01-01 (clock shifted by +8 hours) followed by
                        // "000", sent as rndval. A 64-bit value is used because the second
                        // count no longer fits in an Int32 from January 2038 on, which would
                        // throw OverflowException and make every section fail.
                        long seconds = Convert.ToInt64(DateTime.UtcNow.AddHours(8).Subtract(new DateTime(1970, 1, 1)).TotalSeconds);
                        string timestamp = seconds.ToString(CultureInfo.InvariantCulture) + "000";
                        formData = string.Format("pinyin={0}&content_id={1}&sky={2}&t={3}&_type=ajax&rndval={4}",
                                                 m.Groups["pinyin"].Value,
                                                 m.Groups["content_id"].Value,
                                                 m.Groups["sky"].Value,
                                                 m.Groups["t"].Value,
                                                 timestamp
                                                 );
                    }

                    // NOTE(review): when the ajax_post call is not found, the request is
                    // still sent with empty form data - confirm this is intended.
                    HtmlDocument htmlPostResponse = PostHtmlDocument(formData);

                    var titleNode = htmlRoot.DocumentNode.SelectSingleNode("/html/body/div[3]/div/div[2]/h1");

                    // Title line followed by the markup returned by the POST.
                    string tempTextFile = titleNode.InnerText + "\r\n" + htmlPostResponse.DocumentNode.InnerHtml + "\r\n";
                    foreach (var item in typeSetting)
                    {
                        item.Set(ref tempTextFile);
                    }
                    FileWrite.TxtWrire(tempTextFile, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
                }
                catch (Exception)
                {
                    // Something went wrong: step back one so the loop increment
                    // lands on the same section again, and count the failure.
                    TaskInfo.BeginSection--;
                    TaskInfo.FailTimes++;

                    continue;
                }

                TaskInfo.HasStopped = CurrentParameter.IsStop;
            }

            bool finish = TaskInfo.CurrentSection == TaskInfo.EndSection;

            return finish;
        }
Пример #8
0
        /// <summary>
        /// Fetches every section from <c>TaskInfo.BeginSection</c> through
        /// <c>TaskInfo.EndSection</c> from www.wfxs.org. The chapter is cut out of
        /// the raw page text with a regular expression, parsed as HTML, stripped of
        /// <c>div</c> elements, run through the typesetting plugins and handed to
        /// <c>FileWrite.TxtWrire</c>.
        /// </summary>
        /// <remarks>
        /// A section that fails inside the try block is retried: no retry limit is
        /// enforced here, only <c>TaskInfo.FailTimes</c> is incremented.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals
        /// <c>TaskInfo.EndSection</c> once the loop has ended; otherwise <c>false</c>.
        /// </returns>
        public override bool Download()
        {
            CurrentParameter.IsStop = false;

            // Typesetting plugins, applied to the chapter text in this order.
            var formatters = new Collection<ITypeSetting>
            {
                new Remove0007(),
                new BrRegex(),
                new HtmlDecode(),
                new UniformFormat(),
            };

            for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
            {
                // Compose the chapter URL: /html/{Tid}/{section}.html
                string url = string.Format(
                    "https://www.wfxs.org/html/{0}/{1}.html",
                    TaskInfo.Tid,
                    SectionNames[TaskInfo.CurrentSection].ToString(CultureInfo.InvariantCulture));

                try
                {
                    string pageSource = GetHtmlString(url);

                    // The chapter sits between the link back to the book index and the
                    // style_4() script; when that span is not found the content is empty.
                    Match span = Regex.Match(
                        pageSource,
                        @"<a href=""\/html\/\d+\/"">.+?<\/a>(?<content>.+?)<script>style_4\(\);<\/script>",
                        RegexOptions.Singleline);
                    string chapterMarkup = span.Success ? span.Groups["content"].Value : "";

                    HtmlDocument fragment = Network.GetHtmlDocument(chapterMarkup);

                    var rootNode = fragment.DocumentNode;
                    Network.RemoveSubHtmlNode(rootNode, "div");

                    string chapterText = rootNode.InnerHtml + "\r\n";

                    foreach (var formatter in formatters)
                    {
                        formatter.Set(ref chapterText);
                    }

                    FileWrite.TxtWrire(chapterText, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
                }
                catch (Exception)
                {
                    // Something went wrong: step back one so the loop increment
                    // lands on the same section again, and count the failure.
                    TaskInfo.BeginSection--;
                    TaskInfo.FailTimes++;
                    continue;
                }

                TaskInfo.HasStopped = CurrentParameter.IsStop;
            }

            return TaskInfo.CurrentSection == TaskInfo.EndSection;
        }
Пример #9
0
        /// <summary>
        /// Downloads the posts numbered <c>TaskInfo.BeginSection</c> through
        /// <c>TaskInfo.EndSection</c> of an eyny.com thread from its archiver view.
        /// An archiver page is fetched only when the requested post falls on a
        /// different page than the one fetched last; the page text is then split
        /// into posts on their "發表於 ..." header lines.
        /// </summary>
        /// <remarks>
        /// Any failure causes the current post to be retried with a fresh page
        /// download: no retry limit is enforced here, only
        /// <c>TaskInfo.FailTimes</c> is incremented.
        /// NOTE(review): <c>CurrentParameter.IsStop</c> is read by the loop condition
        /// but is not reset in this method - confirm the caller clears it.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when <c>TaskInfo.CurrentSection</c> equals
        /// <c>TaskInfo.EndSection</c> once the loop has ended; otherwise <c>false</c>.
        /// </returns>
        public override bool Download()
        {
            // Archiver page address, e.g. http://archiver.eyny.com/archiver/tid-9169460-1.html
            string urlHead = string.Format(@"http://archiver.eyny.com/archiver/tid-{0}-", TaskInfo.Tid);
            string urlTail = @".html";

            int lastPage = 0; // page number fetched last; 0 forces a fetch

            // Typesetting plugins, applied to the page text in this order.
            var typeSetting = new Collection <ITypeSetting>
            {
                new HtmlDecode(),
                new EynyTag(),
                new UniformFormat()
            };

            // Matches one post: a "發表於 <time>" header, then the body (group "Main") up
            // to, but not including, the next header. The pattern is constant, so it is
            // built once instead of once per post.
            Regex postPattern = new Regex(@"((發表於(( [昨前]天 \d+:\d+ [PA]M)|( \d+-\d+-\d+ \d+:\d+ [PA]M)|( .+?前))))(?<Main>.+?)(?=(發表於(( [昨前]天 \d+:\d+ [PA]M)|( \d+-\d+-\d+ \d+:\d+ [PA]M)|( .+?前))))", RegexOptions.Singleline);

            string rawData = ""; // typeset text of the page fetched last

            for (; TaskInfo.BeginSection <= TaskInfo.EndSection && !CurrentParameter.IsStop; TaskInfo.BeginSection++)
            {
                try
                {
                    // Page that contains the requested post (ceiling division).
                    int newCurrentPage = (TaskInfo.BeginSection + TaskInfo.PageSection - 1) / TaskInfo.PageSection;

                    if (lastPage != newCurrentPage)
                    {
                        // Remember the page so following posts on it need no new download.
                        lastPage = newCurrentPage;
                        string url = urlHead + lastPage + urlTail;

                        HtmlDocument htmlRoot = GetHtmlDocument(url);

                        // Start from null on every fetch so a failed download can never
                        // fall back to the nodes of a previously fetched page.
                        HtmlNodeCollection nodeHeaders = null;
                        if (htmlRoot != null)
                        {
                            nodeHeaders = htmlRoot.DocumentNode.SelectNodes("//*[@id=\"content\"]");
                        }

                        // Check before the first use; caught below and turned into a retry.
                        if (nodeHeaders == null)
                        {
                            throw new InvalidOperationException("下載資料為空的");
                        }

                        // Strip non-text children before reading the page's text.
                        var contentNode = nodeHeaders[0];
                        Network.RemoveSubHtmlNode(contentNode, "div");
                        Network.RemoveSubHtmlNode(contentNode, "ignore_js_op");
                        Network.RemoveSubHtmlNode(contentNode, "i");
                        Network.RemoveSubHtmlNode(contentNode, "p", "strong");
                        rawData = contentNode.InnerText;

                        // Sentinel header: gives the last post a following "發表於" line so
                        // the pattern's lookahead can close it.
                        rawData += "\r\n發表於 2001-1-1 1:1 PM";

                        foreach (var item in typeSetting)
                        {
                            item.Set(ref rawData);
                        }
                    }

                    // Zero-based position of the requested post within the current page.
                    int partSection = TaskInfo.BeginSection - ((lastPage - 1) * TaskInfo.PageSection) - 1;

                    var    m       = postPattern.Matches(rawData);
                    string tempTxt = m[partSection].Groups["Main"].Value;
                    FileWrite.TxtWrire(tempTxt, TaskInfo.SaveFullPath, TaskInfo.TextEncoding);
                }
                catch (Exception)
                {
                    // Something went wrong: step back one so the loop increment lands on
                    // the same post again, count the failure and force a page re-download.
                    TaskInfo.BeginSection--;
                    TaskInfo.FailTimes++;
                    lastPage = 0;

                    continue;
                }

                TaskInfo.HasStopped = CurrentParameter.IsStop;
            }

            bool finish = TaskInfo.CurrentSection == TaskInfo.EndSection;

            return finish;
        }