Пример #1
0
        /// <summary>
        /// Crawls one novel: downloads the book index page, extracts the title and the
        /// chapter links, downloads every chapter page in order, strips the surrounding
        /// markup, and hands each chapter to a <c>ThreadWithState</c> worker running on
        /// its own thread. The time spent in this method is written to the debug output.
        /// </summary>
        /// <remarks>
        /// Chapter pages are fetched sequentially on the calling thread; only the
        /// <c>ThreadWithState.ThreadProc</c> work runs concurrently. This method does not
        /// wait for those worker threads, so the reported time covers downloading and
        /// dispatching only.
        /// NOTE(review): ThreadProc presumably writes the chapter to a text file under
        /// the computed directory — confirm against ThreadWithState.
        /// </remarks>
        public void Index1()
        {
            // Start timing right away. The stopwatch was previously never started,
            // so Elapsed always reported zero.
            Stopwatch watch = Stopwatch.StartNew();

            // Fetch the index page of the whole novel.
            CrawlerController cra  = new CrawlerController();
            string            html = cra.HttpGet("https://www.jupindai.com/book/100.html", "");

            // Novel title, taken from the og:title meta tag.
            string name = Regex.Match(html, "(?<=meta property=\"og:title\" content=\").*?(?=\")").Value;

            // Chapter directory: the "list-chapterAll" panel up to the following hidden-xs panel.
            Match directory = Regex.Match(html, "<div class=\"panel panel-default\" id=\"list-chapterAll\">[\\s\\S]*(?=(<div class=\"panel panel-default hidden-xs\">))");

            // Every anchor in the directory: group 1 = href, group 2 = chapter title.
            MatchCollection chapterLinks = Regex.Matches(directory.Value, "<a[^>]+?href=\"([^ \"]+)\"[^>]*>([^<]+)<\\/a>");

            if (chapterLinks.Count != 0)
            {
                // Only affects thread-pool work items; the dedicated threads created
                // below are not limited by this setting.
                ThreadPool.SetMaxThreads(30, 30);

                // The output directory depends only on the novel name, so compute it once.
                string path = AppDomain.CurrentDomain.BaseDirectory.Replace("\\", "/") + name + "/";

                foreach (Match chapterLink in chapterLinks)
                {
                    // Chapter title.
                    string chapters = chapterLink.Groups[2].Value;
                    // Chapter URL: the href is appended to the site prefix.
                    string contenthref = website + chapterLink.Groups[1].Value;
                    // Raw HTML of the chapter page.
                    string chaptershtml = cra.HttpGet(contenthref, "");

                    // Chapter text: take the htmlContent panel and strip the wrapper
                    // div, non-breaking-space entities, and line-break tags.
                    Match  htmlmatch = Regex.Match(chaptershtml, "<div class=\"panel-body\" id=\"htmlContent\">[\\s\\S]*?<\\/div>");
                    string content   = htmlmatch.Value
                        .Replace("<div class=\"panel-body\" id=\"htmlContent\">", "")
                        .Replace("</div>", "")
                        .Replace("&nbsp;", "")
                        .Replace("<br />", "");

                    // Hand the chapter to a worker and run it on a dedicated thread.
                    ThreadWithState tws = new ThreadWithState(chapters, content, path);
                    Thread          t   = new Thread(new ThreadStart(tws.ThreadProc));
                    t.Start();
                }
            }

            // Stop timing and report the total elapsed milliseconds.
            watch.Stop();
            TimeSpan timespan = watch.Elapsed;

            Debug.WriteLine("打开窗口代码执行时间:{0}(毫秒)", timespan.TotalMilliseconds);
        }
Пример #2
0
        /// <summary>
        /// Program entry point: creates a <c>CrawlerController</c> and runs its
        /// <c>Index1</c> crawl.
        /// </summary>
        static void Main()
        {
            // The controller is used for this single call only, so no local is kept.
            new CrawlerController().Index1();
        }