/// <summary>
/// Downloads the index page of one novel, extracts the title and the chapter
/// links, fetches every chapter page, and hands each chapter's title and text
/// to a <c>ThreadWithState</c> worker running on its own thread.
/// </summary>
/// <remarks>
/// The elapsed time logged at the end covers the downloads and the thread
/// start-up only: the worker threads are not joined, so their work may still
/// be running when the time is reported.
/// </remarks>
public void Index1()
{
    // Start timing right away. Previously the stopwatch was created but never
    // started, so the reported elapsed time was always zero.
    Stopwatch watch = Stopwatch.StartNew();

    // Fetch the novel's index page.
    CrawlerController cra = new CrawlerController();
    string html = cra.HttpGet("https://www.jupindai.com/book/100.html", "");

    // Novel title, taken from the og:title meta tag.
    Match ma_name = Regex.Match(html, "(?<=meta property=\"og:title\" content=\").*?(?=\")");
    string name = ma_name.Value;

    // Chapter list: first isolate the "list-chapterAll" panel, then collect
    // every anchor inside it (group 1 = href, group 2 = link text).
    Match reg_mulu = Regex.Match(html, "<div class=\"panel panel-default\" id=\"list-chapterAll\">[\\s\\S]*(?=(<div class=\"panel panel-default hidden-xs\">))");
    MatchCollection mat_mulu2 = Regex.Matches(reg_mulu.Value, "<a[^>]+?href=\"([^ \"]+)\"[^>]*>([^<]+)<\\/a>");

    if (mat_mulu2.Count != 0)
    {
        ThreadPool.SetMaxThreads(30, 30);

        // The output directory is the same for every chapter, so build it once
        // instead of on each iteration.
        string path = AppDomain.CurrentDomain.BaseDirectory.Replace("\\", "/") + name + "/";

        for (int i = 0; i < mat_mulu2.Count; i++)
        {
            // Chapter title (anchor text).
            string chapters = mat_mulu2[i].Groups[2].Value;

            // Chapter URL: `website` (declared outside this method) is
            // prepended to the href - presumably the site root; verify.
            string contenthref = website + mat_mulu2[i].Groups[1].Value;

            // Download the chapter page.
            string chaptershtml = cra.HttpGet(contenthref, "");

            // Extract the content div, then strip the wrapper markup, the
            // whitespace literal and the <br /> tags to get the chapter text.
            Match htmlmatch = Regex.Match(chaptershtml, "<div class=\"panel-body\" id=\"htmlContent\">[\\s\\S]*?<\\/div>");
            string content = htmlmatch.Value.ToString().Replace("<div class=\"panel-body\" id=\"htmlContent\">", "").Replace("</div>", "").Replace(" ", "").Replace("<br />", "");

            // Hand the chapter to a worker on a dedicated thread. What
            // ThreadProc does with it is not visible here - presumably it
            // writes the chapter as a text file under `path`; confirm.
            ThreadWithState tws = new ThreadWithState(chapters, content, path);
            Thread t = new Thread(new ThreadStart(tws.ThreadProc));
            t.Start();
        }
    }

    // Stop timing and report the total in milliseconds.
    watch.Stop();
    TimeSpan timespan = watch.Elapsed;
    Debug.WriteLine("打开窗口代码执行时间:{0}(毫秒)", timespan.TotalMilliseconds);
}
/// <summary>
/// Program entry point: creates a <c>CrawlerController</c> and runs its
/// <c>Index1</c> crawl once.
/// </summary>
static void Main()
{
    new CrawlerController().Index1();
}