// Completion callback for the asynchronous "fetch list pages" stage.
//
// The (AsyncResult) cast below implies this method is used as a delegate
// BeginInvoke callback; such callbacks run on a thread-pool thread, so the call
// is first re-posted to the thread that owns tboxStatistics before any control
// is touched.
// NOTE(review): assumes tboxStatistics is a Windows Forms control - confirm.
//
// itfAR: the async result handed to the callback; it must wrap a CollectProcess delegate.
private void ProcessListPagesComplete(IAsyncResult itfAR)
{
    // Windows Forms controls may only be used from the thread that created them.
    if (tboxStatistics.InvokeRequired)
    {
        tboxStatistics.BeginInvoke(new Action<IAsyncResult>(ProcessListPagesComplete), new object[] { itfAR });
        return;
    }

    // Recover the delegate that ran the stage and collect its result.
    CollectProcess collectProcessListPages = (CollectProcess)((AsyncResult)itfAR).AsyncDelegate;
    ArticleCollectOffline collectOffline = collectProcessListPages.EndInvoke(itfAR);

    if (collectOffline.CancelException == null)
    {
        // Not cancelled: asynchronously start the next stage (collect the article URLs).
        CollectProcess collectProcessArticlePages = new CollectProcess(ProcessArticlePages);
        collectProcessArticlePages.BeginInvoke(collectOffline, ProcessArticlePagesComplete, null);
    }
    else
    {
        // Cancelled: report why and how far the list-page stage got.
        tboxStatistics.AppendText(string.Format("终止获取列表页:{0} \n", collectOffline.CancelException.Message));
        tboxStatistics.AppendText(string.Format("当前获取列表页位置:{0}\n", collectOffline.CurrentProcessedListPages));
        tboxStatistics.AppendText(string.Format("总共需要处理列表页面数:{0}\n", collectOffline.CancelException.Data["TotalListPages"]));
    }

    // Print the list-page statistics (written in both the normal and the cancelled case).
    List<string> listPages = collectOffline.ListPages;
    tboxStatistics.AppendText(string.Format("获取列表文档所花时间:{0}\n", swGlobal.ElapsedMilliseconds));
    tboxStatistics.AppendText(string.Format("本次获取列表页面数:{0}\n", listPages.Count));
}
// Completion callback for the asynchronous "collect article content" stage
// (the last stage of the chain visible here).
//
// The (AsyncResult) cast below implies this method is used as a delegate
// BeginInvoke callback; such callbacks run on a thread-pool thread, so the call
// is first re-posted to the thread that owns tboxStatistics before any control
// is touched.
// NOTE(review): assumes tboxStatistics is a Windows Forms control - confirm.
//
// itfAR: the async result handed to the callback; it must wrap a CollectProcess delegate.
private void ProcessCollectArticlesComplete(IAsyncResult itfAR)
{
    // Windows Forms controls may only be used from the thread that created them.
    if (tboxStatistics.InvokeRequired)
    {
        tboxStatistics.BeginInvoke(new Action<IAsyncResult>(ProcessCollectArticlesComplete), new object[] { itfAR });
        return;
    }

    // Recover the delegate that ran the stage and collect its result.
    CollectProcess collectProcessCollectArticles = (CollectProcess)((AsyncResult)itfAR).AsyncDelegate;
    ArticleCollectOffline collectOffline = collectProcessCollectArticles.EndInvoke(itfAR);

    swGlobal.Stop();
    tboxStatistics.AppendText(string.Format("swGlobal ElapsedMilliseconds: {0} \n", swGlobal.ElapsedMilliseconds));

    // When the run was cancelled, report how far the collection got.
    if (collectOffline.CancelException != null)
    {
        tboxStatistics.AppendText(string.Format("当前采集文章数:{0}\n", collectOffline.CurrentProcessedArticles));
        tboxStatistics.AppendText(string.Format("此次总共需要采集文章数:{0}\n", collectOffline.CancelException.Data["TotalArticles"]));
    }

    List<Dictionary<string, string>> articles = collectOffline.Articles;
    List<Exception> coException = collectOffline.CoException;
    printErrors(coException);

    tboxStatistics.AppendText(string.Format("采集文章总数:{0} \n", articles.Count));
    tboxStatistics.AppendText(string.Format("采集所耗时间 :{0} \n", swGlobal.ElapsedMilliseconds));
    tboxStatistics.AppendText("-----------------------------------------------------------------------------------\n");

    // Dump every collected article, ordered by its "title" entry, one key/value pair at a time.
    var arcList = from d in articles
                  orderby d["title"] ascending
                  select d;
    foreach (Dictionary<string, string> article in arcList)
    {
        foreach (KeyValuePair<string, string> kvp in article)
        {
            tboxArticlesContent.AppendText(kvp.Key + ": \n");
            tboxArticlesContent.AppendText(kvp.Value + "\n");
        }
        tboxArticlesContent.AppendText("---------------------------------------------\n");
    }

    // Make the form usable again.
    try
    {
        btnSaveCoConfig.Enabled = true;
        btnCoTest.Enabled = true;
        tabctrCoform.SelectedIndexChanged -= TabctrCoform_SelectedIndexChanged;
    }
    catch (Exception ex)
    {
        // Still best-effort (never rethrows), but the failure is traced instead of
        // being silently discarded.
        System.Diagnostics.Trace.TraceWarning("Failed to restore the collect form controls: {0}", ex);
    }
}
// Completion callback for the asynchronous "collect article URLs" stage.
//
// The (AsyncResult) cast below implies this method is used as a delegate
// BeginInvoke callback; such callbacks run on a thread-pool thread, so the call
// is first re-posted to the thread that owns tboxStatistics before any control
// is touched.
// NOTE(review): assumes tboxStatistics is a Windows Forms control - confirm.
//
// itfAR: the async result handed to the callback; it must wrap a CollectProcess delegate.
private void ProcessArticlePagesComplete(IAsyncResult itfAR)
{
    // Windows Forms controls may only be used from the thread that created them.
    if (tboxStatistics.InvokeRequired)
    {
        tboxStatistics.BeginInvoke(new Action<IAsyncResult>(ProcessArticlePagesComplete), new object[] { itfAR });
        return;
    }

    // Recover the delegate that ran the stage and collect its result.
    CollectProcess collectProcessArticlePages = (CollectProcess)((AsyncResult)itfAR).AsyncDelegate;
    ArticleCollectOffline collectOffline = collectProcessArticlePages.EndInvoke(itfAR);

    if (collectOffline.CancelException == null)
    {
        // Not cancelled: asynchronously start the next stage (collect the article content).
        CollectProcess collectProcessCollectArticles = new CollectProcess(ProcessCollectArticles);
        collectProcessCollectArticles.BeginInvoke(collectOffline, ProcessCollectArticlesComplete, null);
    }
    else
    {
        // Cancelled: report why and how far this stage got.
        tboxStatistics.AppendText(string.Format("终止获取列表页:{0} \n", collectOffline.CancelException.Message));
        tboxStatistics.AppendText(string.Format("当前处理列表页位置:{0}\n", collectOffline.CurrentProcessedListPages));
        tboxStatistics.AppendText(string.Format("总共需要处理列表页面数:{0}\n", collectOffline.CancelException.Data["TotalListPages"]));
        tboxStatistics.AppendText(string.Format("当前处理文章链接数:{0}\n", collectOffline.CurrentGetArticlePages));
    }

    // Gather the "arcpath" entry of every matched and unmatched article page
    // before anything is printed, as the original did.
    List<string> correctListArticles = new List<string>();
    foreach (Dictionary<string, string> item in collectOffline.CorrectArticlePages)
    {
        correctListArticles.Add(item["arcpath"]);
    }

    List<string> wrongListArticles = new List<string>();
    foreach (Dictionary<string, string> item in collectOffline.WrongArticlePages)
    {
        wrongListArticles.Add(item["arcpath"]);
    }

    tboxStatistics.AppendText(string.Format("获取文章URL集合所花时间: {0}\n", swGlobal.ElapsedMilliseconds));

    // Links queued for collection.
    tboxArticlesPages.AppendText("待采集文章链接:\n");
    foreach (string item in correctListArticles)
    {
        tboxArticlesPages.AppendText(string.Format("{0}\n", item));
    }

    tboxArticlesPages.AppendText("-------------------------------------------------------------------------------\n");

    // Links taken from WrongArticlePages (the message below asks the user to check the XPath rule).
    tboxArticlesPages.AppendText("未能正确匹配内容链接,请检查匹配XPATH规则: \n");
    foreach (string item in wrongListArticles)
    {
        tboxArticlesPages.AppendText(string.Format("{0}\n", item));
    }
}