Ejemplo n.º 1
0
        //异步执行获取列表文档结束
        /// <summary>
        /// Completion callback for the asynchronous list-page stage. Collects the stage
        /// result, writes a report to <c>tboxStatistics</c> and, unless the stage was
        /// cancelled, starts the next stage (<c>ProcessArticlePages</c>) asynchronously.
        /// </summary>
        /// <param name="itfAR">
        /// Async result supplied by the delegate's <c>BeginInvoke</c>; it must be an
        /// <c>AsyncResult</c> whose delegate is a <c>CollectProcess</c>.
        /// </param>
        private void ProcessListPagesComplete(IAsyncResult itfAR)
        {
            // Recover the delegate that was started with BeginInvoke and collect its result.
            CollectProcess        collectProcessListPages = (CollectProcess)((AsyncResult)itfAR).AsyncDelegate;
            ArticleCollectOffline collectOffline          = collectProcessListPages.EndInvoke(itfAR);
            var                   cancelException         = collectOffline.CancelException;

            // Compose the complete report up front, before the next stage is started and
            // gets a chance to work on the same collectOffline instance.
            System.Text.StringBuilder report = new System.Text.StringBuilder();

            if (cancelException != null)
            {
                report.AppendFormat("终止获取列表页:{0} \n", cancelException.Message);
                report.AppendFormat("当前获取列表页位置:{0}\n", collectOffline.CurrentProcessedListPages);
                report.AppendFormat("总共需要处理列表页面数:{0}\n", cancelException.Data["TotalListPages"]);
            }

            // Statistics about the fetched list pages are printed in both cases.
            List <string> listPages = collectOffline.ListPages;

            report.AppendFormat("获取列表文档所花时间:{0}\n", swGlobal.ElapsedMilliseconds);
            report.AppendFormat("本次获取列表页面数:{0}\n", listPages.Count);

            string reportText = report.ToString();
            Action showReport = () => tboxStatistics.AppendText(reportText);

            // A delegate BeginInvoke callback runs on a thread-pool thread, while controls
            // must only be touched on the thread that created them, so marshal the update.
            if (tboxStatistics.InvokeRequired)
            {
                tboxStatistics.Invoke(showReport);
            }
            else
            {
                showReport();
            }

            // Start the next stage only after this stage's report is on screen, so the
            // output of the two stages cannot be interleaved.
            if (cancelException == null)
            {
                CollectProcess collectProcessArticlePages = new CollectProcess(ProcessArticlePages);
                collectProcessArticlePages.BeginInvoke(collectOffline, ProcessArticlePagesComplete, null);
            }
        }
Ejemplo n.º 2
0
        //异步执行采集文章结束
        /// <summary>
        /// Completion callback for the asynchronous article-collection stage. Stops the
        /// global stopwatch, prints the statistics and collected errors, dumps every
        /// collected article (ordered by its "title" entry) into
        /// <c>tboxArticlesContent</c> and finally re-enables the form controls.
        /// </summary>
        /// <param name="itfAR">
        /// Async result supplied by the delegate's <c>BeginInvoke</c>; it must be an
        /// <c>AsyncResult</c> whose delegate is a <c>CollectProcess</c>.
        /// </param>
        private void ProcessCollectArticlesComplete(IAsyncResult itfAR)
        {
            // Recover the delegate that was started with BeginInvoke and collect its result.
            CollectProcess        collectProcessCollectArticles = (CollectProcess)((AsyncResult)itfAR).AsyncDelegate;
            ArticleCollectOffline collectOffline = collectProcessCollectArticles.EndInvoke(itfAR);

            swGlobal.Stop();
            var elapsedMilliseconds = swGlobal.ElapsedMilliseconds;

            var cancelException = collectOffline.CancelException;
            List <Dictionary <string, string> > articles = collectOffline.Articles;
            List <Exception> coException = collectOffline.CoException;

            // All text is composed here, off the UI thread; the UI only receives the
            // finished strings.
            System.Text.StringBuilder header = new System.Text.StringBuilder();
            header.AppendFormat("swGlobal ElapsedMilliseconds: {0} \n", elapsedMilliseconds);
            if (cancelException != null)
            {
                header.AppendFormat("当前采集文章数:{0}\n", collectOffline.CurrentProcessedArticles);
                header.AppendFormat("此次总共需要采集文章数:{0}\n", cancelException.Data["TotalArticles"]);
            }

            System.Text.StringBuilder summary = new System.Text.StringBuilder();
            summary.AppendFormat("采集文章总数:{0} \n", articles.Count);
            summary.AppendFormat("采集所耗时间 :{0} \n", elapsedMilliseconds);
            summary.Append("-----------------------------------------------------------------------------------\n");

            var arcList = from d in articles
                          orderby d["title"]
                          ascending
                          select d;

            // One buffer instead of two AppendText calls per key/value pair.
            System.Text.StringBuilder content = new System.Text.StringBuilder();
            foreach (Dictionary <string, string> article in arcList)
            {
                foreach (KeyValuePair <string, string> kvp in article)
                {
                    content.Append(kvp.Key).Append(": \n");
                    content.Append(kvp.Value).Append("\n");
                }
                content.Append("---------------------------------------------\n");
            }

            string headerText  = header.ToString();
            string summaryText = summary.ToString();
            string contentText = content.ToString();

            Action showResults = () =>
            {
                tboxStatistics.AppendText(headerText);
                // Kept between the two statistics parts, as in the original output order.
                printErrors(coException);
                tboxStatistics.AppendText(summaryText);
                tboxArticlesContent.AppendText(contentText);

                // Make the form usable again.
                try
                {
                    btnSaveCoConfig.Enabled            = true;
                    btnCoTest.Enabled                  = true;
                    tabctrCoform.SelectedIndexChanged -= TabctrCoform_SelectedIndexChanged;
                }
                catch (Exception ex)
                {
                    // Restoring the form stays best effort, but the failure is traced
                    // instead of being silently discarded.
                    System.Diagnostics.Trace.WriteLine("ProcessCollectArticlesComplete: failed to restore the form: " + ex);
                }
            };

            // A delegate BeginInvoke callback runs on a thread-pool thread, while controls
            // must only be touched on the thread that created them, so marshal the update.
            if (tboxStatistics.InvokeRequired)
            {
                tboxStatistics.Invoke(showResults);
            }
            else
            {
                showResults();
            }
        }
Ejemplo n.º 3
0
        //异步执行获取文章URL集合结束
        /// <summary>
        /// Completion callback for the asynchronous article-URL stage. Collects the stage
        /// result, writes the statistics to <c>tboxStatistics</c>, lists the matched and
        /// unmatched article links in <c>tboxArticlesPages</c> and, unless the stage was
        /// cancelled, starts the next stage (<c>ProcessCollectArticles</c>) asynchronously.
        /// </summary>
        /// <param name="itfAR">
        /// Async result supplied by the delegate's <c>BeginInvoke</c>; it must be an
        /// <c>AsyncResult</c> whose delegate is a <c>CollectProcess</c>.
        /// </param>
        private void ProcessArticlePagesComplete(IAsyncResult itfAR)
        {
            // Recover the delegate that was started with BeginInvoke and collect its result.
            CollectProcess        collectProcessArticlePages = (CollectProcess)((AsyncResult)itfAR).AsyncDelegate;
            ArticleCollectOffline collectOffline             = collectProcessArticlePages.EndInvoke(itfAR);
            var                   cancelException            = collectOffline.CancelException;

            // Everything is read from collectOffline before the next stage is started,
            // so that stage cannot modify the collections while they are enumerated here.
            System.Text.StringBuilder statistics = new System.Text.StringBuilder();

            if (cancelException != null)
            {
                statistics.AppendFormat("终止获取列表页:{0} \n", cancelException.Message);
                statistics.AppendFormat("当前处理列表页位置:{0}\n", collectOffline.CurrentProcessedListPages);
                statistics.AppendFormat("总共需要处理列表页面数:{0}\n", cancelException.Data["TotalListPages"]);
                statistics.AppendFormat("当前处理文章链接数:{0}\n", collectOffline.CurrentGetArticlePages);
            }

            // Links that matched the rules first, then the ones that did not.
            System.Text.StringBuilder pages = new System.Text.StringBuilder();

            pages.Append("待采集文章链接:\n");
            foreach (Dictionary <string, string> item in collectOffline.CorrectArticlePages)
            {
                pages.Append(item["arcpath"]).Append("\n");
            }
            pages.Append("-------------------------------------------------------------------------------\n");
            pages.Append("未能正确匹配内容链接,请检查匹配XPATH规则: \n");
            foreach (Dictionary <string, string> item in collectOffline.WrongArticlePages)
            {
                pages.Append(item["arcpath"]).Append("\n");
            }

            statistics.AppendFormat("获取文章URL集合所花时间: {0}\n", swGlobal.ElapsedMilliseconds);

            string statisticsText = statistics.ToString();
            string pagesText      = pages.ToString();

            Action showResults = () =>
            {
                tboxStatistics.AppendText(statisticsText);
                tboxArticlesPages.AppendText(pagesText);
            };

            // A delegate BeginInvoke callback runs on a thread-pool thread, while controls
            // must only be touched on the thread that created them, so marshal the update.
            if (tboxStatistics.InvokeRequired)
            {
                tboxStatistics.Invoke(showResults);
            }
            else
            {
                showResults();
            }

            // Start the next stage only after this stage's report is on screen.
            if (cancelException == null)
            {
                CollectProcess collectProcessCollectArticles = new CollectProcess(ProcessCollectArticles);
                collectProcessCollectArticles.BeginInvoke(collectOffline, ProcessCollectArticlesComplete, null);
            }
        }