示例#1
0
 /// <summary>
 /// Downloads the article whose address is typed into <c>textBoxUrl</c>, runs the
 /// simple analysis on the downloaded HTML, writes the extracted content to an HTML
 /// file under the application's <c>html</c> folder (path kept in <c>_filePath</c>)
 /// and reports the JSON-serialized analysis result through <c>AddMessage</c>.
 /// </summary>
 /// <remarks>
 /// The busy flag and the trigger button are restored in a <c>finally</c> block so
 /// that a failing download, analysis or file write cannot leave the UI stuck in the
 /// "running" state. Exceptions are not swallowed; they still propagate to the caller.
 /// </remarks>
 private void SimpleAnalysis()
 {
     _state = true;
     SetControlEnable(buttonSimpleAnalysis, false);
     try
     {
         // Trim once and reuse the value for both the emptiness check and the download.
         String url = textBoxUrl.Text.Trim();
         if (String.IsNullOrEmpty(url))
         {
             AddMessage("请输入需要采集的文章地址!");
             return;
         }

         IAnalysis analysis = ArticleAnalysisFactory.Instance().CreateAnalysis(1);
         if (analysis == null)
         {
             AddMessage("暂时还未支持该站点的文章采集,程序猿正在紧张处理中!!!");
             return;
         }

         ArticleDownAction downAction = new ArticleDownAction();
         String            html       = downAction.GetHtml(url);
         _simpleArticleModel = analysis.SimpleAnalysis(html);
         String articleJson = Newtonsoft.Json.JsonConvert.SerializeObject(_simpleArticleModel);
         String page        = String.Format("<html><body>{0}</body></html>", _simpleArticleModel.ContentModels);

         // Path.Combine avoids the doubled separator the old verbatim format string
         // produced, and the folder is created on demand so the write does not fail
         // when the "html" directory is missing.
         String htmlDirectory = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "html");
         Directory.CreateDirectory(htmlDirectory);

         // "ffff" is only the ten-thousandths-of-a-second fraction, so file names can
         // repeat between runs and an older file may be overwritten.
         String fileName = String.Format("htmltest_{0}.html", DateTime.Now.ToString("ffff"));
         _filePath = Path.Combine(htmlDirectory, fileName);
         File.WriteAllText(_filePath, page, Encoding.Unicode);

         AddMessage(articleJson);
     }
     finally
     {
         _state = false;
         SetControlEnable(buttonSimpleAnalysis, true);
     }
 }
        /// <summary>
        /// End-to-end check of the simple analysis: downloads a fixed article from
        /// mp.weixin.qq.com, analyses it with <c>AnalysisWechatOfficialAccount</c>,
        /// serializes the resulting model to JSON and asserts that the analysis result
        /// is flagged as successful.
        /// </summary>
        public void AnalysisTest_简单分析()
        {
            const String articleUrl = "https://mp.weixin.qq.com/s/CwsiuQ10q-WQ9dROvPAhWQ";

            // Fetch the raw page first; the analyzer works on the HTML text only.
            ArticleDownAction downloader = new ArticleDownAction();
            String pageHtml = downloader.GetHtml(articleUrl);

            AnalysisWechatOfficialAccount analyzer = new AnalysisWechatOfficialAccount();
            SimpleArticleModel article = analyzer.SimpleAnalysis(pageHtml);

            // The JSON text itself is not inspected; the call only has to complete.
            String serialized = Newtonsoft.Json.JsonConvert.SerializeObject(article);

            Assert.IsTrue(article.Result.Successed);
        }
        /// <summary>
        /// Parses the HTML of an article page and fills a <see cref="SimpleArticleModel"/>
        /// with its title, publish time, author, site name and body HTML.
        /// </summary>
        /// <param name="articleHtml">Raw HTML of the article page.</param>
        /// <returns>
        /// The populated model. When any step throws, the exception is captured in
        /// <c>model.Result</c> (<c>Successed = false</c>, <c>Result</c> = the exception,
        /// <c>Message</c> = a fixed error text) and the partially filled model is returned.
        /// </returns>
        public SimpleArticleModel SimpleAnalysis(String articleHtml)
        {
            // NOTE(review): the original carried the pattern class="[\w_-. ]*" as a comment
            // here - presumably what regClass matches; confirm against its declaration.
            SimpleArticleModel model = new SimpleArticleModel();
            model.Result = new HandlingResult();

            try
            {
                // Loading happens inside the try so that unusable input (e.g. null) is
                // reported through model.Result like every other failure.
                HtmlDocument htmlDocument = new HtmlDocument();
                htmlDocument.LoadHtml(articleHtml);

                HtmlNode hnTitle = htmlDocument.GetElementbyId("activity-name");
                model.Title = hnTitle != null ? hnTitle.InnerText.Trim() : "";

                HtmlNode hnPubTime = htmlDocument.GetElementbyId("post-date");
                // The fallback date is constructed directly instead of being parsed from
                // a string with the current culture.
                model.PublicTime = hnPubTime != null
                    ? DateTime.Parse(hnPubTime.InnerText.Trim())
                    : new DateTime(1990, 1, 1);

                // The author is read from the second sibling after the publish-date node.
                // Either sibling may be missing, in which case the author stays empty
                // instead of aborting the whole analysis.
                HtmlNode hnAuthor = null;
                if (hnPubTime != null && hnPubTime.NextSibling != null)
                {
                    hnAuthor = hnPubTime.NextSibling.NextSibling;
                }
                model.Author = hnAuthor != null ? hnAuthor.InnerText.Trim() : "";

                model.Site = new SiteModel();
                HtmlNode hnSiteName = htmlDocument.GetElementbyId("post-user");
                model.Site.Name = hnSiteName != null ? hnSiteName.InnerText.Trim() : "";

                model.Site.Category = 1;
                model.ContentModels = "";
                HtmlNode hnContent = htmlDocument.GetElementbyId("js_content");
                if (hnContent != null && hnContent.HasChildNodes)
                {
                    // Apply both replacements in sequence. Previously the second call
                    // started again from the raw InnerHtml, discarding the regClass pass.
                    String content = regClass.Replace(hnContent.InnerHtml, "");
                    model.ContentModels = regImage.Replace(content, "src");
                }
            }
            catch (Exception ex)
            {
                model.Result.Successed = false;
                model.Result.Result    = ex;
                model.Result.Message   = "分析文章过程出现异常,请查看详细的堆栈信息!";
            }
            return model;
        }