/// <summary>
/// Downloads the article whose address is typed into <c>textBoxUrl</c>, runs the
/// simple analysis on the downloaded HTML, writes the extracted content to an
/// HTML file under the application's <c>html</c> directory (path kept in
/// <c>_filePath</c>) and reports the serialized model through <c>AddMessage</c>.
/// </summary>
/// <remarks>
/// The busy flag and the trigger button are restored in a <c>finally</c> block,
/// so a failed download, analysis or file write no longer leaves the button
/// permanently disabled. Exceptions still propagate to the caller unchanged.
/// </remarks>
private void SimpleAnalysis()
{
    _state = true;
    SetControlEnable(buttonSimpleAnalysis, false);
    try
    {
        // Trim once and reuse the value for both the emptiness check and the download.
        String url = textBoxUrl.Text.Trim();
        if (String.IsNullOrEmpty(url))
        {
            AddMessage("请输入需要采集的文章地址!");
            return;
        }

        IAnalysis analysis = ArticleAnalysisFactory.Instance().CreateAnalysis(1);
        if (analysis == null)
        {
            AddMessage("暂时还未支持该站点的文章采集,程序猿正在紧张处理中!!!");
            return;
        }

        ArticleDownAction downAction = new ArticleDownAction();
        String html = downAction.GetHtml(url);
        _simpleArticleModel = analysis.SimpleAnalysis(html);
        String articleJson = Newtonsoft.Json.JsonConvert.SerializeObject(_simpleArticleModel);

        String page = String.Format("<html><body>{0}</body></html>", _simpleArticleModel.ContentModels);

        // Build the output path with Path.Combine (the previous verbatim format string
        // embedded a doubled backslash) and make sure the target directory exists,
        // because File.WriteAllText does not create missing directories.
        String outputDirectory = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "html");
        Directory.CreateDirectory(outputDirectory);
        String fileName = String.Format("htmltest_{0}.html", DateTime.Now.ToString("ffff"));
        _filePath = Path.Combine(outputDirectory, fileName);
        File.WriteAllText(_filePath, page, Encoding.Unicode);

        AddMessage(articleJson);
    }
    finally
    {
        // Always release the busy state, even when something above throws.
        _state = false;
        SetControlEnable(buttonSimpleAnalysis, true);
    }
}
/// <summary>
/// Fetches a fixed WeChat article over the network, runs the simple analysis
/// on its HTML, serializes the resulting model to JSON and asserts that the
/// analysis reported success.
/// </summary>
public void AnalysisTest_简单分析()
{
    const String articleUrl = "https://mp.weixin.qq.com/s/CwsiuQ10q-WQ9dROvPAhWQ";

    // Download the raw page first; the analyzer works on the HTML text only.
    String downloadedHtml = new ArticleDownAction().GetHtml(articleUrl);

    SimpleArticleModel parsed = new AnalysisWechatOfficialAccount().SimpleAnalysis(downloadedHtml);

    // The JSON is not asserted on; serializing still verifies the model can be serialized.
    String serialized = Newtonsoft.Json.JsonConvert.SerializeObject(parsed);

    Assert.IsTrue(parsed.Result.Successed);
}
/// <summary>
/// Extracts title, publish time, author, site name and body content from the
/// HTML of an article page by looking up the elements with ids
/// <c>activity-name</c>, <c>post-date</c>, <c>post-user</c> and <c>js_content</c>.
/// </summary>
/// <param name="articleHtml">Raw HTML of the article page.</param>
/// <returns>
/// A model that is always non-null. Missing elements yield empty strings (and
/// 1990-01-01 for the publish time). When any step throws, including loading
/// the HTML or parsing the date, <c>Result.Successed</c> is set to false and
/// the exception is stored in <c>Result.Result</c>.
/// </returns>
public SimpleArticleModel SimpleAnalysis(String articleHtml)
{
    SimpleArticleModel model = new SimpleArticleModel();
    model.Result = new HandlingResult();
    try
    {
        // Loading happens inside the try so that null/invalid input is reported
        // through Result like every other failure instead of escaping the method.
        HtmlDocument htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(articleHtml);

        HtmlNode hnTitle = htmlDocument.GetElementbyId("activity-name");
        model.Title = hnTitle != null ? hnTitle.InnerText.Trim() : "";

        HtmlNode hnPubTime = htmlDocument.GetElementbyId("post-date");
        model.PublicTime = hnPubTime != null
            ? DateTime.Parse(hnPubTime.InnerText.Trim())
            : DateTime.Parse("1990-01-01");

        // The author is read from the second sibling after the date node. Either
        // sibling may be absent, so walk the chain defensively instead of
        // failing the whole analysis with a NullReferenceException.
        HtmlNode hnAuthor = null;
        if (hnPubTime != null && hnPubTime.NextSibling != null)
        {
            hnAuthor = hnPubTime.NextSibling.NextSibling;
        }
        model.Author = hnAuthor != null ? hnAuthor.InnerText.Trim() : "";

        model.Site = new SiteModel();
        HtmlNode hnSiteName = htmlDocument.GetElementbyId("post-user");
        model.Site.Name = hnSiteName != null ? hnSiteName.InnerText.Trim() : "";
        model.Site.Category = 1;

        model.ContentModels = "";
        HtmlNode hnContent = htmlDocument.GetElementbyId("js_content");
        if (hnContent != null && hnContent.HasChildNodes)
        {
            // Chain the two replacements: previously the second one ran on the raw
            // InnerHtml again and silently discarded the regClass result.
            // NOTE(review): regClass presumably matches class="..." attributes (the
            // original author noted the pattern class="[\w_-. ]*") and regImage the
            // image source attribute; both are defined elsewhere - confirm.
            String content = regClass.Replace(hnContent.InnerHtml, "");
            model.ContentModels = regImage.Replace(content, "src");
        }

        // Mark success explicitly rather than relying on HandlingResult's default.
        model.Result.Successed = true;
    }
    catch (Exception ex)
    {
        model.Result.Successed = false;
        model.Result.Result = ex;
        model.Result.Message = "分析文章过程出现异常,请查看详细的堆栈信息!";
    }
    return model;
}