/// <summary>
/// Builds a list of <see cref="Article"/> objects from the nodes that
/// <c>HtmlHelper.GetCategoryNodes</c> returns for the rule's article XPath.
/// </summary>
/// <param name="html">Raw HTML of the page to parse.</param>
/// <param name="websiteModel">Site model whose <c>Rule</c> supplies the XPath expressions used for each field.</param>
/// <returns>
/// One article per category node, in node order. An empty list is returned
/// when no category nodes are available.
/// </returns>
public List<Article> ParseArticle(string html, WebSiteModel websiteModel)
{
    List<Article> articles = new List<Article>();

    HtmlNodeCollection categoryNodeList = HtmlHelper.GetCategoryNodes(html, websiteModel.Rule.ArticleXPath);
    if (categoryNodeList == null)
    {
        // Nothing matched the article XPath: return an empty result instead of
        // failing with a NullReferenceException in the loop below.
        return articles;
    }

    // NOTE(review): every field is read from the i-th match of its XPath, where i is
    // the position of the current category node. Presumably the rule XPaths are
    // document-absolute ("//..."), so the i-th match lines up with the i-th article —
    // confirm against the rule files before changing this indexing.
    int i = 0;
    foreach (HtmlNode item in categoryNodeList)
    {
        Article article = new Article();
        article.Title = item.SelectNodes(websiteModel.Rule.TitleXPath)[i].OuterHtml;
        article.Type = Tools.ConvertType(item.SelectNodes(websiteModel.Rule.TypeXPath)[i].InnerHtml);
        // Recommended when the recommend XPath matches anything at all.
        article.IsRecommend = item.SelectNodes(websiteModel.Rule.RecomendXPath) != null;
        article.Summary = item.SelectNodes(websiteModel.Rule.SummaryXPath)[i].OuterHtml;
        article.Created = Convert.ToDateTime(item.SelectNodes(websiteModel.Rule.CreatedXPath)[i].InnerHtml);

        // Resolve the image node once; it is needed for both SRC and ALT.
        HtmlNode imageNode = item.SelectNodes(websiteModel.Rule.ImageXPath)[i];
        HtmlAttribute altAttribute = imageNode.Attributes[ImgLink.Attributes.ALT.ToString()];

        ImgLink img = new ImgLink();
        img.Src = imageNode.Attributes[ImgLink.Attributes.SRC.ToString()].Value;
        img.NavigateUrl = string.Empty;
        // ALT is optional; fall back to an empty string when the attribute is absent.
        img.Alt = altAttribute == null ? string.Empty : altAttribute.Value;
        article.ImgLink = img;

        articles.Add(article);
        i++;
    }

    return articles;
}
//
// GET: /Edit/
/// <summary>
/// Renders the default view with a new <see cref="WebSiteModel"/> whose
/// <c>DownloadUrls</c> list is initialised to an empty list.
/// </summary>
/// <returns>The view result bound to the new model.</returns>
public ActionResult Index()
{
    WebSiteModel emptySite = new WebSiteModel
    {
        DownloadUrls = new List<UrlModel>()
    };

    return View(emptySite);
}
/// <summary>
/// For every URL in the site info loaded from <paramref name="dataFilePath"/>:
/// 1. download the page,
/// 2. save the page under the ready root,
/// 3. if the save succeeded, send a message describing it through a <c>CrawlJob</c>.
/// </summary>
/// <param name="model">Not read by this method; the site info is reloaded from <paramref name="dataFilePath"/>.</param>
/// <param name="dataFilePath">Path passed to <c>WebSiteManager.GetSiteInfo</c>; also recorded as the message's rule file name.</param>
public void Process(WebSiteModel model, string dataFilePath)
{
    string readyRoot = FileHelper.ReadyRoot;
    HtmlHelper downloader = new HtmlHelper();
    WebSiteModel siteInfo = WebSiteManager.GetSiteInfo(dataFilePath);

    foreach (UrlModel entry in siteInfo.DownloadUrls)
    {
        string url = entry.Url;
        string fileName = string.Format("{0}{1}", FileHelper.GenerateFileName(url), FileHelper.DOWNLOAD_FILE_EXTENSION);
        string targetPath = string.Format("{0}{1}", readyRoot, fileName);

        downloader.Download(url);
        bool saved = downloader.SaveTo(downloader.M_Html, targetPath);
        if (!saved)
        {
            // Nothing to announce when the page could not be saved.
            continue;
        }

        try
        {
            MSMQMsg message = new MSMQMsg
            {
                RuleFileName = dataFilePath,
                DownloadedFileName = targetPath
            };

            new CrawlJob().Send(message);
        }
        catch (Exception ex)
        {
            // A failed send is logged and the remaining URLs are still processed.
            LogHelper.WriteLog(ex);
        }
    }
}