示例#1
0
        /// <summary>
        /// Builds one <see cref="Article"/> for every node returned by
        /// <c>HtmlHelper.GetCategoryNodes</c> for the site's article XPath rule.
        /// </summary>
        /// <param name="html">Markup passed to <c>HtmlHelper.GetCategoryNodes</c>.</param>
        /// <param name="websiteModel">Site definition whose <c>Rule</c> supplies the XPath expressions used for each field.</param>
        /// <returns>The parsed articles in node order; an empty list when no article node is returned.</returns>
        public List<Article> ParseArticle(string html, WebSiteModel websiteModel)
        {
            List<Article> articles = new List<Article>();

            HtmlNodeCollection categoryNodeList = HtmlHelper.GetCategoryNodes(html, websiteModel.Rule.ArticleXPath);
            if (categoryNodeList == null)
            {
                // No article nodes were found: return an empty result instead of
                // failing with a NullReferenceException in the loop below.
                return articles;
            }

            // NOTE(review): every field lookup below is indexed with the running article
            // counter rather than 0. That only lines up if the rule XPaths match nodes
            // across all articles (e.g. absolute "//..." expressions) - confirm against the rule files.
            int i = 0;

            foreach (HtmlNode item in categoryNodeList)
            {
                Article article = new Article();
                article.Title = item.SelectNodes(websiteModel.Rule.TitleXPath)[i].OuterHtml;
                article.Type = Tools.ConvertType(item.SelectNodes(websiteModel.Rule.TypeXPath)[i].InnerHtml);
                // Recommended when the recommend XPath matches anything at all.
                article.IsRecommend = item.SelectNodes(websiteModel.Rule.RecomendXPath) != null;
                article.Summary = item.SelectNodes(websiteModel.Rule.SummaryXPath)[i].OuterHtml;
                article.Created = Convert.ToDateTime(item.SelectNodes(websiteModel.Rule.CreatedXPath)[i].InnerHtml);

                // Resolve the image node once instead of re-running the XPath for each attribute.
                HtmlNode imageNode = item.SelectNodes(websiteModel.Rule.ImageXPath)[i];

                ImgLink img = new ImgLink();
                img.Src = imageNode.Attributes[ImgLink.Attributes.SRC.ToString()].Value;
                img.NavigateUrl = string.Empty;

                // The alt attribute is optional; fall back to an empty string when it is absent.
                var altAttribute = imageNode.Attributes[ImgLink.Attributes.ALT.ToString()];
                img.Alt = altAttribute == null ? string.Empty : altAttribute.Value;

                article.ImgLink = img;
                articles.Add(article);

                i++;
            }

            return articles;
        }
示例#2
0
        /// <summary>
        /// GET: /Edit/
        /// Returns the view for a new <see cref="WebSiteModel"/> whose
        /// <c>DownloadUrls</c> list starts out empty.
        /// </summary>
        /// <returns>The view result created from the new model.</returns>
        public ActionResult Index()
        {
            WebSiteModel emptySite = new WebSiteModel
            {
                DownloadUrls = new List<UrlModel>()
            };

            return View(emptySite);
        }
示例#3
0
        /// <summary>
        /// For every download URL of the site loaded from <paramref name="dataFilePath"/>:
        /// 1. download the page,
        /// 2. save the page under the ready root,
        /// 3. send a message describing the saved file through a <c>CrawlJob</c>.
        /// </summary>
        /// <param name="model">Not read by this method; the site information is loaded again from <paramref name="dataFilePath"/>.</param>
        /// <param name="dataFilePath">Path passed to <c>WebSiteManager.GetSiteInfo</c> and recorded in each message as the rule file name.</param>
        public void Process(WebSiteModel model, string dataFilePath)
        {
            string outputRoot = FileHelper.ReadyRoot;

            HtmlHelper downloader = new HtmlHelper();
            WebSiteModel site = WebSiteManager.GetSiteInfo(dataFilePath);

            foreach (UrlModel entry in site.DownloadUrls)
            {
                string pageUrl = entry.Url;

                // Target file = ready root + generated name + download extension.
                string fileName = string.Format("{0}{1}", FileHelper.GenerateFileName(pageUrl),
                                                FileHelper.DOWNLOAD_FILE_EXTENSION);
                string savedPath = string.Format("{0}{1}", outputRoot, fileName);

                downloader.Download(pageUrl);
                bool saved = downloader.SaveTo(downloader.M_Html, savedPath);

                if (!saved)
                {
                    // Nothing was written for this URL, so there is nothing to announce.
                    continue;
                }

                try
                {
                    MSMQMsg notification = new MSMQMsg
                    {
                        RuleFileName = dataFilePath,
                        DownloadedFileName = savedPath
                    };

                    new CrawlJob().Send(notification);

                    // Disabled alternative kept from the original:
                    //MSMQManager.InstanceLocalComputer.Send(msg, new BinaryMessageFormatter());
                }
                catch (Exception ex)
                {
                    // A failed send is logged and the remaining URLs are still processed.
                    LogHelper.WriteLog(ex);
                }
            }

            //MSMQManager.InstanceLocalComputer.Dispose();
        }