Ejemplo n.º 1
0
        /// <summary>
        /// Parses a novel index page: reads the novel name and author from the
        /// <c>#info</c> element, collects the article links via <c>GetArticles</c>,
        /// and publishes the resulting <c>Novel</c> under the "novel" result key.
        /// </summary>
        /// <param name="pagePathogen">
        /// Page context; supplies <c>PageSource</c> and <c>Url</c> and receives the result.
        /// </param>
        public void PageProcess(PagePathogen pagePathogen)
        {
            var selector = new XPathSelector(pagePathogen.PageSource);

            // The novel being assembled, with an empty article list for GetArticles to work on.
            Novel novel = new Novel();
            novel.Articles = new List <Article>();

            // Novel name.
            var nameEle = selector.SelectSingleNode("//*[@id='info']/h1");
            if (nameEle != null)
            {
                novel.Name = nameEle.InnerText;
            }

            // Author: the first paragraph is expected to look like "<label>:<author>".
            var authorEle = selector.SelectSingleNode("//*[@id='info']/p[1]");
            if (authorEle != null)
            {
                string[] authorParts = authorEle.InnerText.Split(':');
                // Indexing [1] unconditionally throws IndexOutOfRangeException when the
                // paragraph contains no ':'; in that case the author is simply left unset.
                if (authorParts.Length > 1)
                {
                    novel.Author = authorParts[1];
                }
            }

            // Collect the article entries linked from the chapter list.
            GetArticles(selector, novel, pagePathogen.Url, "//*[@id='list']/dl/dd/a");

            // Hand the scraped data over to the pipeline.
            pagePathogen.AddResult("novel", novel);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Parses an article page: records the request URL under "requestUrl" and, when the
 /// <c>#content</c> element exists, its inner HTML under "article". A missing content
 /// element is logged as a warning; any exception is logged as an error and not rethrown.
 /// </summary>
 /// <param name="pagePathogen">
 /// Page context; supplies <c>PageSource</c> and <c>Url</c> and receives the results.
 /// </param>
 public void PageProcess(PagePathogen pagePathogen)
 {
     try
     {
         // Record the request address alongside the scraped data.
         pagePathogen.AddResult("requestUrl", pagePathogen.Url);
         var selector = new XPathSelector(pagePathogen.PageSource);
         var node     = selector.SelectSingleNode("//*[@id='content']");
         if (node != null)
         {
             pagePathogen.AddResult("article", node.InnerHtml);
         }
         else
         {
             // Nothing matched: write a warning entry to the crawl log.
             _loggerService.WriteLog(new Log()
             {
                 DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                 Msg        = pagePathogen.Url + "---未解析到数据!",
                 ClassName  = "",
                 ActionName = "",
                 Duration   = 0,
                 LogLevel   = (int)LCore.Logger.LogLevel.Warn
             });
         }
     }
     catch (Exception e)
     {
         // Exception.TargetSite can be null (e.g. when no stack trace is available);
         // guard it so the handler itself cannot throw and hide the original error.
         string actionName = e.TargetSite != null ? e.TargetSite.Name : "";

         // Record the error details.
         _loggerService.WriteLog(new Log()
         {
             DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
             LogLevel   = (int)LCore.Logger.LogLevel.Error,
             ClassName  = this.GetType().FullName,
             ActionName = actionName,
             Msg        = pagePathogen.Url + "---" + e.Message + "---" + e.StackTrace
         });
     }
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Handles a completed page download for the current article: extracts the
 /// <c>#content</c> element's HTML, stores it on the article, persists it through
 /// <c>_novelService</c>, optionally sends it by e-mail, and logs timing information.
 /// The whole operation runs under <c>_lock</c>; exceptions are logged and not rethrown.
 /// </summary>
 /// <param name="e">Completion data; supplies <c>Page</c>, <c>Uri</c> and <c>Duration</c>.</param>
 public override void HtmlParser(OnCompleteEventArgs e)
 {
     lock (_lock)
     {
         try
         {
             var stopWatch = new Stopwatch();
             stopWatch.Start();
             var article = Current;
             // An empty page is skipped without logging.
             if (!string.IsNullOrEmpty(e.Page))
             {
                 var selector = new XPathSelector(e.Page);
                 var node     = selector.SelectSingleNode("//*[@id='content']");
                 if (node != null)
                 {
                     // Store the chapter content and mark the article as crawled.
                     string content = node.InnerHtml;
                     article.Content          = content;
                     article.IsCrawlerContent = true;
                     // Persist the updated article.
                     _novelService.UpdateArticel(article);
                     // E-mail the chapter only when the owning novel has e-mail enabled.
                     if (article.Novel != null && article.Novel.IsOpenEmail)
                     {
                         EmailHelper.SendEmail(article.Title, content, new List <string>()
                         {
                             "*****@*****.**"
                         });
                     }
                     stopWatch.Stop();
                     // Write the crawl log entry with request and parse timings.
                     _loggerService.WriteLog(new Log()
                     {
                         DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                         Msg        = e.Uri + "请求消耗:" + e.Duration + "---" + "数据解析消耗:" + stopWatch.ElapsedMilliseconds,
                         ClassName  = "",
                         ActionName = "",
                         Duration   = e.Duration + stopWatch.ElapsedMilliseconds,
                         LogLevel   = (int)LCore.Logger.LogLevel.Info
                     });
                 }
                 else
                 {
                     // Content element not found: log a warning (the stopwatch is still
                     // running here, so ElapsedMilliseconds is the time elapsed so far).
                     _loggerService.WriteLog(new Log()
                     {
                         DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                         Msg        = e.Uri + "---未解析到数据!",
                         ClassName  = "",
                         ActionName = "",
                         Duration   = e.Duration + stopWatch.ElapsedMilliseconds,
                         LogLevel   = (int)LCore.Logger.LogLevel.Warn
                     });
                 }
             }
         }
         catch (Exception exception)
         {
             // Exception.TargetSite can be null (e.g. when no stack trace is available);
             // guard it so the handler itself cannot throw and hide the original error.
             string actionName = exception.TargetSite != null ? exception.TargetSite.Name : "";

             // Record the error; the exception message is included because the stack
             // trace alone does not say what actually went wrong.
             _loggerService.WriteLog(new Log()
             {
                 DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                 LogLevel   = (int)LCore.Logger.LogLevel.Error,
                 ClassName  = this.GetType().FullName,
                 ActionName = actionName,
                 Msg        = e.Uri + "---" + exception.Message + "---" + exception.StackTrace
             });
         }
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Handles a completed download of a novel index page. Reads the novel name and
        /// author, then either adds the novel with its articles (when no novel with that
        /// name is found through <c>_novelService</c>) or adds only the articles selected
        /// by an XPath filter on the latest stored article's <c>Seq</c>. The work runs
        /// under <c>_lock</c>; exceptions are logged and not rethrown.
        /// </summary>
        /// <param name="e">Completion data; supplies <c>Page</c>, <c>Uri</c> and <c>Duration</c>.</param>
        public override void HtmlParser(OnCompleteEventArgs e)
        {
            try
            {
                lock (_lock)
                {
                    var stopWatch = new Stopwatch();
                    stopWatch.Start();
                    var selector = new XPathSelector(e.Page);

                    // The novel being assembled, with an empty article list.
                    Novel novel = new Novel();
                    novel.Articles = new List <Article>();

                    // Novel name.
                    var nameEle = selector.SelectSingleNode("//*[@id='info']/h1");
                    if (nameEle != null)
                    {
                        novel.Name = nameEle.InnerText;
                    }

                    // Author: the first paragraph is expected to look like "<label>:<author>".
                    var authorEle = selector.SelectSingleNode("//*[@id='info']/p[1]");
                    if (authorEle != null)
                    {
                        string[] authorParts = authorEle.InnerText.Split(':');
                        // Indexing [1] unconditionally throws IndexOutOfRangeException when
                        // the paragraph has no ':', which would abort the whole parse over
                        // a cosmetic field; leave the author unset instead.
                        if (authorParts.Length > 1)
                        {
                            novel.Author = authorParts[1];
                        }
                    }

                    var oldNovel = _novelService.GetSingleNovel(new NovelSearchInput()
                    {
                        Name = novel.Name
                    });
                    if (oldNovel == null)
                    {
                        // Unknown novel: collect every listed article.
                        GetArticles(selector, novel, e.Uri, "//*[@id='list']/dl/dd/a");
                        if (novel.Articles.Count > 0)
                        {
                            // Mark as crawled once at least one article was found.
                            novel.IsCrawlerArticle = true;
                        }
                        // Store the new novel.
                        _novelService.AddNovel(novel);
                    }
                    else
                    {
                        // NOTE(review): GetLaestArticle() is dereferenced without a null
                        // check; if it can return null this lands in the catch below.
                        var laestArticle = _novelService.GetLaestArticle();
                        oldNovel.Articles = new List <Article>();
                        // Select only links whose href, after translate() strips the
                        // characters '.', 'h', 't', 'm' and 'l', is a number greater than
                        // the latest stored Seq.
                        GetArticles(selector, oldNovel, e.Uri, "//*[@id='list']/dl/dd/a[number(translate(@href,'.html',''))>" + laestArticle.Seq + "]");
                        // Store the newly found articles.
                        _novelService.AddArticles(oldNovel.Articles.ToList());
                    }
                    stopWatch.Stop();
                    // Write the crawl log entry with request and parse timings.
                    _loggerService.WriteLog(new Log()
                    {
                        DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                        Msg        = e.Uri + "请求消耗:" + e.Duration + "---" + "数据解析消耗:" + stopWatch.ElapsedMilliseconds,
                        ClassName  = "",
                        ActionName = "",
                        Duration   = e.Duration + stopWatch.ElapsedMilliseconds,
                        LogLevel   = (int)LCore.Logger.LogLevel.Info
                    });
                }
            }
            catch (Exception exception)
            {
                // Exception.TargetSite can be null (e.g. when no stack trace is available);
                // guard it so the handler itself cannot throw and hide the original error.
                string actionName = exception.TargetSite != null ? exception.TargetSite.Name : "";

                // Record the error details.
                _loggerService.WriteLog(new Log()
                {
                    DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                    LogLevel   = (int)LCore.Logger.LogLevel.Error,
                    ClassName  = this.GetType().Name,
                    ActionName = actionName,
                    Msg        = e.Uri + "---" + exception.Message
                });
            }
        }