/// <summary>
/// Parses a novel index page: reads the novel name and author from the
/// element with id <c>info</c>, delegates article extraction to
/// <c>GetArticles</c>, and publishes the assembled novel under the
/// <c>"novel"</c> result key.
/// </summary>
/// <param name="pagePathogen">
/// Page context that supplies <c>PageSource</c> and <c>Url</c> and receives the parsed result.
/// </param>
public void PageProcess(PagePathogen pagePathogen)
{
    var selector = new XPathSelector(pagePathogen.PageSource);

    // Novel being assembled; its article list starts out empty.
    Novel novel = new Novel();
    novel.Articles = new List<Article>();

    // Novel name.
    var nameEle = selector.SelectSingleNode("//*[@id='info']/h1");
    if (nameEle != null)
    {
        novel.Name = nameEle.InnerText;
    }

    // Author: the first paragraph under #info is split on ':' and the segment
    // after the first colon is used as the author.
    var authorEle = selector.SelectSingleNode("//*[@id='info']/p[1]");
    if (authorEle != null)
    {
        string[] authorParts = authorEle.InnerText.Split(':');

        // Only index past the separator when one was actually present; otherwise
        // a page without ':' in this paragraph would throw and abort the whole parse.
        // NOTE(review): pages using a full-width colon would not be split here - confirm
        // against the target site's markup.
        if (authorParts.Length > 1)
        {
            novel.Author = authorParts[1];
        }
    }

    // Collect the article entries linked from the chapter list.
    GetArticles(selector, novel, pagePathogen.Url, "//*[@id='list']/dl/dd/a");

    // Hand the scraped data to the downstream consumers.
    pagePathogen.AddResult("novel", novel);
}
/// <summary>
/// Parses an article page: records the request URL, extracts the inner HTML of
/// the element with id <c>content</c> as the <c>"article"</c> result, and logs a
/// warning when that element is missing. Any exception is logged as an error
/// instead of being propagated.
/// </summary>
/// <param name="pagePathogen">
/// Page context that supplies <c>PageSource</c> and <c>Url</c> and receives the parsed results.
/// </param>
public void PageProcess(PagePathogen pagePathogen)
{
    try
    {
        // Record the address this page was requested from.
        pagePathogen.AddResult("requestUrl", pagePathogen.Url);

        var selector = new XPathSelector(pagePathogen.PageSource);
        var node = selector.SelectSingleNode("//*[@id='content']");
        if (node != null)
        {
            pagePathogen.AddResult("article", node.InnerHtml);
        }
        else
        {
            // Nothing matched the content selector: write a crawl warning.
            _loggerService.WriteLog(new Log()
            {
                DateTime = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                Msg = pagePathogen.Url + "---未解析到数据!",
                ClassName = "",
                ActionName = "",
                Duration = 0,
                LogLevel = (int)LCore.Logger.LogLevel.Warn
            });
        }
    }
    catch (Exception e)
    {
        // Exception.TargetSite may be null; guard it so the error handler itself
        // cannot throw and mask the original failure.
        string actionName = e.TargetSite != null ? e.TargetSite.Name : "";

        // Record the error details.
        _loggerService.WriteLog(new Log()
        {
            DateTime = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
            LogLevel = (int)LCore.Logger.LogLevel.Error,
            ClassName = this.GetType().FullName,
            ActionName = actionName,
            Msg = pagePathogen.Url + "---" + e.Message + "---" + e.StackTrace
        });
    }
}