Exemplo n.º 1
0
        /// <summary>
        /// Parses a novel index page: reads the novel name and author from the
        /// "info" element, collects the chapter links, and publishes the resulting
        /// <c>Novel</c> on the page under the "novel" result key.
        /// </summary>
        /// <param name="pagePathogen">Fetched page; its PageSource is parsed and the result is added to it.</param>
        public void PageProcess(PagePathogen pagePathogen)
        {
            var selector = new XPathSelector(pagePathogen.PageSource);
            // Novel being assembled from this page.
            Novel novel = new Novel();

            // Article (chapter) list; starts empty and is handed to GetArticles below.
            novel.Articles = new List<Article>();
            var nameEle = selector.SelectSingleNode("//*[@id='info']/h1");

            if (nameEle != null)
            {
                // Novel name.
                novel.Name = nameEle.InnerText;
            }
            var authorEle = selector.SelectSingleNode("//*[@id='info']/p[1]");

            if (authorEle != null)
            {
                string pStr = authorEle.InnerText ?? string.Empty;

                // The paragraph is expected to look like "label:author". Take the part
                // after the first colon, but do not index blindly: a page without an
                // ASCII colon previously raised IndexOutOfRangeException here.
                string[] parts = pStr.Split(':');
                if (parts.Length < 2)
                {
                    // Fall back to the full-width colon, which Chinese pages commonly use.
                    parts = pStr.Split('\uFF1A');
                }

                if (parts.Length > 1)
                {
                    // Author.
                    novel.Author = parts[1];
                }
            }
            // Collect the article entries linked from the chapter list.
            GetArticles(selector, novel, pagePathogen.Url, "//*[@id='list']/dl/dd/a");
            // Hand the scraped data on to downstream consumers.
            pagePathogen.AddResult("novel", novel);
        }
Exemplo n.º 2
0
 /// <summary>
 /// Parses an article page: records the request URL and, when the element with
 /// id "content" is found, stores its inner HTML under the "article" result key.
 /// A missing content element is logged as a warning; any exception is logged
 /// as an error and not rethrown.
 /// </summary>
 /// <param name="pagePathogen">Fetched page whose PageSource is parsed and which receives the results.</param>
 public void PageProcess(PagePathogen pagePathogen)
 {
     try
     {
         // Record the request URL alongside the parsed data.
         pagePathogen.AddResult("requestUrl", pagePathogen.Url);
         var selector = new XPathSelector(pagePathogen.PageSource);
         var node     = selector.SelectSingleNode("//*[@id='content']");
         if (node != null)
         {
             pagePathogen.AddResult("article", node.InnerHtml);
         }
         else
         {
             // Nothing matched: write a warning to the crawl log.
             _loggerService.WriteLog(new Log()
             {
                 DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                 Msg        = pagePathogen.Url + "---未解析到数据!",
                 ClassName  = "",
                 ActionName = "",
                 Duration   = 0,
                 LogLevel   = (int)LCore.Logger.LogLevel.Warn
             });
         }
     }
     catch (Exception e)
     {
         // The handler must not throw itself, otherwise the original error is lost.
         // Exception.TargetSite can be null, and pagePathogen may be the very null
         // reference that brought us here, so guard both before dereferencing.
         var targetSite = e.TargetSite;
         string url = "";
         if (pagePathogen != null)
         {
             url += pagePathogen.Url;
         }

         // Record the error details.
         _loggerService.WriteLog(new Log()
         {
             DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
             LogLevel   = (int)LCore.Logger.LogLevel.Error,
             ClassName  = this.GetType().FullName,
             ActionName = targetSite != null ? targetSite.Name : "",
             Msg        = url + "---" + e.Message + "---" + e.StackTrace
         });
     }
 }
Exemplo n.º 3
0
        /// <summary>
        /// Executes the request and returns a <c>PagePathogen</c> carrying the request
        /// URL, host and, on success, the response body decoded as UTF-8
        /// (gzip-compressed bodies are decompressed first).
        /// </summary>
        /// <param name="request">Prepared HTTP request to execute.</param>
        /// <returns>
        /// The populated page. If the request or the read fails, the page is still
        /// returned with Url and Host set and PageSource left unassigned.
        /// </returns>
        public static PagePathogen GetResponse(HttpWebRequest request)
        {
            var pagePathogen = new PagePathogen();

            pagePathogen.Url  = request.Address.AbsoluteUri;
            pagePathogen.Host = request.Address.Host;
            try
            {
                using (var response = (HttpWebResponse)request.GetResponse())
                {
                    // Detect gzip with an ordinal, case-insensitive search. The previous
                    // ToLower().Contains("gzip") was culture-sensitive and missed "GZIP"
                    // under cultures with special casing rules for 'I' (e.g. tr-TR).
                    string contentEncoding = response.ContentEncoding;
                    bool isGzip = contentEncoding != null
                                  && contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0;

                    using (Stream rawStream = response.GetResponseStream())
                    using (Stream bodyStream = isGzip
                               ? new GZipStream(rawStream, CompressionMode.Decompress)
                               : rawStream)
                    using (var reader = new StreamReader(bodyStream, Encoding.UTF8))
                    {
                        pagePathogen.PageSource = reader.ReadToEnd();
                    }
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: callers receive the page without PageSource
                // when the request or the read fails, so the failure is not rethrown.
            }
            return pagePathogen;
        }