Ejemplo n.º 1
0
 /// <summary>
 /// Parses the downloaded index page, collects the novel links found in the
 /// "hottest", "latest updates" and "click ranking" sections, and starts the
 /// "NovelSpider" task with the resulting URL list.
 /// </summary>
 /// <param name="e">
 /// Completion arguments of the request; <c>Page</c>, <c>Host</c> and <c>Uri</c> are read here.
 /// </param>
 /// <remarks>
 /// Any exception raised while parsing is written to the logger service and not rethrown.
 /// </remarks>
 public override void HtmlParser(OnCompleteEventArgs e)
 {
     try
     {
         lock (_lock)
         {
             // Parser over the downloaded page.
             var selector = new XPathSelector(e.Page);

             // Anchor XPaths, in the order their links are queued:
             // hottest novels, latest updates, click ranking.
             string[] sectionXPaths =
             {
                 "//*[@id='hotcontent']/div/div/div[1]/a",
                 "//*[@id='newscontent']/div[1]/ul/li/span[2]/a",
                 "//*[@id='newscontent']/div[2]/ul/li/span[2]/a"
             };

             IList<string> urls = new List<string>();
             foreach (string sectionXPath in sectionXPaths)
             {
                 var anchors = selector.SelectNodes(sectionXPath);
                 if (anchors == null)
                 {
                     // A null result is treated as "section not present on this page":
                     // skip it so the remaining sections are still crawled instead of
                     // failing the whole page with a NullReferenceException.
                     continue;
                 }

                 foreach (var anchor in anchors)
                 {
                     urls.Add("http://" + e.Host + anchor.GetAttributeValue("href", ""));
                 }
             }

             // Start the novel spider on the collected links.
             var spiderManager = ContainerManager.Resolve<SpiderManager>();
             spiderManager.RunTask("NovelSpider", new SpiderConfig()
             {
                 Uris = urls
             });
         }
     }
     catch (Exception exception)
     {
         // Record the error.
         _loggerService.WriteLog(new Log()
         {
             DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
             LogLevel   = (int)LCore.Logger.LogLevel.Error,
             ClassName  = this.GetType().FullName,
             // TargetSite may be null; the error handler must not throw itself.
             ActionName = exception.TargetSite?.Name ?? "",
             Msg        = e.Uri + "---" + exception.StackTrace
         });
     }
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates an <c>Article</c> for every node matched by <paramref name="xpath"/>,
        /// reports it through <c>Config.CallBack</c> (when one is set) and appends it to
        /// <c>novel.Articles</c>.
        /// </summary>
        /// <param name="selector">Selector on which <paramref name="xpath"/> is evaluated.</param>
        /// <param name="novel">Novel the articles belong to; its <c>Articles</c> collection is appended to.</param>
        /// <param name="uri">Prefix prepended to each node's <c>href</c> value to form the article URL.</param>
        /// <param name="xpath">XPath expression selecting the article link nodes.</param>
        /// <remarks>
        /// Links whose last path segment does not start with a numeric file name followed
        /// by a dot (for example <c>12345.html</c>) are skipped, because no sequence
        /// number can be derived from them.
        /// </remarks>
        private void GetArticles(XPathSelector selector, Novel novel, string uri, string xpath)
        {
            var anchors = selector.SelectNodes(xpath);
            if (anchors == null)
            {
                return;
            }

            foreach (var anchor in anchors)
            {
                string articleUri = uri + anchor.GetAttributeValue("href", "");

                // Last path segment, e.g. "12345.html" (the whole string when there is no '/').
                string fileName = articleUri.Substring(articleUri.LastIndexOf('/') + 1);
                int    dotIndex = fileName.IndexOf('.');
                long   sequence;

                // One malformed link must not abort the whole chapter list: skip it
                // instead of letting Substring / Parse throw.
                if (dotIndex <= 0 || !long.TryParse(fileName.Substring(0, dotIndex), out sequence))
                {
                    continue;
                }

                string articleTitle = anchor.InnerText;
                var    article      = new Article()
                {
                    Title = articleTitle,
                    Url   = articleUri,
                    Novel = novel,
                    Seq   = sequence
                };

                // Progress callback.
                Config.CallBack?.Invoke(DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + "--文章标题:" + articleTitle + " 文章地址:" + articleUri);
                novel.Articles.Add(article);
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Parses the downloaded listing page, stores one <c>Img</c> per link found under
 /// the <c>main</c> element through the image service, and logs how long the
 /// request and the parsing took.
 /// </summary>
 /// <param name="e">
 /// Completion arguments of the request; <c>Page</c>, <c>Uri</c> and <c>Duration</c> are read here.
 /// </param>
 /// <remarks>
 /// Any exception raised while parsing is written to the logger service and not rethrown.
 /// </remarks>
 public override void HtmlParser(OnCompleteEventArgs e)
 {
     try
     {
         lock (_lock)
         {
             var stopWatch = Stopwatch.StartNew();

             // Initialise the parser.
             var selector = new XPathSelector(e.Page);
             var anchors  = selector.SelectNodes("//*[@id='main']/div/div[1]/a");

             // A null result is treated as "nothing matched": nothing is stored,
             // but the timing entry below is still written.
             if (anchors != null)
             {
                 IList<Img> imgs = new List<Img>();
                 foreach (var anchor in anchors)
                 {
                     string url = anchor.GetAttributeValue("href", "");
                     if (string.IsNullOrEmpty(url))
                     {
                         // Anchor without an href: there is no URL to crawl later.
                         continue;
                     }

                     imgs.Add(new Img()
                     {
                         Url = url
                     });
                 }
                 _imageService.AddImages(imgs);
             }

             stopWatch.Stop();

             // Record the crawl log.
             _loggerService.WriteLog(new Log()
             {
                 DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                 Msg        = e.Uri + "请求消耗:" + e.Duration + "---" + "数据解析消耗:" + stopWatch.ElapsedMilliseconds,
                 ClassName  = "",
                 ActionName = "",
                 Duration   = e.Duration + stopWatch.ElapsedMilliseconds,
                 LogLevel   = (int)LCore.Logger.LogLevel.Info
             });
         }
     }
     catch (Exception exception)
     {
         // Record the error.
         _loggerService.WriteLog(new Log()
         {
             DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
             LogLevel   = (int)LCore.Logger.LogLevel.Error,
             ClassName  = this.GetType().Name,
             // TargetSite may be null; the error handler must not throw itself.
             ActionName = exception.TargetSite?.Name ?? "",
             Msg        = e.Uri + "---" + exception.Message
         });
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Creates an <c>Article</c> for every node matched by <paramref name="xpath"/>
        /// and appends it to <c>novel.Articles</c>.
        /// </summary>
        /// <param name="selector">Selector on which <paramref name="xpath"/> is evaluated.</param>
        /// <param name="novel">Novel the articles belong to; its <c>Articles</c> collection is appended to.</param>
        /// <param name="uri">Prefix prepended to each node's <c>href</c> value to form the article URL.</param>
        /// <param name="xpath">XPath expression selecting the article link nodes.</param>
        /// <remarks>
        /// Links whose last path segment does not start with a numeric file name followed
        /// by a dot (for example <c>12345.html</c>) are skipped, because no sequence
        /// number can be derived from them.
        /// </remarks>
        private void GetArticles(XPathSelector selector, Novel novel, string uri, string xpath)
        {
            var anchors = selector.SelectNodes(xpath);
            if (anchors == null)
            {
                return;
            }

            foreach (var anchor in anchors)
            {
                string articleUri = uri + anchor.GetAttributeValue("href", "");

                // Last path segment, e.g. "12345.html" (the whole string when there is no '/').
                string fileName = articleUri.Substring(articleUri.LastIndexOf('/') + 1);
                int    dotIndex = fileName.IndexOf('.');
                long   sequence;

                // One malformed link must not abort the whole chapter list: skip it
                // instead of letting Substring / Parse throw.
                if (dotIndex <= 0 || !long.TryParse(fileName.Substring(0, dotIndex), out sequence))
                {
                    continue;
                }

                novel.Articles.Add(new Article()
                {
                    Title = anchor.InnerText,
                    Url   = articleUri,
                    Novel = novel,
                    Seq   = sequence
                });
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Parses the downloaded post page, records one <c>ImageInfo</c> per
        /// <c>img</c> element that has a non-empty <c>src</c>, marks the current image
        /// as crawled, and logs how long the request and the parsing took.
        /// </summary>
        /// <param name="e">
        /// Completion arguments of the request; <c>Page</c>, <c>Uri</c> and <c>Duration</c> are read here.
        /// </param>
        /// <remarks>
        /// Only the image metadata (URL, source page, file name) is recorded; the image
        /// itself is not downloaded by this method. Any exception raised while parsing
        /// is written to the logger service and not rethrown.
        /// </remarks>
        public override void HtmlParser(OnCompleteEventArgs e)
        {
            // Thread lock.
            lock (_lock)
            {
                try
                {
                    var stopWatch = Stopwatch.StartNew();
                    var img       = Current;
                    var selector  = new XPathSelector(e.Page);

                    // Try the known page layouts in order; the first one that matches wins.
                    var imgEles = selector.SelectNodes("//*[@id='post']/div[3]/p/img")
                                  ?? selector.SelectNodes("//*[@id='post']/div[3]/p/a/img")
                                  ?? selector.SelectNodes("//*[@id='post']/div[3]/p/strong/a/img");

                    if (imgEles == null)
                    {
                        // No layout matched: emit the page address to the debug output.
                        Debug.Write(e.Uri);
                    }
                    else
                    {
                        IList<ImageInfo> imageInfos = new List<ImageInfo>();
                        foreach (var imgEle in imgEles)
                        {
                            string src = imgEle.GetAttributeValue("src", "");
                            if (string.IsNullOrEmpty(src))
                            {
                                continue;
                            }

                            // Keep only the part of the file name before the first '!'.
                            // A single ordinal IndexOf(char) both detects and locates the
                            // marker, so the cut position can never be -1.
                            string fileName  = Path.GetFileName(src);
                            int    bangIndex = fileName.IndexOf('!');
                            if (bangIndex >= 0)
                            {
                                fileName = fileName.Substring(0, bangIndex);
                            }

                            imageInfos.Add(new ImageInfo()
                            {
                                Img       = img,
                                Url       = src,
                                SourceUrl = e.Uri,
                                Name      = fileName
                            });
                        }

                        img.IsCrawlerImgInfo = true;
                        _imageService.AddImageInfos(imageInfos, img);
                    }

                    stopWatch.Stop();

                    // Record the crawl log.
                    _loggerService.WriteLog(new Log()
                    {
                        DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                        Msg        = e.Uri + "请求消耗:" + e.Duration + "---" + "数据解析消耗:" + stopWatch.ElapsedMilliseconds,
                        ClassName  = "",
                        ActionName = "",
                        Duration   = e.Duration + stopWatch.ElapsedMilliseconds,
                        LogLevel   = (int)LCore.Logger.LogLevel.Info
                    });
                }
                catch (Exception exception)
                {
                    // Record the error.
                    _loggerService.WriteLog(new Log()
                    {
                        DateTime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
                        LogLevel   = (int)LCore.Logger.LogLevel.Error,
                        ClassName  = this.GetType().Name,
                        // TargetSite may be null; the error handler must not throw itself.
                        ActionName = exception.TargetSite?.Name ?? "",
                        Msg        = e.Uri + "---" + exception.Message
                    });
                }
            }
        }