Example #1
0
        /// <summary>
        /// Expands templated URIs of the form <c>prefix{start~end}</c> into one URI per
        /// integer in the inclusive range <c>start..end</c>, replaces
        /// <c>config.Uris</c> with the expanded list, and then delegates to the base
        /// implementation.
        /// </summary>
        /// <remarks>
        /// Null or empty entries are skipped. Entries that contain no
        /// <c>{...}</c> placeholder are kept unchanged instead of failing.
        /// NOTE(review): any text following the closing <c>}</c> is not carried over
        /// to the generated URIs (same as before) - confirm this is intended.
        /// </remarks>
        /// <param name="config">Spider configuration whose <c>Uris</c> list is expanded.</param>
        /// <exception cref="FormatException">
        /// The placeholder does not contain two <c>~</c>-separated values, or a value is not a valid integer.
        /// </exception>
        /// <exception cref="OverflowException">A range bound does not fit in an <see cref="int"/>.</exception>
        public override void InitConfig(SpiderConfig config)
        {
            IList<string> expanded = new List<string>();

            foreach (var uri in config.Uris)
            {
                if (string.IsNullOrEmpty(uri))
                {
                    continue;
                }

                int open  = uri.IndexOf('{');
                int close = uri.LastIndexOf('}');

                // No well-formed "{...}" placeholder: previously this crashed inside
                // Substring with a negative index/length; keep the URI as-is instead.
                if (open < 0 || close <= open)
                {
                    expanded.Add(uri);
                    continue;
                }

                string[] bounds = uri.Substring(open + 1, close - open - 1).Split('~');
                if (bounds.Length < 2)
                {
                    throw new FormatException(
                        "Expected a range placeholder of the form {start~end} in URI: " + uri);
                }

                // Only the first two segments are used; extra '~' segments are ignored.
                int    first  = Convert.ToInt32(bounds[0]);
                int    last   = Convert.ToInt32(bounds[1]);
                string prefix = uri.Substring(0, open);

                // Inclusive on both ends; an inverted range (first > last) yields nothing.
                for (int page = first; page <= last; page++)
                {
                    expanded.Add(prefix + page);
                }
            }

            config.Uris = expanded;
            base.InitConfig(config);
        }
Example #2
0
        /// <summary>
        /// Loads the source images that still need crawling and builds the data list:
        /// for every image, one entry keyed by its URL followed by entries keyed by
        /// <c>Url/2</c> through <c>Url/10</c>, all pointing at the same image. The list
        /// is assigned to <c>base.Datas</c> before delegating to the base implementation.
        /// </summary>
        /// <param name="config">Spider configuration forwarded to the base implementation.</param>
        public override void InitConfig(SpiderConfig config)
        {
            var searchInput = new Application.Dto.ImageSearchInput()
            {
                RowCount         = 2,
                IsCrawlerImgInfo = false
            };

            // Reads the call's Result synchronously (presumably blocks on a Task - confirm).
            var pendingImages = _imageService.GetSourceImgs(searchInput).Result;

            IList<KeyValuePair<string, Img>> entries = new List<KeyValuePair<string, Img>>();
            foreach (var image in pendingImages)
            {
                // First entry uses the bare URL; the remaining nine append "/2" .. "/10".
                entries.Add(new KeyValuePair<string, Img>(image.Url, image));
                for (int page = 2; page <= 10; page++)
                {
                    entries.Add(new KeyValuePair<string, Img>(image.Url + "/" + page, image));
                }
            }

            base.Datas = entries;
            base.InitConfig(config);
        }
Example #3
0
 /// <summary>
 /// Initializes the configuration: queries up to 10 articles whose content has not
 /// been crawled yet, stores them in <c>Datas</c> as (URL, article) pairs, and then
 /// delegates to the base implementation.
 /// </summary>
 /// <param name="config">Spider configuration forwarded to the base implementation.</param>
 public override void InitConfig(SpiderConfig config)
 {
     var query = new Application.Dto.ArticleSearchInput()
     {
         IsCrawlerContent = false,
         RowCount         = 10
     };

     // Key each article by its URL.
     var articles = _novelService.GetArticles(query);
     Datas = articles
         .Select(article => new KeyValuePair<string, Article>(article.Url, article))
         .ToList();

     base.InitConfig(config);
 }
Example #4
0
 /// <summary>
 /// Starts crawling: for each entry in <c>Uris</c>, loads the page in a new
 /// PhantomJS driver on a background task and raises <c>OnCompleted</c> with the
 /// page source, the driver and the elapsed time in milliseconds.
 /// </summary>
 /// <remarks>
 /// Each task is awaited before the next one starts, so URIs are processed one at
 /// a time, in order. The driver is disposed as soon as <c>OnCompleted</c> returns;
 /// handlers must finish using <c>Driver</c> before returning.
 /// Because the method is <c>async void</c>, callers cannot await it, and an
 /// exception thrown while crawling is not observable by the caller.
 /// </remarks>
 /// <param name="config">Spider configuration; not read by this implementation.</param>
 public async virtual void Run(SpiderConfig config)
 {
     for (int i = 0; i < Uris.Count; i++)
     {
         string uri = Uris[i];

         // Run the blocking browser work off the calling thread.
         await Task.Factory.StartNew(() =>
         {
             // Dispose the driver when done; previously every URI leaked a driver instance.
             using (var driver = new PhantomJSDriver(_service, _options))
             {
                 // Start timing before navigation so Duration covers the page load,
                 // not just the PageSource read.
                 var stopWatch = new Stopwatch();
                 stopWatch.Start();

                 driver.Navigate().GoToUrl(uri);

                 OnWebDriverCompleteEventArgs completeArgs = new OnWebDriverCompleteEventArgs();
                 completeArgs.Page   = driver.PageSource;
                 completeArgs.Driver = driver;

                 stopWatch.Stop();
                 completeArgs.Duration = stopWatch.ElapsedMilliseconds;

                 this.OnCompleted(this, completeArgs);
             }
         });
     }
 }
Example #5
0
 /// <summary>
 /// Placeholder that is not implemented; every call throws.
 /// </summary>
 /// <param name="config">Spider configuration; not used.</param>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public void InitConfig(SpiderConfig config) => throw new NotImplementedException();
Example #6
0
 /// <summary>
 /// Initializes the current spider by storing the supplied configuration in
 /// <c>Config</c>.
 /// </summary>
 /// <param name="config">Configuration to assign to <c>Config</c>.</param>
 public virtual void InitConfig(SpiderConfig config) => Config = config;