/// <summary>
/// Expands the range templates found in <c>config.Uris</c> and then runs the
/// base initialization.
/// </summary>
/// <remarks>
/// A template contains a <c>{start~end}</c> placeholder; one URI is generated for
/// every integer from <c>start</c> to <c>end</c> inclusive, with the placeholder
/// replaced by that integer. For example <c>http://host/list/{1~3}</c> yields
/// <c>http://host/list/1</c>, <c>http://host/list/2</c> and <c>http://host/list/3</c>.
/// URIs without a placeholder are kept unchanged; null or empty entries are skipped.
/// </remarks>
/// <param name="config">Spider configuration whose <c>Uris</c> are read.</param>
/// <exception cref="FormatException">
/// A placeholder is present but does not have the form <c>{start~end}</c> with
/// integer bounds.
/// </exception>
public override void InitConfig(SpiderConfig config)
{
    IList<string> uris = new List<string>();

    // Parse the URL templates.
    foreach (var uri in config.Uris)
    {
        if (string.IsNullOrEmpty(uri))
        {
            continue;
        }

        // Locate the placeholder.
        int sIndex = uri.IndexOf('{');
        int eIndex = uri.LastIndexOf('}');

        // No well-formed "{...}" section: treat the entry as a literal URI
        // instead of letting Substring throw on a negative index/length.
        if (sIndex < 0 || eIndex < sIndex)
        {
            uris.Add(uri);
            continue;
        }

        string[] bounds = uri.Substring(sIndex + 1, eIndex - sIndex - 1).Split('~');
        if (bounds.Length < 2)
        {
            throw new FormatException(
                "URI range placeholder must have the form {start~end}: " + uri);
        }

        int sValue = Convert.ToInt32(bounds[0]);
        int eValue = Convert.ToInt32(bounds[1]);

        string urlStartPart = uri.Substring(0, sIndex);
        // Keep whatever follows the placeholder (e.g. ".html") in every expanded URI.
        string urlEndPart = uri.Substring(eIndex + 1);

        for (int i = sValue; i <= eValue; i++)
        {
            uris.Add(urlStartPart + i + urlEndPart);
        }
    }

    // NOTE(review): the expanded list is not written back to the configuration;
    // the assignment "config.Uris = uris" was already disabled in the original.
    // Confirm whether that is intentional before re-enabling it.
    base.InitConfig(config);
}
/// <summary>
/// Builds the crawl targets for source images and then runs the base initialization.
/// </summary>
/// <remarks>
/// Requests source images from <c>_imageService</c> with <c>RowCount = 2</c> and
/// <c>IsCrawlerImgInfo = false</c>, reading the call's <c>Result</c> synchronously.
/// For each image ten entries are queued, in order: the image URL itself, followed
/// by the URL with <c>/2</c> through <c>/10</c> appended. Every entry maps back to
/// the image it was derived from.
/// </remarks>
/// <param name="config">Spider configuration forwarded to the base implementation.</param>
public override void InitConfig(SpiderConfig config)
{
    var searchInput = new Application.Dto.ImageSearchInput()
    {
        RowCount = 2,
        IsCrawlerImgInfo = false
    };
    var sourceImgs = _imageService.GetSourceImgs(searchInput).Result;

    IList<KeyValuePair<string, Img>> targets = new List<KeyValuePair<string, Img>>();
    foreach (var img in sourceImgs)
    {
        // The first entry uses the bare URL ...
        targets.Add(new KeyValuePair<string, Img>(img.Url, img));

        // ... the remaining nine carry a numeric suffix.
        for (int page = 2; page <= 10; page++)
        {
            targets.Add(new KeyValuePair<string, Img>(img.Url + "/" + page, img));
        }
    }

    base.Datas = targets;
    base.InitConfig(config);
}
/// <summary>
/// Initializes the configuration: loads the articles to process into
/// <c>Datas</c>, keyed by URL, and then runs the base initialization.
/// </summary>
/// <remarks>
/// Articles are requested from <c>_novelService</c> with
/// <c>IsCrawlerContent = false</c> and <c>RowCount = 10</c>.
/// </remarks>
/// <param name="config">Spider configuration forwarded to the base implementation.</param>
public override void InitConfig(SpiderConfig config)
{
    var searchInput = new Application.Dto.ArticleSearchInput()
    {
        IsCrawlerContent = false,
        RowCount = 10
    };
    var articles = _novelService.GetArticles(searchInput);

    // Pair every article with its URL.
    Datas = articles
        .Select(article =>
        {
            var entry = new KeyValuePair<string, Article>(article.Url, article);
            return entry;
        })
        .ToList();

    base.InitConfig(config);
}
/// <summary>
/// Starts crawling: loads every entry of <c>Uris</c> in a PhantomJS driver and
/// raises <c>OnCompleted</c> with the page source for each one.
/// </summary>
/// <remarks>
/// URIs are processed one at a time, because each task is awaited before the next
/// one is started. The reported <c>Duration</c> covers navigation plus reading the
/// page source, in milliseconds. If navigation or reading the page fails, the
/// driver is quit before the exception propagates; on success the driver is handed
/// to the <c>OnCompleted</c> handlers through the event arguments and is not quit
/// here. Because the method is <c>async void</c>, callers cannot await it or
/// observe its exceptions.
/// </remarks>
/// <param name="config">Spider configuration; not read by this method.</param>
public async virtual void Run(SpiderConfig config)
{
    // Process the batch.
    for (int i = 0; i < Uris.Count; i++)
    {
        string uri = Uris[i];

        // Run the blocking driver work on a separate task.
        await Task.Factory.StartNew(() =>
        {
            OnWebDriverCompleteEventArgs completeArgs = new OnWebDriverCompleteEventArgs();
            var driver = new PhantomJSDriver(_service, _options);

            try
            {
                // Start timing before navigation so the duration reflects the page load,
                // not just the PageSource read.
                var stopWatch = new Stopwatch();
                stopWatch.Start();
                driver.Navigate().GoToUrl(uri);
                completeArgs.Page = driver.PageSource;
                stopWatch.Stop();

                completeArgs.Driver = driver;
                completeArgs.Duration = stopWatch.ElapsedMilliseconds;
            }
            catch
            {
                // Nobody else has received the driver yet, so release it here
                // rather than leaking the browser session.
                driver.Quit();
                throw;
            }

            this.OnCompleted(this, completeArgs);
        });
    }
}
/// <summary>
/// Not implemented; calling this method always throws.
/// </summary>
/// <param name="config">Spider configuration; unused.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void InitConfig(SpiderConfig config)
{
    throw new NotImplementedException();
}
/// <summary>
/// Initializes the current spider by storing the supplied configuration
/// in <c>Config</c>.
/// </summary>
/// <param name="config">Configuration to assign to <c>Config</c>.</param>
public virtual void InitConfig(SpiderConfig config)
{
    Config = config;
}