/// <summary>
/// Handles the crawl-completed notification: shuts down the headless browser
/// carried by the event arguments and then passes those arguments to the parser.
/// </summary>
/// <param name="sender">Object that raised the notification (not used here).</param>
/// <param name="e">
/// Completion data for the crawled page. Its <c>Driver</c> is quit before
/// <see cref="HtmlParser"/> is invoked with the same instance.
/// </param>
private void SpiderCrawler_OnCompleted(object sender, OnWebDriverCompleteEventArgs e)
{
    // Shut down the headless browser first.
    var browser = e.Driver;
    browser.Quit();

    // Then hand the completion data to the HTML parser.
    this.HtmlParser(e);
}
/// <summary>
/// Starts crawling: for each entry in <c>Uris</c>, opens a headless browser on a
/// thread-pool thread, loads the page, and reports the result through <c>OnCompleted</c>.
/// </summary>
/// <param name="config">Crawler configuration. It is not read by this implementation.</param>
/// <remarks>
/// Each URI is awaited before the next one is started, so pages are processed one at a time.
/// The method is <c>async void</c>, so callers cannot await it, and an exception thrown while
/// crawling is not observable by the caller. The signature is kept for compatibility with
/// existing overrides.
/// </remarks>
public virtual async void Run(SpiderConfig config)
{
    // Process the URIs as a batch, in index order.
    for (int i = 0; i < Uris.Count; i++)
    {
        string uri = Uris[i];

        // Task.Run always targets the thread pool; Task.Factory.StartNew would bind to
        // whatever TaskScheduler.Current happens to be at the call site.
        await Task.Run(() =>
        {
            var driver = new PhantomJSDriver(_service, _options);
            OnWebDriverCompleteEventArgs completeArgs;

            try
            {
                driver.Navigate().GoToUrl(uri);

                completeArgs = new OnWebDriverCompleteEventArgs();

                // NOTE(review): the stopwatch only covers reading PageSource, not the
                // navigation above - confirm that this is what Duration should report.
                var stopWatch = Stopwatch.StartNew();
                completeArgs.Page = driver.PageSource;
                completeArgs.Driver = driver;
                stopWatch.Stop();
                completeArgs.Duration = stopWatch.ElapsedMilliseconds;
            }
            catch
            {
                // Navigation or page retrieval failed before the driver was handed off,
                // so nobody else will quit it; do it here to avoid leaking the browser.
                driver.Quit();
                throw;
            }

            // From here on, whoever receives the completion data owns the driver.
            // NOTE(review): if OnCompleted is an event with no subscribers this call
            // throws and the driver is left running - confirm a handler is always attached.
            this.OnCompleted(this, completeArgs);
        });
    }
}
/// <summary>
/// Parses the HTML of a crawled page. Implemented by derived classes.
/// </summary>
/// <param name="e">
/// Completion data for the crawled page. When called from
/// <c>SpiderCrawler_OnCompleted</c>, the driver it carries has already been quit.
/// </param>
public abstract void HtmlParser(OnWebDriverCompleteEventArgs e);