/// <summary>
/// Timer callback: starts a crawl run when the minute part of the current time equals the
/// minute part of the configured "StartTime" value and no run is currently in progress.
/// </summary>
/// <param name="source">Event sender; not used.</param>
/// <param name="e">Elapsed event data; not used.</param>
private void Execute(object source, System.Timers.ElapsedEventArgs e)
{
    // Both strings are sliced at offset 3, length 2 (the "mm" of "HH:mm").
    const int MinuteOffset = 3;
    const int MinuteLength = 2;

    string startTime;
    try
    {
        Dictionary<string, string> settings = new GetConfig().GetSysConfig();

        // Validate before slicing: a missing key or a too-short value would otherwise throw
        // out of the timer handler without ever reaching the log.
        if (settings == null
            || !settings.TryGetValue("StartTime", out startTime)
            || startTime == null
            || startTime.Length < MinuteOffset + MinuteLength)
        {
            WriteLog.InsertLogs("", "System configuration 'StartTime' is missing or is not in HH:mm format.");
            return;
        }
    }
    catch (Exception ex)
    {
        // Reading the configuration failed; record it instead of letting it escape the handler.
        WriteLog.InsertLogs("", ex.Message);
        return;
    }

    string now = DateTime.Now.ToString("HH:mm");

    // NOTE(review): only the minutes are compared, so this matches once in every hour —
    // confirm that an hourly schedule is intended.
    bool minuteMatches = now.Substring(MinuteOffset, MinuteLength) == startTime.Substring(MinuteOffset, MinuteLength);
    if (!minuteMatches || isStart)
    {
        return;
    }

    // NOTE(review): the isStart check-and-set is not atomic; overlapping timer callbacks
    // could both pass the guard above — confirm the timer cannot fire concurrently.
    isStart = true;
    try
    {
        new NewsCrawler().MissionStart();
    }
    catch (Exception ex)
    {
        WriteLog.InsertLogs("", ex.Message);
    }
    finally
    {
        // Always clear the flag so a failed run does not block later runs.
        isStart = false;
    }
}
/// <summary>
/// Creates the crawler for one crawler configuration, wires up its error and completion
/// handlers, gathers targets from every part configuration via StartSync, and passes the
/// gathered targets to FetchDataAsync.
/// </summary>
/// <param name="CrawlerConfigId">Identifier used to look up the part configurations.</param>
/// <param name="Address">Address forwarded unchanged to StartSync for each part configuration.</param>
private void GetCrawlerPartConfig(Guid CrawlerConfigId, string Address)
{
    var configReader = new GetConfig();

    sc = new StrongCrawler(Semaphore);

    // Crawl failures are logged against the failing URI.
    sc.OnError += (sender, args) => WriteLog.InsertLogs(args.Uri.ToString(), args.Exception.Message);

    // Each completed crawl result is stored through a fresh NewsOperator.
    sc.OnCompleted += (sender, args) => new NewsOperator().InsertNews(args.News);

    List<CrawlerPartConfig> partConfigs = configReader.GetCrawlerPartConfig(CrawlerConfigId);
    var collectedTargets = new List<Target>();

    foreach (CrawlerPartConfig partConfig in partConfigs)
    {
        StartSync(partConfig, Address, ref collectedTargets);
    }

    FetchDataAsync(collectedTargets);
}
/// <summary>
/// Asynchronously crawls a single page with a PhantomJS driver on a thread-pool thread,
/// holding one slot of <c>Semaphore</c> for the duration of the crawl.
/// </summary>
/// <param name="uri">Address of the page to open.</param>
/// <param name="script">Optional script executed after navigation; skipped when null.</param>
/// <param name="operation">Optional operation passed to ExecuteAction after navigation; skipped when null.</param>
/// <param name="cpc">Part configuration forwarded to GetNews.</param>
/// <returns>
/// A task that completes when the crawl attempt has finished. Exceptions raised while
/// acquiring the semaphore or crawling are reported through <c>OnError</c>.
/// </returns>
public async Task StartAsync(Uri uri, Script script, Operation operation, CrawlerPartConfig cpc)
{
    await Task.Run(() =>
    {
        // Acquire outside the try/finally below so Release() only runs for a slot that was
        // actually taken; a failed Wait() must not release a slot it never held.
        // NOTE(review): Wait() blocks a thread-pool thread while queued; if Semaphore is a
        // SemaphoreSlim, awaiting WaitAsync() before Task.Run would avoid that — confirm.
        try
        {
            Semaphore.Wait();
        }
        catch (Exception ex)
        {
            OnError?.Invoke(this, new OnErrorEventArgs(uri, ex));
            return;
        }

        try
        {
            CrawlPage(uri, script, operation, cpc);
        }
        catch (Exception ex)
        {
            // Covers driver/service creation failures and failures while quitting the driver.
            OnError?.Invoke(this, new OnErrorEventArgs(uri, ex));
        }
        finally
        {
            Semaphore.Release();
        }
    });
}

/// <summary>
/// Opens the page in a new PhantomJS driver, runs the optional script and operation, and
/// raises <c>OnCompleted</c> with the extracted news when the page was processed in time.
/// The driver is always quit before returning.
/// </summary>
private void CrawlPage(Uri uri, Script script, Operation operation, CrawlerPartConfig cpc)
{
    // Opening a page for too long may lead to the driver being garbage collected,
    // so processing is limited to 30 seconds.
    const int MaxLoadSeconds = 30;

    var service = PhantomJSDriverService.CreateDefaultService();
    service.LoadImages = false;
    var options = new PhantomJSOptions();
    var driver = new PhantomJSDriver(service, options);

    try
    {
        // Stopwatch is monotonic, unlike DateTime.Now which shifts with clock adjustments.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        driver.Navigate().GoToUrl(uri.ToString());

        if (script != null)
        {
            driver.ExecuteScript(script.Code, script.Args);
        }

        if (operation != null)
        {
            ExecuteAction(operation, driver);
        }

        var threadId = Thread.CurrentThread.ManagedThreadId;
        var seconds = Convert.ToInt32(stopwatch.Elapsed.TotalSeconds);

        if (seconds < MaxLoadSeconds)
        {
            News news = GetNews(driver, cpc, uri, threadId, seconds);
            OnCompleted?.Invoke(this, new OnCompletedEventArgs(news));
        }
        else
        {
            WriteLog.InsertLogs(uri.ToString(), "打开网页超时");
        }
    }
    catch (Exception ex)
    {
        OnError?.Invoke(this, new OnErrorEventArgs(uri, ex));
    }
    finally
    {
        // Single cleanup point for both the success and the timeout path.
        driver.Quit();
    }
}