/// <summary>
/// Puts the <c>_crawler</c> instance back into a clean state by clearing, in order,
/// its Discoveries, its Politenesses, its uncrawled CrawlRequests and its
/// DisallowedAbsoluteUris table.
/// </summary>
private void ResetCrawler()
{
    // Discoveries recorded so far.
    _crawler.ClearDiscoveries();

    // Politeness entries.
    _crawler.ClearPolitenesses();

    // CrawlRequests that have not been crawled yet.
    _crawler.ClearUncrawledCrawlRequests();

    // The DisallowedAbsoluteUris table.
    _crawler.ClearDisallowedAbsoluteUris();
}
/// <summary>
/// Runs two separately constructed crawlers at the same time, each on its own thread,
/// and blocks until both <c>_isCrawl1Completed</c> and <c>_isCrawl2Completed</c> are true.
/// </summary>
/// <remarks>
/// Each thread builds its own <c>ApplicationSettings</c>, <c>WebSettings</c> and
/// <c>Crawler</c>, turns off database-sourced CrawlRequests, disables every crawl action,
/// crawl rule and engine action, subscribes its own completion handlers, submits a single
/// CrawlRequest and starts the engine.
/// The completion flags are presumably set by <c>Engine_CrawlCompleted1</c> /
/// <c>Engine_CrawlCompleted2</c> (not visible here) - confirm.
/// NOTE(review): an exception inside either thread is only written to the console; the
/// matching flag would then presumably never be set and the final wait loop would spin
/// forever - confirm against the handlers.
/// </remarks>
public void TestCrawlerConcurrency()
{
    var thread1 = new Thread(delegate()
    {
        try
        {
            ApplicationSettings applicationSettings = new ApplicationSettings();
            WebSettings webSettings = new WebSettings();

            var crawler1 = new Crawler<ArachnodeDAO>(applicationSettings, webSettings, CrawlMode.BreadthFirstByPriority, false);

            crawler1.ApplicationSettings.CreateCrawlRequestsFromDatabaseCrawlRequests = false;

            crawler1.ClearUncrawledCrawlRequests();
            crawler1.ClearDiscoveries();

            DisableCrawlerActionsAndRules(crawler1);

            crawler1.Engine.CrawlRequestCompleted += Engine_CrawlRequestCompleted1;
            crawler1.Engine.CrawlCompleted += Engine_CrawlCompleted1;

            crawler1.Crawl(new CrawlRequest<ArachnodeDAO>(new Discovery<ArachnodeDAO>("http://cbs.com"), 2, UriClassificationType.Domain, UriClassificationType.Domain, 1, RenderType.None, RenderType.None));

            crawler1.Engine.Start();

            // Keep this thread alive until crawler1's own crawl has completed.
            // (Previously this waited on _isCrawl2Completed, a copy-paste slip that tied
            // this thread's lifetime to the other crawler.)
            while (!_isCrawl1Completed)
            {
                Thread.Sleep(1000);
            }
        }
        catch (Exception exception)
        {
            Console.WriteLine(exception.Message);
        }
    });

    var thread2 = new Thread(delegate()
    {
        try
        {
            ApplicationSettings applicationSettings = new ApplicationSettings();
            WebSettings webSettings = new WebSettings();

            var crawler2 = new Crawler<ArachnodeDAO>(applicationSettings, webSettings, CrawlMode.BreadthFirstByPriority, false);

            crawler2.ApplicationSettings.CreateCrawlRequestsFromDatabaseCrawlRequests = false;

            // NOTE(review): unlike crawler1, ClearUncrawledCrawlRequests() is not called
            // here. Left as-is because it is unclear whether the omission is intentional.
            crawler2.ClearDiscoveries();

            DisableCrawlerActionsAndRules(crawler2);

            crawler2.Engine.CrawlRequestCompleted += Engine_CrawlRequestCompleted2;
            crawler2.Engine.CrawlCompleted += Engine_CrawlCompleted2;

            crawler2.Crawl(new CrawlRequest<ArachnodeDAO>(new Discovery<ArachnodeDAO>("http://nbc.com"), 1, UriClassificationType.Domain, UriClassificationType.Domain, 1, RenderType.None, RenderType.None));

            crawler2.Engine.Start();

            // Keep this thread alive until crawler2's own crawl has completed.
            while (!_isCrawl2Completed)
            {
                Thread.Sleep(1000);
            }
        }
        catch (Exception exception)
        {
            Console.WriteLine(exception.Message);
        }
    });

    thread1.Start();
    thread2.Start();

    // Block the calling thread until both crawls have been flagged as completed.
    while (!_isCrawl1Completed || !_isCrawl2Completed)
    {
        Thread.Sleep(1000);
    }
}

/// <summary>
/// Sets <c>IsEnabled = false</c> on every crawl action, crawl rule and engine action
/// registered on <paramref name="crawler"/>.
/// </summary>
/// <param name="crawler">The crawler whose actions and rules are disabled.</param>
private static void DisableCrawlerActionsAndRules(Crawler<ArachnodeDAO> crawler)
{
    foreach (ACrawlAction<ArachnodeDAO> crawlAction in crawler.CrawlActions.Values)
    {
        crawlAction.IsEnabled = false;
    }

    foreach (ACrawlRule<ArachnodeDAO> crawlRule in crawler.CrawlRules.Values)
    {
        crawlRule.IsEnabled = false;
    }

    foreach (AEngineAction<ArachnodeDAO> engineAction in crawler.Engine.EngineActions.Values)
    {
        engineAction.IsEnabled = false;
    }
}