/// <summary>
/// Runs the same crawl once per storage setup and asserts that each run
/// produces the same number of steps as a reference run.
/// </summary>
/// <remarks>
/// The reference run is performed right after
/// <c>TestModule.SetupInMemoryStorage()</c>, so the first comparison is
/// between two crawls made under that same setup.
/// NOTE(review): the <c>TestModule.Setup*</c> helpers presumably switch the
/// storage services used by the crawler; confirm against TestModule.
/// </remarks>
public void TestQueuesYieldSameResult()
{
    // Reference run plus a second run under the in-memory setup.
    TestModule.SetupInMemoryStorage();
    var baseline = CollectionCrawl();
    var inMemoryRun = CollectionCrawl();
    Assert.AreEqual(baseline.Steps.Count, inMemoryRun.Steps.Count);

    // File storage setup.
    TestModule.SetupFileStorage();
    var fileStorageRun = CollectionCrawl();
    Assert.AreEqual(baseline.Steps.Count, fileStorageRun.Steps.Count);

    // Isolated storage setup.
    TestModule.SetupIsolatedStorage();
    var isolatedStorageRun = CollectionCrawl();
    Assert.AreEqual(baseline.Steps.Count, isolatedStorageRun.Steps.Count);

    // EF services storage setup.
    TestModule.SetupEfServicesStorage();
    var efStorageRun = CollectionCrawl();
    Assert.AreEqual(baseline.Steps.Count, efStorageRun.Steps.Count);

    // ESENT services storage setup.
    TestModule.SetupESentServicesStorage();
    var esentStorageRun = CollectionCrawl();
    Assert.AreEqual(baseline.Steps.Count, esentStorageRun.Steps.Count);
}
/// <summary>
/// Crawls <c>http://ncrawler.codeplex.com</c> and returns the
/// <c>CollectorStep</c> instance that was passed to the crawler's constructor.
/// </summary>
/// <returns>The <c>CollectorStep</c> created for this crawl.</returns>
private static CollectorStep CollectionCrawl()
{
    CollectorStep collector = new CollectorStep();
    HtmlDocumentProcessor documentProcessor = new HtmlDocumentProcessor();
    Uri startUri = new Uri("http://ncrawler.codeplex.com");

    using (Crawler crawler = new Crawler(startUri, collector, documentProcessor))
    {
        // Print the concrete crawler type to standard output.
        Console.Out.WriteLine(crawler.GetType());

        crawler.MaximumThreadCount = 5;
        crawler.UriSensitivity = UriComponents.HttpRequestUrl;

        // Exclude filter for common image, stylesheet and script extensions.
        // The pattern is unanchored and matched case-insensitively.
        const RegexOptions filterOptions =
            RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase;
        Regex excludedExtensions = new Regex(@"(\.jpg|\.css|\.js|\.gif|\.jpeg|\.png)", filterOptions);
        crawler.ExcludeFilter = new[] { new RegexFilter(excludedExtensions) };

        crawler.Crawl();
    }

    // The crawler has been disposed by the time the collector is handed back.
    return collector;
}
/// <summary>
/// Performs one crawl starting at <c>http://ncrawler.codeplex.com</c> with a
/// fresh <c>CollectorStep</c> and <c>HtmlDocumentProcessor</c>, then returns
/// that <c>CollectorStep</c>.
/// </summary>
/// <returns>The <c>CollectorStep</c> that was supplied to the crawler.</returns>
private static CollectorStep CollectionCrawl()
{
    var steps = new CollectorStep();
    var htmlProcessor = new HtmlDocumentProcessor();

    using (var crawler = new Crawler(new Uri("http://ncrawler.codeplex.com"), steps, htmlProcessor))
    {
        // Write the crawler's runtime type to standard output.
        Console.Out.WriteLine(crawler.GetType());

        crawler.MaximumThreadCount = 5;
        crawler.UriSensitivity = UriComponents.HttpRequestUrl;

        // Single exclude filter: URIs containing one of the listed
        // image/stylesheet/script extensions (case-insensitive).
        var excludeFilters = new[]
        {
            new RegexFilter(
                new Regex(
                    @"(\.jpg|\.css|\.js|\.gif|\.jpeg|\.png)",
                    RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase))
        };
        crawler.ExcludeFilter = excludeFilters;

        crawler.Crawl();
    }

    // Returned after the using block has disposed the crawler.
    return steps;
}