Пример #1
0
        /// <summary>
        /// Calls <c>CollectionCrawl</c> after each storage setup exposed by
        /// <c>TestModule</c> and asserts that every run reports the same
        /// <c>Steps.Count</c> as the reference crawl.
        /// </summary>
        public void TestQueuesYieldSameResult()
        {
            // In-memory storage: both the reference crawl and the first
            // comparison crawl run after this setup call.
            TestModule.SetupInMemoryStorage();
            var reference = CollectionCrawl();
            var inMemoryRun = CollectionCrawl();
            Assert.AreEqual(reference.Steps.Count, inMemoryRun.Steps.Count);

            // File storage.
            TestModule.SetupFileStorage();
            var fileRun = CollectionCrawl();
            Assert.AreEqual(reference.Steps.Count, fileRun.Steps.Count);

            // Isolated storage.
            TestModule.SetupIsolatedStorage();
            var isolatedRun = CollectionCrawl();
            Assert.AreEqual(reference.Steps.Count, isolatedRun.Steps.Count);

            // EF services storage.
            TestModule.SetupEfServicesStorage();
            var efRun = CollectionCrawl();
            Assert.AreEqual(reference.Steps.Count, efRun.Steps.Count);

            // ESent services storage.
            TestModule.SetupESentServicesStorage();
            var esentRun = CollectionCrawl();
            Assert.AreEqual(reference.Steps.Count, esentRun.Steps.Count);
        }
Пример #2
0
        /// <summary>
        /// Creates a <c>Crawler</c> for http://ncrawler.codeplex.com, configures it,
        /// calls <c>Crawl()</c>, and returns the <c>CollectorStep</c> that was passed
        /// to the crawler's constructor.
        /// </summary>
        /// <returns>The <c>CollectorStep</c> instance handed to the crawler.</returns>
        private static CollectorStep CollectionCrawl()
        {
            CollectorStep collected = new CollectorStep();
            HtmlDocumentProcessor htmlProcessor = new HtmlDocumentProcessor();

            using (Crawler crawler = new Crawler(new Uri("http://ncrawler.codeplex.com"), collected, htmlProcessor))
            {
                // Print the runtime type of the crawler instance.
                Console.Out.WriteLine(crawler.GetType());

                crawler.MaximumThreadCount = 5;
                crawler.UriSensitivity = UriComponents.HttpRequestUrl;

                // Case-insensitive pattern matching common image/style/script
                // extensions anywhere in the input (the pattern is not anchored).
                RegexOptions patternOptions =
                    RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase;
                Regex staticAssetPattern = new Regex(@"(\.jpg|\.css|\.js|\.gif|\.jpeg|\.png)", patternOptions);
                RegexFilter staticAssetFilter = new RegexFilter(staticAssetPattern);

                // NOTE(review): presumably URIs matching this filter are skipped - confirm against Crawler.
                crawler.ExcludeFilter = new[] { staticAssetFilter };

                crawler.Crawl();
                return collected;
            }
        }
Пример #3
0
 /// <summary>
 /// Builds a <c>Crawler</c> rooted at http://ncrawler.codeplex.com with a
 /// <c>CollectorStep</c> and an <c>HtmlDocumentProcessor</c>, runs <c>Crawl()</c>,
 /// and hands back the <c>CollectorStep</c>.
 /// </summary>
 /// <returns>The <c>CollectorStep</c> given to the crawler's constructor.</returns>
 private static CollectorStep CollectionCrawl()
 {
     var collector = new CollectorStep();
     var documentProcessor = new HtmlDocumentProcessor();
     var startUri = new Uri("http://ncrawler.codeplex.com");

     using (var crawler = new Crawler(startUri, collector, documentProcessor))
     {
         // Write the crawler's runtime type to standard output.
         Console.Out.WriteLine(crawler.GetType());

         crawler.MaximumThreadCount = 5;
         crawler.UriSensitivity = UriComponents.HttpRequestUrl;

         // Unanchored, case-insensitive pattern for image/style/script extensions.
         var assetPattern = new Regex(
             @"(\.jpg|\.css|\.js|\.gif|\.jpeg|\.png)",
             RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);

         // NOTE(review): presumably matching URIs are excluded from the crawl - confirm.
         crawler.ExcludeFilter = new[] { new RegexFilter(assetPattern) };

         crawler.Crawl();
     }

     // The crawler has been disposed by this point; the collector is returned as-is.
     return collector;
 }