示例#1
0
        /// <summary>
        /// Invokes each of the crawler's Clear* methods in turn: Discoveries, Politenesses,
        /// uncrawled CrawlRequests and, finally, the DisallowedAbsoluteUris table.
        /// </summary>
        private void ResetCrawler()
        {
            // Discoveries go first...
            _crawler.ClearDiscoveries();
            // ...followed by the Politenesses...
            _crawler.ClearPolitenesses();
            // ...then any CrawlRequests that have not been crawled yet...
            _crawler.ClearUncrawledCrawlRequests();
            // ...and lastly the DisallowedAbsoluteUris table.
            _crawler.ClearDisallowedAbsoluteUris();
        }
示例#2
0
        /// <summary>
        /// Runs two independent crawlers side by side, each on its own thread, and blocks the
        /// calling thread until both <c>_isCrawl1Completed</c> and <c>_isCrawl2Completed</c> are set.
        /// </summary>
        /// <remarks>
        /// Each worker thread builds its own <c>Crawler</c>, disables every crawl action, crawl rule
        /// and engine action, subscribes its own completion handlers, submits a single
        /// <c>CrawlRequest</c>, starts the engine and then polls its own completion flag once a second.
        /// The completion flags are not assigned anywhere in this method; presumably the
        /// <c>Engine_CrawlCompleted1</c>/<c>Engine_CrawlCompleted2</c> handlers set them - TODO confirm.
        /// NOTE(review): a worker that throws only writes the exception message to the console. If its
        /// flag is then never set, the final wait loop in this method will not terminate.
        /// </remarks>
        public void TestCrawlerConcurrency()
        {
            var thread1 = new Thread(delegate()
            {
                try
                {
                    ApplicationSettings applicationSettings = new ApplicationSettings();
                    WebSettings webSettings = new WebSettings();

                    var crawler1 = new Crawler<ArachnodeDAO>(applicationSettings, webSettings, CrawlMode.BreadthFirstByPriority, false);

                    crawler1.ApplicationSettings.CreateCrawlRequestsFromDatabaseCrawlRequests = false;

                    crawler1.ClearUncrawledCrawlRequests();
                    crawler1.ClearDiscoveries();

                    //switch off every CrawlAction, CrawlRule and EngineAction before crawling...
                    foreach (ACrawlAction<ArachnodeDAO> crawlAction in crawler1.CrawlActions.Values)
                    {
                        crawlAction.IsEnabled = false;
                    }

                    foreach (ACrawlRule<ArachnodeDAO> crawlRule in crawler1.CrawlRules.Values)
                    {
                        crawlRule.IsEnabled = false;
                    }

                    foreach (AEngineAction<ArachnodeDAO> engineAction in crawler1.Engine.EngineActions.Values)
                    {
                        engineAction.IsEnabled = false;
                    }

                    crawler1.Engine.CrawlRequestCompleted += Engine_CrawlRequestCompleted1;
                    crawler1.Engine.CrawlCompleted        += Engine_CrawlCompleted1;

                    crawler1.Crawl(new CrawlRequest<ArachnodeDAO>(new Discovery<ArachnodeDAO>("http://cbs.com"), 2, UriClassificationType.Domain, UriClassificationType.Domain, 1, RenderType.None, RenderType.None));

                    crawler1.Engine.Start();

                    //wait for THIS crawl to finish: the original polled _isCrawl2Completed here, which let
                    //this thread exit as soon as the other crawl was done, even if crawl 1 was still running...
                    while (!_isCrawl1Completed)
                    {
                        Thread.Sleep(1000);
                    }
                }
                catch (Exception exception)
                {
                    Console.WriteLine(exception.Message);
                }
            });

            var thread2 = new Thread(delegate()
            {
                try
                {
                    ApplicationSettings applicationSettings = new ApplicationSettings();
                    WebSettings webSettings = new WebSettings();

                    var crawler2 = new Crawler<ArachnodeDAO>(applicationSettings, webSettings, CrawlMode.BreadthFirstByPriority, false);

                    crawler2.ApplicationSettings.CreateCrawlRequestsFromDatabaseCrawlRequests = false;

                    //NOTE(review): unlike crawler1, ClearUncrawledCrawlRequests() is not called for crawler2 -
                    //confirm whether that asymmetry is intentional...
                    crawler2.ClearDiscoveries();

                    //switch off every CrawlAction, CrawlRule and EngineAction before crawling...
                    foreach (ACrawlAction<ArachnodeDAO> crawlAction in crawler2.CrawlActions.Values)
                    {
                        crawlAction.IsEnabled = false;
                    }

                    foreach (ACrawlRule<ArachnodeDAO> crawlRule in crawler2.CrawlRules.Values)
                    {
                        crawlRule.IsEnabled = false;
                    }

                    foreach (AEngineAction<ArachnodeDAO> engineAction in crawler2.Engine.EngineActions.Values)
                    {
                        engineAction.IsEnabled = false;
                    }

                    crawler2.Engine.CrawlRequestCompleted += Engine_CrawlRequestCompleted2;
                    crawler2.Engine.CrawlCompleted        += Engine_CrawlCompleted2;

                    crawler2.Crawl(new CrawlRequest<ArachnodeDAO>(new Discovery<ArachnodeDAO>("http://nbc.com"), 1, UriClassificationType.Domain, UriClassificationType.Domain, 1, RenderType.None, RenderType.None));

                    crawler2.Engine.Start();

                    //wait for this thread's own crawl to finish...
                    while (!_isCrawl2Completed)
                    {
                        Thread.Sleep(1000);
                    }
                }
                catch (Exception exception)
                {
                    Console.WriteLine(exception.Message);
                }
            });

            thread1.Start();
            thread2.Start();

            //block the caller until both crawls have reported completion...
            while (!_isCrawl1Completed || !_isCrawl2Completed)
            {
                Thread.Sleep(1000);
            }
        }