Exemple #1
0
        /// <summary>
        /// Rebuilds the scheduler test fixture: a single page, a two-page list,
        /// fresh repository mocks, and a <c>Scheduler</c> wired to those mocks.
        /// </summary>
        public void SetUp()
        {
            // Repository mocks; only their proxy objects are handed to the scheduler.
            _fakePagesToCrawlRepo = new Mock<IPagesToCrawlRepository>();
            _fakeCrawledUrlRepo = new Mock<ICrawledUrlRepository>();

            // Single-page input.
            _page = new PageToCrawl { Uri = new Uri("http://a.com/") };

            // Multi-page input: two distinct hosts.
            var pageOnHostA = new PageToCrawl { Uri = new Uri("http://a.com/") };
            var pageOnHostB = new PageToCrawl { Uri = new Uri("http://b.com/") };
            _pages = new List<PageToCrawl> { pageOnHostA, pageOnHostB };

            // Tests named "...UriRecrawlingEnabled..." pass true for this flag instead.
            const bool recrawlingFlag = false;
            _unitUnderTest = new Scheduler(recrawlingFlag, _fakeCrawledUrlRepo.Object, _fakePagesToCrawlRepo.Object);
        }
Exemple #2
0
        /// <summary>
        /// Adds a page flagged as a retry to a scheduler constructed with <c>false</c>
        /// as its first argument, then checks that the page reaches the
        /// pages-to-crawl repository without <c>AddIfNew</c> being called.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the method name ends in "AddsToBothRepos", but the assertions
        /// require that the crawled-url repository is never touched. Confirm which of
        /// the two reflects the intended contract.
        /// </remarks>
        public void Add_UriRecrawlingDisabled_UrlHasBeenCrawled_IsRetry_AddsToBothRepos()
        {
            // Arrange
            const bool recrawlingFlag = false;
            _unitUnderTest = new Scheduler(recrawlingFlag, _fakeCrawledUrlRepo.Object, _fakePagesToCrawlRepo.Object);
            _page.IsRetry = true;

            // Act
            _unitUnderTest.Add(_page);

            // Assert
            _fakePagesToCrawlRepo.Verify(repo => repo.Add(_page));
            _fakeCrawledUrlRepo.Verify(repo => repo.AddIfNew(_page.Uri), Times.Never());
        }
        /// <summary>
        /// Creates the mocks and concrete collaborators used by the crawler tests.
        /// The unit under test itself is not constructed here.
        /// </summary>
        public void SetUp()
        {
            // Mocked collaborators.
            _fakeRobotsDotText = new Mock<IRobotsDotText>();
            _fakeRobotsDotTextFinder = new Mock<IRobotsDotTextFinder>();
            _fakeMemoryManager = new Mock<IMemoryManager>();
            _fakeDomainRateLimiter = new Mock<IDomainRateLimiter>();
            _fakeCrawlDecisionMaker = new Mock<ICrawlDecisionMaker>();
            _fakeHttpRequester = new Mock<IPageRequester>();
            _fakeHyperLinkParser = new Mock<IHyperLinkParser>();

            // Real (non-mocked) collaborators.
            _dummyThreadManager = new ManualThreadManager(1);
            _dummyScheduler = new Scheduler();

            // Configuration carrying one extension entry.
            var configuration = new CrawlConfiguration();
            configuration.ConfigurationExtensions.Add("somekey", "someval");
            _dummyConfiguration = configuration;

            _rootUri = new Uri("http://a.com/");
        }
Exemple #4
0
        /// <summary>
        /// Builds a <c>PoliteWebCrawler</c> from mocked and dummy collaborators and
        /// seeds its <c>CrawlBag</c> with a string and a two-item list.
        /// </summary>
        public void SetUp()
        {
            _rootUri = new Uri("http://a.com/");

            // Mocked collaborators.
            _fakeRobotsDotTextFinder = new Mock<IRobotsDotTextFinder>();
            _fakeDomainRateLimiter = new Mock<IDomainRateLimiter>();
            _fakeMemoryManager = new Mock<IMemoryManager>();
            _fakeCrawlDecisionMaker = new Mock<ICrawlDecisionMaker>();
            _fakeHttpRequester = new Mock<IPageRequester>();
            _fakeHyperLinkParser = new Mock<IHyperLinkParser>();

            // Real (non-mocked) collaborators.
            _dummyThreadManager = new TaskThreadManager(10);
            _dummyScheduler = new Scheduler();

            // Configuration carrying one extension entry.
            var configuration = new CrawlConfiguration();
            configuration.ConfigurationExtensions.Add("somekey", "someval");
            _dummyConfiguration = configuration;

            _unitUnderTest = new PoliteWebCrawler(
                _dummyConfiguration,
                _fakeCrawlDecisionMaker.Object,
                _dummyThreadManager,
                _dummyScheduler,
                _fakeHttpRequester.Object,
                _fakeHyperLinkParser.Object,
                _fakeMemoryManager.Object,
                _fakeDomainRateLimiter.Object,
                _fakeRobotsDotTextFinder.Object);

            // Seed values stored on the crawler's bag.
            _unitUnderTest.CrawlBag.SomeVal = "SomeVal";
            _unitUnderTest.CrawlBag.SomeList = new List<string> { "a", "b" };
        }
Exemple #5
0
        /// <summary>
        /// With the scheduler constructed with <c>true</c> as its first argument,
        /// adding a single page must forward it to the pages-to-crawl repository
        /// and must never call <c>AddIfNew</c> on the crawled-url repository.
        /// </summary>
        public void Add_UriRecrawlingEnabled_AddsToPagesToCrawlRepo()
        {
            // Arrange
            const bool recrawlingFlag = true;
            _unitUnderTest = new Scheduler(recrawlingFlag, _fakeCrawledUrlRepo.Object, _fakePagesToCrawlRepo.Object);

            // Act
            _unitUnderTest.Add(_page);

            // Assert
            _fakePagesToCrawlRepo.Verify(repo => repo.Add(_page));
            _fakeCrawledUrlRepo.Verify(repo => repo.AddIfNew(_page.Uri), Times.Never());
        }
Exemple #6
0
        /// <summary>
        /// With the scheduler constructed with <c>true</c> as its first argument,
        /// adding the page collection must forward the first two pages to the
        /// pages-to-crawl repository and must never call <c>AddIfNew</c>.
        /// </summary>
        public void Add_UriRecrawlingEnabled_AddsBothToPagesToCrawlRepo()
        {
            // Arrange
            const bool recrawlingFlag = true;
            _unitUnderTest = new Scheduler(recrawlingFlag, _fakeCrawledUrlRepo.Object, _fakePagesToCrawlRepo.Object);

            // Act
            _unitUnderTest.Add(_pages);

            // Assert: each page was queued individually...
            _fakePagesToCrawlRepo.Verify(repo => repo.Add(_pages[0]));
            _fakePagesToCrawlRepo.Verify(repo => repo.Add(_pages[1]));

            // ...and the crawled-url repository was not consulted for any URI.
            _fakeCrawledUrlRepo.Verify(repo => repo.AddIfNew(It.IsAny<Uri>()), Times.Never());
        }
Exemple #7
0
        /// <summary>
        /// Creates fresh repository mocks and a <c>Scheduler</c> wired to them,
        /// passing <c>false</c> as the scheduler's first constructor argument.
        /// </summary>
        public void SetUp()
        {
            // Repository mocks; only their proxy objects are handed to the scheduler.
            _fakePagesToCrawlRepo = new Mock<IPagesToCrawlRepository>();
            _fakeCrawledUrlRepo = new Mock<ICrawledUrlRepository>();

            const bool recrawlingFlag = false;
            _unitUnderTest = new Scheduler(recrawlingFlag, _fakeCrawledUrlRepo.Object, _fakePagesToCrawlRepo.Object);
        }