/// <summary>
/// Creates a scheduler from the supplied repositories, substituting a default
/// implementation for any repository argument that is null.
/// </summary>
/// <param name="allowUriRecrawling">Whether a uri may be crawled again after a failed attempt.</param>
/// <param name="crawledUrlRepository">Repository of already-crawled urls; null selects <see cref="CompactCrawledUrlRepository"/>.</param>
/// <param name="pagesToCrawlRepository">Queue of pages awaiting crawl; null selects <see cref="QueueOfPagesToCrawlRepository"/>.</param>
public Scheduler(bool allowUriRecrawling, ICrawledUrlRepository crawledUrlRepository, IQueueOfPagesToCrawlRepository pagesToCrawlRepository)
{
    AllowUriRecrawling = allowUriRecrawling;

    // Null-coalesce to the default repository implementations so callers may pass null.
    CrawledUrlRepository = crawledUrlRepository ?? new CompactCrawledUrlRepository();
    PagesToCrawlRepository = pagesToCrawlRepository ?? new QueueOfPagesToCrawlRepository();
}
public void NoFalseNegativesTest()
{
    // Arrange: a collection of randomly generated uris.
    List<Uri> uris = GenerateRandomDataList(10000);

    // Act/assert against a fresh repository instance.
    using (ICrawledUrlRepository repository = GetInstance())
    {
        // Each uri is seen for the first time, so AddIfNew() must report it as new.
        foreach (Uri uri in uris)
        {
            Assert.IsTrue(repository.AddIfNew(uri));
        }

        // Every uri that was added must subsequently be found by Contains();
        // a miss here is a false negative.
        foreach (Uri uri in uris)
        {
            if (!repository.Contains(uri))
            {
                Assert.Fail("False negative: {0}", uri);
            }
        }
    }
}
/// <summary>
/// Creates a MadBid scheduler with a time-seeded random generator and a crawled-url
/// repository, defaulting to <see cref="InMemoryCrawledUrlRepository"/> when the
/// supplied repository is null.
/// </summary>
/// <param name="allowUriRecrawling">Whether a uri may be crawled again after a failed attempt.</param>
/// <param name="crawledUrlRepo">Repository of already-crawled urls; null selects the in-memory default.</param>
/// <param name="pagesToCrawlRepo">NOTE(review): this parameter is never assigned to any field in this
/// constructor — confirm whether a base class consumes it or whether an assignment is missing (the
/// sibling Scheduler constructors do assign their pages-to-crawl repository).</param>
// NOTE(review): masking Ticks with 0x0000FFFF limits the seed to 16 bits of entropy;
// the parameterless Random() constructor is already time-seeded — confirm the mask is intentional.
public MadBidScheduler(bool allowUriRecrawling, ICrawledUrlRepository crawledUrlRepo, IPagesToCrawlRepository pagesToCrawlRepo) { rand1 = new Random((int)DateTime.Now.Ticks & 0x0000FFFF); _allowUriRecrawling = allowUriRecrawling; _crawledUrlRepo = crawledUrlRepo ?? new InMemoryCrawledUrlRepository(); }
/// <summary>
/// Creates a scheduler, falling back to the default repository implementations
/// (<see cref="CompactCrawledUrlRepository"/> and <see cref="FifoPagesToCrawlRepository"/>)
/// for any repository argument that is null.
/// </summary>
/// <param name="allowUriRecrawling">Whether a uri may be crawled again after a failed attempt.</param>
/// <param name="crawledUrlRepo">Repository of already-crawled urls, or null for the default.</param>
/// <param name="pagesToCrawlRepo">Repository of pages awaiting crawl, or null for the default.</param>
public Scheduler(bool allowUriRecrawling, ICrawledUrlRepository crawledUrlRepo, IPagesToCrawlRepository pagesToCrawlRepo)
{
    // The two repository assignments are independent of one another.
    _pagesToCrawlRepo = pagesToCrawlRepo ?? new FifoPagesToCrawlRepository();
    _crawledUrlRepo = crawledUrlRepo ?? new CompactCrawledUrlRepository();
    _allowUriRecrawling = allowUriRecrawling;
}
/// <summary>
/// Per-test setup: creates a fresh unit under test and two uris with distinct
/// hosts so assertions can tell them apart.
/// </summary>
public void SetUp()
{
    _unitUnderTest = GetInstance();

    _uri1 = new Uri("http://a.com");
    _uri2 = new Uri("http://b.com");
}
/// <summary>
/// Creates a scheduler from the supplied storage implementations, substituting
/// <see cref="InMemoryCrawledUrlStorage"/> and <see cref="FifoPagesToCrawlStorage"/>
/// for null arguments.
/// </summary>
/// <param name="crawledUrlRepo">Storage for already-crawled urls, or null for the in-memory default.</param>
/// <param name="pagesToCrawlRepo">Storage for pages awaiting crawl, or null for the FIFO default.</param>
public Scheduler(ICrawledUrlRepository crawledUrlRepo, IPagesToCrawlRepository pagesToCrawlRepo)
{
    // Independent assignments; each null-coalesces to its own default.
    _pagesToCrawlRepo = pagesToCrawlRepo ?? new FifoPagesToCrawlStorage();
    _crawledUrlRepo = crawledUrlRepo ?? new InMemoryCrawledUrlStorage();
}