Example #1
0
 /// <summary>
 /// Builds a scheduler on top of the supplied repositories, substituting a default
 /// implementation for every repository argument that is null.
 /// </summary>
 /// <param name="allowUriRecrawling">Whether a uri may be crawled again after something failed; stored in <c>AllowUriRecrawling</c>.</param>
 /// <param name="crawledUrlRepository">Crawled url repository; a <c>CompactCrawledUrlRepository</c> is created when null.</param>
 /// <param name="pagesToCrawlRepository">Pages to crawl repository; a <c>QueueOfPagesToCrawlRepository</c> is created when null.</param>
 public Scheduler(
     bool allowUriRecrawling,
     ICrawledUrlRepository crawledUrlRepository,
     IQueueOfPagesToCrawlRepository pagesToCrawlRepository)
 {
     AllowUriRecrawling = allowUriRecrawling;

     // A default repository is only constructed when the caller supplied none.
     if (crawledUrlRepository == null)
     {
         crawledUrlRepository = new CompactCrawledUrlRepository();
     }
     CrawledUrlRepository = crawledUrlRepository;

     if (pagesToCrawlRepository == null)
     {
         pagesToCrawlRepository = new QueueOfPagesToCrawlRepository();
     }
     PagesToCrawlRepository = pagesToCrawlRepository;
 }
Example #2
0
        public void NoFalseNegativesTest()
        {
            // Build the input set (presumably unique uris - the first assertion below depends on it).
            List<Uri> uris = GenerateRandomDataList(10000);

            // Create the repository under test and feed it every input.
            using (ICrawledUrlRepository repository = GetInstance())
            {
                // Each uri is new to the repository, so AddIfNew() must return true every time.
                for (int index = 0; index < uris.Count; index++)
                {
                    Assert.IsTrue(repository.AddIfNew(uris[index]));
                }

                // Everything that was added must now be reported by Contains().
                foreach (Uri candidate in uris)
                {
                    if (repository.Contains(candidate))
                    {
                        continue;
                    }

                    Assert.Fail("False negative: {0}", candidate);
                }
            }
        }
Example #3
0
 /// <summary>
 /// Initializes the scheduler: seeds <c>rand1</c> from the current clock, stores the
 /// recrawl flag and selects the crawled-url repository.
 /// </summary>
 /// <param name="allowUriRecrawling">Stored as-is in <c>_allowUriRecrawling</c>.</param>
 /// <param name="crawledUrlRepo">Crawled url repository; an <c>InMemoryCrawledUrlRepository</c> is created when null.</param>
 /// <param name="pagesToCrawlRepo">Not read by this constructor. NOTE(review): confirm that ignoring it is intentional.</param>
 public MadBidScheduler(bool allowUriRecrawling, ICrawledUrlRepository crawledUrlRepo, IPagesToCrawlRepository pagesToCrawlRepo)
 {
     // Mask the 64-bit tick count down to its low 16 bits BEFORE narrowing to int.
     // The seed value is unchanged, but the conversion can no longer overflow when
     // the assembly is compiled in a checked arithmetic context.
     rand1 = new Random((int)(DateTime.Now.Ticks & 0x0000FFFF));
     _allowUriRecrawling = allowUriRecrawling;
     _crawledUrlRepo = crawledUrlRepo ?? new InMemoryCrawledUrlRepository();
 }
Example #4
0
 /// <summary>
 /// Creates a scheduler, using the given repositories or default ones where null is passed.
 /// </summary>
 /// <param name="allowUriRecrawling">Stored in <c>_allowUriRecrawling</c>.</param>
 /// <param name="crawledUrlRepo">Crawled url repository; a <c>CompactCrawledUrlRepository</c> is created when null.</param>
 /// <param name="pagesToCrawlRepo">Pages to crawl repository; a <c>FifoPagesToCrawlRepository</c> is created when null.</param>
 public Scheduler(bool allowUriRecrawling, ICrawledUrlRepository crawledUrlRepo, IPagesToCrawlRepository pagesToCrawlRepo)
 {
     _allowUriRecrawling = allowUriRecrawling;

     // Prefer the caller's repositories; construct the defaults only when they are missing.
     _crawledUrlRepo = crawledUrlRepo != null
         ? crawledUrlRepo
         : new CompactCrawledUrlRepository();
     _pagesToCrawlRepo = pagesToCrawlRepo != null
         ? pagesToCrawlRepo
         : new FifoPagesToCrawlRepository();
 }
Example #5
0
 public void SetUp()
 {
     // Fresh unit under test, obtained from GetInstance().
     _unitUnderTest = GetInstance();

     // Two distinct absolute uris stored in the fixture fields.
     const string firstAddress = "http://a.com";
     const string secondAddress = "http://b.com";
     _uri1 = new Uri(firstAddress);
     _uri2 = new Uri(secondAddress);
 }
 public void SetUp()
 {
     // Obtain the unit under test first, then prepare the two uris stored in the fixture fields.
     _unitUnderTest = GetInstance();

     Uri firstUri = new Uri("http://a.com");
     _uri1 = firstUri;

     Uri secondUri = new Uri("http://b.com");
     _uri2 = secondUri;
 }
Example #7
0
 /// <summary>
 /// Sets up the scheduler with the supplied repositories; a null repository is replaced by a default one.
 /// </summary>
 /// <param name="allowUriRecrawling">Stored in <c>_allowUriRecrawling</c>.</param>
 /// <param name="crawledUrlRepo">Crawled url repository; a <c>CompactCrawledUrlRepository</c> is created when null.</param>
 /// <param name="pagesToCrawlRepo">Pages to crawl repository; a <c>FifoPagesToCrawlRepository</c> is created when null.</param>
 public Scheduler(bool allowUriRecrawling, ICrawledUrlRepository crawledUrlRepo, IPagesToCrawlRepository pagesToCrawlRepo)
 {
     _allowUriRecrawling = allowUriRecrawling;

     if (crawledUrlRepo != null)
     {
         _crawledUrlRepo = crawledUrlRepo;
     }
     else
     {
         _crawledUrlRepo = new CompactCrawledUrlRepository();
     }

     if (pagesToCrawlRepo != null)
     {
         _pagesToCrawlRepo = pagesToCrawlRepo;
     }
     else
     {
         _pagesToCrawlRepo = new FifoPagesToCrawlRepository();
     }
 }
Example #8
0
 /// <summary>
 /// Creates a scheduler backed by the given storages, falling back to defaults for null arguments.
 /// </summary>
 /// <param name="crawledUrlRepo">Crawled url repository; an <c>InMemoryCrawledUrlStorage</c> is created when null.</param>
 /// <param name="pagesToCrawlRepo">Pages to crawl repository; a <c>FifoPagesToCrawlStorage</c> is created when null.</param>
 public Scheduler(ICrawledUrlRepository crawledUrlRepo, IPagesToCrawlRepository pagesToCrawlRepo)
 {
     // The default storage is only instantiated when the corresponding argument is null.
     _crawledUrlRepo = ReferenceEquals(crawledUrlRepo, null)
         ? new InMemoryCrawledUrlStorage()
         : crawledUrlRepo;

     _pagesToCrawlRepo = ReferenceEquals(pagesToCrawlRepo, null)
         ? new FifoPagesToCrawlStorage()
         : pagesToCrawlRepo;
 }