/// <summary>
/// Verifies that the feeder caps the in-memory download queue at its bounded
/// capacity (1000): overflow goes to backing storage, a dequeued slot is
/// back-filled from that storage, and once storage is empty the queue simply
/// shrinks. Fixed <c>Thread.Sleep</c> waits were replaced with bounded polling
/// so the test is faster in the common case and less flaky under load.
/// </summary>
public void TestMemoryQueueOverFlow()
{
    BlockingCollection<ParentLink> _newLinks = new BlockingCollection<ParentLink>(new ConcurrentQueue<ParentLink>(), 1000);
    BlockingCollection<ParentLink> _downloadQueue = new BlockingCollection<ParentLink>(new ConcurrentQueue<ParentLink>(), 1000);
    BlockingCollection<DownloadResult> _downloadResults = new BlockingCollection<DownloadResult>(new ConcurrentQueue<DownloadResult>(), 10);
    var mockProgress = new MockProgess();
    var feeder = new Feeder(_newLinks, _downloadQueue, 1, 1, mockProgress);
    feeder.Start();

    // One more link than the queue's bound of 1000 (Crawler.MaxCollectionSize + 1),
    // so exactly one item must overflow to the backing store.
    for (int i = 0; i < 1001; i++)
    {
        _newLinks.Add(new ParentLink($"http://{i}.co.za", null));
    }

    // Download queue should fill to its max size of 1000.
    for (int waited = 0; waited < 5000 && _downloadQueue.Count != 1000; waited += 50)
    {
        Thread.Sleep(50);
    }
    Assert.AreEqual(1000, _downloadQueue.Count);

    // After dequeuing one item the overflowed (database) item should be
    // re-added, taking the total back to 1000.
    _downloadQueue.Take();
    for (int waited = 0; waited < 5000 && _downloadQueue.Count != 1000; waited += 50)
    {
        Thread.Sleep(50);
    }
    Assert.AreEqual(1000, _downloadQueue.Count);

    // No overflow remains: a fixed wait is intentional here, because we are
    // asserting that the feeder does NOT re-add anything and the count stays 999.
    _downloadQueue.Take();
    Thread.Sleep(1000);
    Assert.AreEqual(999, _downloadQueue.Count);

    feeder.Stop();
    Thread.Sleep(1000); // allow the feeder worker thread to wind down
}
/// <summary>
/// Drives one feeder -> downloader -> processor iteration for a single link,
/// then re-runs the feeder to prove that a link which loops back on itself is
/// discarded rather than crawled twice.
/// </summary>
public void FlowTest_OneLinkLoopFound()
{
    BlockingCollection<ParentLink> newLinks = new BlockingCollection<ParentLink>(new ConcurrentQueue<ParentLink>(), 1000);
    BlockingCollection<ParentLink> downloadQueue = new BlockingCollection<ParentLink>(new ConcurrentQueue<ParentLink>(), 1000);
    BlockingCollection<DownloadResult> downloadResults = new BlockingCollection<DownloadResult>(new ConcurrentQueue<DownloadResult>(), 10);
    var progress = new MockProgess();
    var feeder = new Feeder(newLinks, downloadQueue, 1, 1, progress, false);
    var downloader = new Downloader(downloadQueue, downloadResults, new Uri(Const.SEED), new MockSettings(), progress, false);
    var processor = new Processor(downloadResults, newLinks, new Website(new Data.Website() { Id = 1, Seed = Const.SEED }), 1, new MockSettings(), progress, false);

    // Mock the behaviour of Crawler.Start: seed one link into the pipeline.
    newLinks.Add(new ParentLink(Const.LINK2, 1));
    Assert.AreEqual(0, progress.TotalLinks);
    Assert.AreEqual(1, newLinks.Count);

    // Feeder moves the new link onto the download queue.
    feeder.Start();
    Assert.AreEqual(0, progress.TotalLinks);
    Assert.AreEqual(0, progress.TotalDiscarded);
    Assert.AreEqual(0, newLinks.Count);
    Assert.AreEqual(1, downloadQueue.Count);
    Assert.AreEqual(0, downloadResults.Count);

    // Downloader consumes the queued link and produces one download result.
    downloader.Start();
    Assert.AreEqual(0, progress.TotalDiscarded);
    Assert.AreEqual(0, progress.TotalLinks);
    Assert.AreEqual(1, progress.TotalDownloadResult);
    Assert.AreEqual(0, newLinks.Count);
    Assert.AreEqual(0, downloadQueue.Count);
    Assert.AreEqual(1, downloadResults.Count);

    // Processor extracts the (self-referencing) link back into newLinks.
    processor.Start();
    Assert.AreEqual(0, progress.TotalDiscarded);
    Assert.AreEqual(1, progress.TotalLinks);
    Assert.AreEqual(1, progress.TotalDownloadResult);
    Assert.AreEqual(1, newLinks.Count);
    Assert.AreEqual(0, downloadQueue.Count);
    Assert.AreEqual(0, downloadResults.Count);

    // Second iteration: the link was already crawled, so the feeder must
    // discard it instead of queueing a duplicate download.
    feeder.Start();
    Assert.AreEqual(1, progress.TotalDiscarded);
    Assert.AreEqual(1, progress.TotalLinks);
    Assert.AreEqual(1, progress.TotalDownloadResult);
    Assert.AreEqual(0, newLinks.Count);
    Assert.AreEqual(0, downloadQueue.Count);
    Assert.AreEqual(0, downloadResults.Count);
}
/// <summary>
/// Feeds three distinct links (plus several repeats) through the feeder and
/// asserts that only the three unique links reach the download queue, in the
/// order they were first added.
/// </summary>
public void FeederTest_NoDuplicateLinks()
{
    Assert.Empty(Crawler.NewLinks);
    Assert.Empty(Crawler.DownloadQueue);
    Assert.Empty(Crawler.DownloadResults);

    const string mockSeed = "https://seed.co.za/";
    const string link2 = "https://seed.co.za/link2/";
    const string link3 = "https://seed.co.za/link3/";
    var seedParentLink = new ParentLink(mockSeed, null);
    var link2ParentLink = new ParentLink(link2, 1);
    var link3ParentLink = new ParentLink(link3, 1);
    var mockProgress = new MockProgess();
    var feeder = new Feeder(1, 1, mockProgress);
    feeder.Start();

    // Three unique links, interleaved with repeated additions of two of them.
    Crawler.NewLinks.Add(seedParentLink);
    Crawler.NewLinks.Add(link2ParentLink);
    Crawler.NewLinks.Add(link3ParentLink);
    Crawler.NewLinks.Add(link2ParentLink);
    Crawler.NewLinks.Add(link3ParentLink);
    Crawler.NewLinks.Add(link3ParentLink);
    Crawler.NewLinks.Add(link2ParentLink);
    Crawler.NewLinks.Add(link3ParentLink);
    Crawler.NewLinks.Add(link2ParentLink);
    Thread.Sleep(100);
    Assert.Empty(Crawler.NewLinks);

    // Only the 3 unique links should come through; duplicates are dropped.
    Assert.Equal(3, Crawler.DownloadQueue.Count);

    // The links must arrive in the order they were first added.
    Assert.Equal(mockSeed, Crawler.DownloadQueue.Take().Link);
    Assert.Equal(link2, Crawler.DownloadQueue.Take().Link);
    Assert.Equal(link3, Crawler.DownloadQueue.Take().Link);

    feeder.Stop();
    Thread.Sleep(1000);
    Assert.Empty(Crawler.NewLinks);
    Assert.Empty(Crawler.DownloadQueue);
}
/// <summary>
/// Runs a single feeder -> downloader -> processor pass over the seed URL and
/// asserts that a page containing no links leaves every queue empty, with the
/// link and download-result counters each at exactly 1.
/// </summary>
public void FlowTest_NoLinks()
{
    BlockingCollection<ParentLink> newLinks = new BlockingCollection<ParentLink>(new ConcurrentQueue<ParentLink>(), 1000);
    BlockingCollection<ParentLink> downloadQueue = new BlockingCollection<ParentLink>(new ConcurrentQueue<ParentLink>(), 1000);
    BlockingCollection<DownloadResult> downloadResults = new BlockingCollection<DownloadResult>(new ConcurrentQueue<DownloadResult>(), 10);
    var progress = new MockProgess();
    var feeder = new Feeder(newLinks, downloadQueue, 1, 1, progress);
    var downloader = new Downloader(downloadQueue, downloadResults, new Uri(Const.SEED), new MockSettings(), progress, false);
    var processor = new Processor(downloadResults, newLinks, new Website(new Data.Website() { Id = 1, Seed = Const.SEED }), 1, new MockSettings(), progress, false);

    // Mock the behaviour of Crawler.Start: count and enqueue the seed link.
    progress.TotalLinks++;
    newLinks.Add(new ParentLink(Const.SEED, null));
    Assert.AreEqual(1, progress.TotalLinks);

    // Feeder moves the seed from newLinks onto the download queue.
    feeder.Start();
    Assert.AreEqual(1, progress.TotalLinks);
    Assert.AreEqual(0, progress.TotalDiscarded);
    Assert.AreEqual(0, downloadQueue.Count);

    // Downloader fetches the seed and emits a single download result.
    downloader.Start();
    Assert.AreEqual(1, progress.TotalLinks);
    Assert.AreEqual(0, progress.TotalDiscarded);
    Assert.AreEqual(1, progress.TotalDownloadResult);
    Assert.AreEqual(0, newLinks.Count);
    Assert.AreEqual(0, downloadQueue.Count);
    Assert.AreEqual(1, downloadResults.Count);

    // Processor finds no links in the page, so nothing new is enqueued.
    processor.Start();
    Assert.AreEqual(1, progress.TotalLinks);
    Assert.AreEqual(1, progress.TotalDownloadResult);
    Assert.AreEqual(0, progress.TotalDiscarded);
    Assert.AreEqual(0, newLinks.Count);
    Assert.AreEqual(0, downloadQueue.Count);
    Assert.AreEqual(0, downloadResults.Count);
}
/// <summary>
/// Verifies that the feeder caps the shared <c>Crawler.DownloadQueue</c> at
/// its bounded capacity (1000): the overflowing item goes to backing storage,
/// a dequeued slot is back-filled from storage, and once storage is empty the
/// queue simply shrinks. Fixed <c>Thread.Sleep</c> waits were replaced with
/// bounded polling so the test is faster in the common case and less flaky.
/// </summary>
public void TestMemoryQueueOverFlow()
{
    Assert.Empty(Crawler.NewLinks);
    Assert.Empty(Crawler.DownloadQueue);
    Assert.Empty(Crawler.DownloadResults);
    var mockProgress = new MockProgess();
    var feeder = new Feeder(1, 1, mockProgress);
    feeder.Start();

    // One more link than the queue's bound of 1000 (Crawler.MaxCollectionSize + 1),
    // so exactly one item must overflow to the backing store.
    for (int i = 0; i < 1001; i++)
    {
        Crawler.NewLinks.Add(new ParentLink($"http://{i}.co.za", null));
    }

    // Download queue should fill to its max size of 1000.
    for (int waited = 0; waited < 5000 && Crawler.DownloadQueue.Count != 1000; waited += 50)
    {
        Thread.Sleep(50);
    }
    Assert.Equal(1000, Crawler.DownloadQueue.Count);

    // After dequeuing one item the overflowed (database) item should be
    // re-added, taking the total back to 1000.
    Crawler.DownloadQueue.Take();
    for (int waited = 0; waited < 5000 && Crawler.DownloadQueue.Count != 1000; waited += 50)
    {
        Thread.Sleep(50);
    }
    Assert.Equal(1000, Crawler.DownloadQueue.Count);

    // No overflow remains: a fixed wait is intentional here, because we are
    // asserting that the feeder does NOT re-add anything and the count stays 999.
    Crawler.DownloadQueue.Take();
    Thread.Sleep(1000);
    Assert.Equal(999, Crawler.DownloadQueue.Count);

    feeder.Stop();
    Thread.Sleep(1000); // allow the feeder worker thread to wind down

    // Drain the shared static queue so later tests start from a clean state.
    while (Crawler.DownloadQueue.Count > 0)
    {
        Crawler.DownloadQueue.Take();
    }
    Assert.Empty(Crawler.NewLinks);
    Assert.Empty(Crawler.DownloadQueue);
    Assert.Empty(Crawler.DownloadResults);
}
/// <summary>
/// Feeds three distinct links (plus several repeats) through the feeder using
/// explicit collections and asserts that only the three unique links reach
/// the download queue, in the order they were first added.
/// </summary>
public void FeederTest_NoDuplicateLinks()
{
    BlockingCollection<ParentLink> newLinks = new BlockingCollection<ParentLink>(new ConcurrentQueue<ParentLink>(), 1000);
    BlockingCollection<ParentLink> downloadQueue = new BlockingCollection<ParentLink>(new ConcurrentQueue<ParentLink>(), 1000);

    // Note: the three unique links are SEED, LINK1 and LINK2 respectively.
    var seedLink = new ParentLink(Const.SEED, null);
    var firstChildLink = new ParentLink(Const.LINK1, 1);
    var secondChildLink = new ParentLink(Const.LINK2, 1);
    var mockProgress = new MockProgess();
    var feeder = new Feeder(newLinks, downloadQueue, 1, 1, mockProgress);
    feeder.Start();

    // Three unique links, interleaved with repeated additions of two of them.
    newLinks.Add(seedLink);
    newLinks.Add(firstChildLink);
    newLinks.Add(secondChildLink);
    newLinks.Add(firstChildLink);
    newLinks.Add(secondChildLink);
    newLinks.Add(secondChildLink);
    newLinks.Add(firstChildLink);
    newLinks.Add(secondChildLink);
    newLinks.Add(firstChildLink);
    Thread.Sleep(100);
    Assert.IsTrue(newLinks.Count == 0);

    // Only the 3 unique links should come through; duplicates are dropped.
    Assert.AreEqual(3, downloadQueue.Count);

    // The links must arrive in the order they were first added.
    Assert.AreEqual(Const.SEED, downloadQueue.Take().Link);
    Assert.AreEqual(Const.LINK1, downloadQueue.Take().Link);
    Assert.AreEqual(Const.LINK2, downloadQueue.Take().Link);

    feeder.Stop();
    Thread.Sleep(1000);
}