Example #1
        private void button1_Click(object sender, EventArgs e)
        {
            try
            {
                // Queue the search on background workers so the UI thread stays responsive.
                t = new TaskThreadManager(10);
                button1.Enabled = false;
                listView1.Items.Clear();

                t.DoWork(() => GetResults("Google", textBox1.Text));
                // Further engines can be queued the same way:
                //t.DoWork(() => GetResults("Yandex", textBox1.Text));

                Text = "Working..";
            }
            catch (Exception er)
            {
                // Show the failure in the window title instead of the "Working.." status.
                Text = er.Message;
            }
        }
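The handler above queues `GetResults` on a background worker, but its body is not part of the example. Below is a minimal sketch of what such a helper could look like, assuming a hypothetical `SearchEngine.Query` call; the only firm requirement is that `listView1` is updated via `Invoke`, since WinForms controls must only be touched from the UI thread.

        // Sketch only: GetResults is not shown above, and SearchEngine.Query
        // is a hypothetical API used purely for illustration.
        private void GetResults(string engineName, string query)
        {
            // Runs on a worker thread queued through TaskThreadManager.DoWork.
            IEnumerable<string> hits = SearchEngine.Query(engineName, query); // hypothetical

            // Marshal the results back to the UI thread before touching controls.
            listView1.Invoke((Action)(() =>
            {
                foreach (string hit in hits)
                    listView1.Items.Add(new ListViewItem(new[] { engineName, hit }));
                Text = "Done";
            }));
        }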
Example #2
        public void DoWork_TokenCanceled_WorkNeverCompleted()
        {
            CancellationTokenSource cancellationTokenSource = new CancellationTokenSource();
            TaskThreadManager uut = new TaskThreadManager(10, cancellationTokenSource);

            int count = 0;

            // Queue five items; each sleeps briefly before incrementing the shared counter.
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });

            // Cancel before all of the queued work has had a chance to run.
            cancellationTokenSource.Cancel();

            // Let any in-flight items drain, then verify that not all five completed.
            System.Threading.Thread.Sleep(250);
            Assert.IsTrue(count < 5);
        }
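The test only works if `DoWork` observes the token handed to the constructor. Abot's real `TaskThreadManager` is not reproduced here; the following is a minimal sketch with the same constructor shape, assuming concurrency is capped with a semaphore and queued items check the token before running (requires System, System.Threading and System.Threading.Tasks).

        // Minimal sketch, not the library's implementation: work that has not
        // started when the shared token is cancelled is never executed.
        public class TaskThreadManagerSketch
        {
            private readonly SemaphoreSlim _slots;
            private readonly CancellationToken _token;

            public TaskThreadManagerSketch(int maxConcurrentTasks)
                : this(maxConcurrentTasks, new CancellationTokenSource()) { }

            public TaskThreadManagerSketch(int maxConcurrentTasks, CancellationTokenSource cts)
            {
                _slots = new SemaphoreSlim(maxConcurrentTasks, maxConcurrentTasks);
                _token = cts.Token;
            }

            public void DoWork(Action action)
            {
                Task.Run(async () =>
                {
                    await _slots.WaitAsync(_token);            // throws once cancelled
                    try
                    {
                        _token.ThrowIfCancellationRequested(); // skip work queued before Cancel()
                        action();
                    }
                    finally
                    {
                        _slots.Release();
                    }
                }, _token);
            }
        }

With this shape, `Cancel()` prevents any item that has not yet started from running, while items already executing finish normally, which is why the assertion is `count < 5` rather than an exact number.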
Example #3
        public void SetUp()
        {
            _fakeHyperLinkParser = new Mock<IHyperLinkParser>();
            _fakeHttpRequester = new Mock<IPageRequester>();
            _fakeCrawlDecisionMaker = new Mock<ICrawlDecisionMaker>();
            _fakeMemoryManager = new Mock<IMemoryManager>();
            _fakeDomainRateLimiter = new Mock<IDomainRateLimiter>();
            _fakeRobotsDotTextFinder = new Mock<IRobotsDotTextFinder>();


            _dummyScheduler = new Scheduler();
            _dummyThreadManager = new TaskThreadManager(10);
            _dummyConfiguration = new CrawlConfiguration();
            _dummyConfiguration.ConfigurationExtensions.Add("somekey", "someval");

            _unitUnderTest = new PoliteWebCrawler(
                _dummyConfiguration,
                _fakeCrawlDecisionMaker.Object,
                _dummyThreadManager,
                _dummyScheduler,
                _fakeHttpRequester.Object,
                _fakeHyperLinkParser.Object,
                _fakeMemoryManager.Object,
                _fakeDomainRateLimiter.Object,
                _fakeRobotsDotTextFinder.Object);
            _unitUnderTest.CrawlBag.SomeVal = "SomeVal";
            _unitUnderTest.CrawlBag.SomeList = new List<string>() { "a", "b" };
            _rootUri = new Uri("http://a.com/");
        }
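The `SetUp` above assumes mock and dummy fields declared on the test fixture. A sketch of the surrounding class skeleton, assuming NUnit and Moq (the field types are inferred from the method body and the fixture name is made up):

        [TestFixture]
        public class PoliteWebCrawlerTests   // hypothetical fixture name
        {
            private Mock<IHyperLinkParser> _fakeHyperLinkParser;
            private Mock<IPageRequester> _fakeHttpRequester;
            private Mock<ICrawlDecisionMaker> _fakeCrawlDecisionMaker;
            private Mock<IMemoryManager> _fakeMemoryManager;
            private Mock<IDomainRateLimiter> _fakeDomainRateLimiter;
            private Mock<IRobotsDotTextFinder> _fakeRobotsDotTextFinder;

            private Scheduler _dummyScheduler;
            private TaskThreadManager _dummyThreadManager;
            private CrawlConfiguration _dummyConfiguration;
            private PoliteWebCrawler _unitUnderTest;
            private Uri _rootUri;

            [SetUp]
            public void SetUp()
            {
                // body as shown above
            }
        }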
Example #4
        public void DoWork_TokenCanceled_WorkNeverCompleted()
        {
            var cancellationTokenSource = new CancellationTokenSource();
            var uut = new TaskThreadManager(10, cancellationTokenSource);

            var count = 0;

            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });
            uut.DoWork(() => { System.Threading.Thread.Sleep(50); Interlocked.Increment(ref count); });

            cancellationTokenSource.Cancel();

            System.Threading.Thread.Sleep(600);
            Assert.IsTrue(count < 10, "Completed all 10 tasks but should not have");
        }
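Both cancellation tests wait with a fixed `Thread.Sleep`, which makes them sensitive to machine load. Below is a sketch of a less timing-dependent variant of the test body, assuming the same `DoWork` signature, that signals a `CountdownEvent` from each completed item and waits with a bounded timeout:

            // Sketch: same scenario, but the wait is bounded and tied to actual
            // completions instead of a fixed sleep.
            var cancellationTokenSource = new CancellationTokenSource();
            var uut = new TaskThreadManager(10, cancellationTokenSource);

            int count = 0;
            var completed = new CountdownEvent(10);

            for (int i = 0; i < 10; i++)
            {
                uut.DoWork(() =>
                {
                    System.Threading.Thread.Sleep(50);
                    Interlocked.Increment(ref count);
                    completed.Signal();
                });
            }

            cancellationTokenSource.Cancel();

            // If cancellation skipped any queued item, the countdown never reaches
            // zero and Wait returns false after the timeout.
            bool allCompleted = completed.Wait(TimeSpan.FromSeconds(2));

            Assert.IsFalse(allCompleted, "Completed all 10 tasks but should not have");
            Assert.IsTrue(count < 10);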
Example #5
        public void Crawl_PageCrawlCompletedEvent_IsSynchronous()
        {
            _dummyThreadManager = new TaskThreadManager(1);
            _unitUnderTest = new PoliteWebCrawler(
                _dummyConfiguration,
                _fakeCrawlDecisionMaker.Object,
                _dummyThreadManager,
                _dummyScheduler,
                _fakeHttpRequester.Object,
                _fakeHyperLinkParser.Object,
                _fakeMemoryManager.Object,
                _fakeDomainRateLimiter.Object,
                _fakeRobotsDotTextFinder.Object);

            int elapsedTimeForLongJob = 1000;

            _fakeHttpRequester.Setup(f => f.MakeRequest(It.IsAny<Uri>(), It.IsAny<Func<CrawledPage, CrawlDecision>>())).Returns(new CrawledPage(_rootUri));
            _fakeHyperLinkParser.Setup(f => f.GetLinks(It.Is<CrawledPage>(p => p.Uri == _rootUri))).Returns(new List<Uri>(){
                new Uri(_rootUri.AbsoluteUri + "page2.html"), //should be fired sync
                new Uri(_rootUri.AbsoluteUri + "page3.html"), //should be fired sync
                new Uri(_rootUri.AbsoluteUri + "page4.html"),  //should be fired sync
                new Uri(_rootUri.AbsoluteUri + "page5.html")}); //should be fired sync since its the last page to be crawled
            _fakeCrawlDecisionMaker.Setup(f => f.ShouldCrawlPage(It.IsAny<PageToCrawl>(), It.IsAny<CrawlContext>())).Returns(new CrawlDecision { Allow = true });
            _fakeCrawlDecisionMaker.Setup(f => f.ShouldCrawlPageLinks(It.IsAny<CrawledPage>(), It.IsAny<CrawlContext>())).Returns(new CrawlDecision { Allow = true });
            _fakeCrawlDecisionMaker.Setup(f => f.ShouldRecrawlPage(It.IsAny<CrawledPage>(), It.IsAny<CrawlContext>())).Returns(new CrawlDecision { Allow = false });

            _unitUnderTest.PageCrawlCompleted += new EventHandler<PageCrawlCompletedArgs>((sender, args) => System.Threading.Thread.Sleep(elapsedTimeForLongJob));

            Stopwatch timer = Stopwatch.StartNew();
            _unitUnderTest.Crawl(_rootUri);
            timer.Stop();

            Assert.IsTrue(timer.ElapsedMilliseconds > 4 * elapsedTimeForLongJob);
        }
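The assertion holds because `TaskThreadManager(1)` leaves a single worker, so a slow `PageCrawlCompleted` handler blocks the next page and the events fire one after another. A small standalone illustration of that serializing effect, using the `TaskThreadManagerSketch` defined earlier (an assumption, not the real class):

            // With one concurrent task, a slow callback serializes the queue, so the
            // elapsed time grows roughly linearly with the number of items.
            var manager = new TaskThreadManagerSketch(1);
            var done = new CountdownEvent(5);
            var timer = Stopwatch.StartNew();

            for (int i = 0; i < 5; i++)
            {
                manager.DoWork(() =>
                {
                    System.Threading.Thread.Sleep(200); // stands in for the slow event handler
                    done.Signal();
                });
            }

            done.Wait();
            timer.Stop();

            // Roughly 5 x 200 ms rather than ~200 ms, mirroring the crawl test's
            // "timer.ElapsedMilliseconds > 4 * elapsedTimeForLongJob" assertion.
            Console.WriteLine(timer.ElapsedMilliseconds);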
Example #6
        public void Crawl_ExceptionThrownByFirstShouldSchedulePageLink_SetsCrawlResultError()
        {
            _dummyThreadManager = new TaskThreadManager(1);
            _unitUnderTest = new PoliteWebCrawler(
                _dummyConfiguration,
                _fakeCrawlDecisionMaker.Object,
                _dummyThreadManager,
                _dummyScheduler,
                _fakeHttpRequester.Object,
                _fakeHyperLinkParser.Object,
                _fakeMemoryManager.Object,
                _fakeDomainRateLimiter.Object,
                _fakeRobotsDotTextFinder.Object);
            Exception ex = new Exception("oh no");
            _fakeCrawlDecisionMaker.Setup(f => f.ShouldCrawlPage(It.IsAny<PageToCrawl>(), It.IsAny<CrawlContext>())).Throws(ex);

            CrawlResult result = _unitUnderTest.Crawl(_rootUri);

            _fakeCrawlDecisionMaker.Verify(f => f.ShouldCrawlPage(It.IsAny<PageToCrawl>(), It.IsAny<CrawlContext>()), Times.Exactly(1));
            Assert.IsTrue(result.ErrorOccurred);
            Assert.AreSame(ex, result.ErrorException);
            Assert.AreEqual(0, _dummyScheduler.Count);
            Assert.IsFalse(result.CrawlContext.IsCrawlStopRequested);
            Assert.IsFalse(result.CrawlContext.IsCrawlHardStopRequested);
        }
Example #7
        public override void Initialize(IConfigurationSupport config)
        {
            // Create the thread manager with its default settings, then wire up the remaining dependencies.
            _taskThreadManager = new TaskThreadManager();
            RegisterDependencies(config);
        }
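The last example uses a parameterless constructor, so the concurrency level is presumably defaulted inside the library. Expressed on the sketch from earlier, such a default could look like the following (an assumption, here the logical core count; the real library may choose differently):

        // Assumption, not the library's code: default the concurrency to the
        // number of logical processors when no value is supplied.
        public TaskThreadManagerSketch()
            : this(Environment.ProcessorCount) { }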