public async Task ShouldObserveMaxDepth()
        {
            var totalHyperlinkCount = 1; // the initial URI

            // Responds to every request with HTML containing a random number of
            // hyperlinks, tallying the total for later assertions.
            HttpContent getRandomHtml(HttpRequestMessage message)
            {
                var linkCount = new Random().Next(10, 25);

                Interlocked.Add(ref totalHyperlinkCount, linkCount);

                return _html.GetHtmlWithHyperlinks(linkCount);
            }

            var messageHandler = new MockHttpMessageHandler();
            var httpClient     = messageHandler.ToHttpClient();

            messageHandler.When("http://host.com/*")
                          .Respond(m => getRandomHtml(m));

            var settings = new HyperlinkCrawlerSettings();
            var request  = new HyperlinkCrawlRequest("http://host.com/page")
            {
                MaxDepth = 5
            };
            var hcs = new HyperlinkCrawlerStrategy(settings, httpClient, _logger, request);
            await hcs.ProcessRequestAsync(request, CancellationToken.None);

            await hcs.Completion;

            // with MaxDepth = 5, the crawl is expected to stop after 6 URIs
            // (the initial URI plus 5 more)
            Assert.Equal(
                6,
                hcs.State.ProcessedUriCount + hcs.State.FailedUriCount);
        }
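
        // _html is a shared HTML-generating fixture helper defined outside this
        // section. A minimal sketch of what GetHtmlWithHyperlinks is assumed to
        // look like (illustrative only; each link must target a unique URI under
        // the mocked host, since the tests below count every generated link as a
        // distinct discovery):
        //
        //     public HttpContent GetHtmlWithHyperlinks(int count)
        //     {
        //         var links = string.Join(
        //             Environment.NewLine,
        //             Enumerable.Range(0, count).Select(i =>
        //                 $"<a href=\"http://host.com/page/{Guid.NewGuid():N}\">link {i}</a>"));
        //
        //         return new StringContent(
        //             $"<html><body>{links}</body></html>", Encoding.UTF8, "text/html");
        //     }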

        [Fact]
        public async Task ShouldProcessAllLinks()
        {
            var totalHyperlinkCount             = 1; // the initial URI
            var deterministicHyperlinkCountdown = 11;

            // Deterministic responder: the first response contains 10 hyperlinks,
            // the next 9, and so on down to 0, so the grand total is known up front.
            HttpContent getDeterministicHtml(HttpRequestMessage _)
            {
                var linkCount = Interlocked.Decrement(ref deterministicHyperlinkCountdown);

                if (linkCount < 0)
                {
                    linkCount = 0;
                }

                Interlocked.Add(ref totalHyperlinkCount, linkCount);

                return _html.GetHtmlWithHyperlinks(linkCount);
            }

            var messageHandler = new MockHttpMessageHandler();

            messageHandler.When("http://host.com/*")
                          .Respond(m => getDeterministicHtml(m));

            var httpClient = messageHandler.ToHttpClient();

            // HttpClient.Timeout determines the window that auto-completion detection
            // awaits, allowing for any additional heartbeats that signal URIs are still
            // being processed. Outside of unit tests, set this to a value that suits
            // crawling over a real internet connection, i.e. one that accounts for DNS
            // resolution latency and server response times.
            httpClient.Timeout = TimeSpan.FromMilliseconds(300);

            var settings = new HyperlinkCrawlerSettings();
            var request  = new HyperlinkCrawlRequest("http://host.com/page");
            var hcs      = new HyperlinkCrawlerStrategy(settings, httpClient, _logger, request);

            // act
            await hcs.ProcessRequestAsync(request, CancellationToken.None);

            await hcs.Completion;

            // deterministicHyperlinkCountdown lets us pre-calculate the total number
            // of hyperlinks that will be "found": it is atomically decremented on
            // each request, so successive pages contain 10, 9, ..., 1, then 0 links.
            // ∑ j for j = 1..10, plus the initial URI:
            // (10 + 9 + 8 + 7 + 6 + 5 + 4 + 3 + 2 + 1) + 1 = 56
            Assert.Equal(56, totalHyperlinkCount);

            Assert.Equal(
                totalHyperlinkCount,
                hcs.State.ProcessedUriCount + hcs.State.FailedUriCount);
        }

        [Fact]
        public async Task ShouldObserveCompletionWindow()
        {
            var totalHyperlinkCount = 1; // the initial URI

            // Simulated slow server: each response is delayed by 100 or 200 ms so the
            // crawl cannot drain before the completion window elapses.
            async Task<HttpResponseMessage> getRandomHtml(HttpRequestMessage message)
            {
                await Task.Delay(100 * new Random().Next(1, 3));

                var linkCount = new Random().Next(3, 5);

                Interlocked.Add(ref totalHyperlinkCount, linkCount);

                return new HttpResponseMessage(HttpStatusCode.OK)
                {
                    Content = _html.GetHtmlWithHyperlinks(linkCount)
                };
            }

            var messageHandler = new MockHttpMessageHandler();
            var httpClient     = messageHandler.ToHttpClient();

            messageHandler.When("http://host.com/*")
                          .Respond(m => getRandomHtml(m));

            var settings = new HyperlinkCrawlerSettings();
            var request  = new HyperlinkCrawlRequest("http://host.com/page")
            {
                CompletionWindow = TimeSpan.FromSeconds(3)
            };
            var hcs = new HyperlinkCrawlerStrategy(settings, httpClient, _logger, request);

            // Allow some additional time for ProcessRequestAsync to initialize the
            // state and the dataflow pipeline; Crawler.cs applies the same padding.
            var cts = new CancellationTokenSource();

            cts.CancelAfter(request.CompletionWindow.Add(TimeSpan.FromMilliseconds(200)));

            await hcs.ProcessRequestAsync(request, cts.Token);

            await Task.Delay(request.CompletionWindow.Add(TimeSpan.FromSeconds(1)));

            // the completion window should have elapsed before every discovered URI
            // could be processed
            Assert.True(hcs.State.ProcessedUriCount < totalHyperlinkCount);

            Assert.Equal(CrawlJobStatus.Completed, hcs.State.Status);
        }
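
        // The comment in the test above references Crawler.cs: the production caller
        // is assumed to pad the completion window the same way before cancelling.
        // A sketch under that assumption (initializationGracePeriod is hypothetical;
        // this is not the actual Crawler.cs code):
        //
        //     var cts = new CancellationTokenSource();
        //     cts.CancelAfter(request.CompletionWindow.Add(initializationGracePeriod));
        //     await strategy.ProcessRequestAsync(request, cts.Token);
        //     await strategy.Completion;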

        [Fact]
        public async Task ShouldStopProcessingViaCancellation()
        {
            var totalHyperlinkCount = 1; // the initial URI

            // Slow responder (100-400 ms per request) keeps the crawl busy long enough
            // for cancellation to interrupt it mid-flight.
            async Task<HttpResponseMessage> getRandomHtml(HttpRequestMessage message)
            {
                await Task.Delay(100 * new Random().Next(1, 5));

                var linkCount = new Random().Next(1, 5);

                Interlocked.Add(ref totalHyperlinkCount, linkCount);

                return new HttpResponseMessage(HttpStatusCode.OK)
                {
                    Content = _html.GetHtmlWithHyperlinks(linkCount)
                };
            }

            var messageHandler = new MockHttpMessageHandler();
            var httpClient     = messageHandler.ToHttpClient();

            messageHandler.When("http://host.com/*")
                          .Respond(m => getRandomHtml(m));

            var settings = new HyperlinkCrawlerSettings();
            var request  = new HyperlinkCrawlRequest("http://host.com/page");
            var hcs      = new HyperlinkCrawlerStrategy(settings, httpClient, _logger, request);
            var cts      = new CancellationTokenSource();
            await hcs.ProcessRequestAsync(request, cts.Token);

            // give the crawl a head start so there is in-flight work to cancel
            await Task.Delay(TimeSpan.FromSeconds(1));

            // sanity check: the crawl made progress before being cancelled
            Assert.True(hcs.State.ProcessedUriCount > 1);

            cts.Cancel();
            await hcs.Completion;

            Assert.True(hcs.State.ProcessedUriCount < totalHyperlinkCount);

            Assert.Equal(CrawlJobStatus.Cancelled, hcs.State.Status);
        }

        [Fact]
        public async Task ShouldProcessAllLinksIgnoringDuplicates()
        {
            // Every response contains 6 hyperlinks that are exact duplicates of one
            // another, exercising the crawler's de-duplication.
            HttpContent getDuplicateLinkHtml(HttpRequestMessage _)
            {
                return _html.GetHtmlWithDuplicateHyperlinks(6);
            }

            var messageHandler = new MockHttpMessageHandler();

            messageHandler.When("http://host.com/*")
                          .Respond(m => getDuplicateLinkHtml(m));

            var httpClient = messageHandler.ToHttpClient();

            // As in ShouldProcessAllLinks, HttpClient.Timeout determines the window
            // that auto-completion detection awaits for further heartbeats.
            httpClient.Timeout = TimeSpan.FromMilliseconds(300);

            var settings = new HyperlinkCrawlerSettings();
            var request  = new HyperlinkCrawlRequest("http://host.com/page");
            var hcs      = new HyperlinkCrawlerStrategy(settings, httpClient, _logger, request);

            // act
            await hcs.ProcessRequestAsync(request, CancellationToken.None);

            await hcs.Completion;

            // only 1 distinct URI should be discovered, since all 6 hyperlinks are
            // exact duplicates
            Assert.Equal(1, hcs.State.DiscoveredUriCount);

            // should process 2 URIs; the original and the 1 discovered
            Assert.Equal(2, hcs.State.ProcessedUriCount + hcs.State.FailedUriCount);
        }
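
        // GetHtmlWithDuplicateHyperlinks is likewise assumed to repeat one identical
        // anchor tag (a sketch only; the real helper lives in the shared fixture):
        //
        //     public HttpContent GetHtmlWithDuplicateHyperlinks(int count)
        //     {
        //         var link = "<a href=\"http://host.com/duplicate\">link</a>";
        //         var body = string.Concat(Enumerable.Repeat(link, count));
        //
        //         return new StringContent(
        //             $"<html><body>{body}</body></html>", Encoding.UTF8, "text/html");
        //     }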