public void RelativePathsToNewPages()
        {
            // Five relative links are fed to the filter together with the site root;
            // the test expects exactly the three paths listed below to come back, in order.
            var siteRoot = new Uri("http://www.example.com");

            var candidateLinks = new List<string>
            {
                "",
                "/books/5",
                "/books/5&page=5",
                "/books/5/page/10",
                "/books/5#description"
            };

            string[] expectedPaths =
            {
                "/books/5",
                "/books/5&page=5",
                "/books/5/page/10"
            };

            var filtered = urlFilter.RemoveUnnecessary(candidateLinks, siteRoot);

            // Same number of entries, and each entry matches the expected path and query.
            Assert.Equal(expectedPaths.Length, filtered.Count);
            Assert.Collection(
                filtered,
                first  => Assert.Equal(expectedPaths[0], first.PathAndQuery),
                second => Assert.Equal(expectedPaths[1], second.PathAndQuery),
                third  => Assert.Equal(expectedPaths[2], third.PathAndQuery));
        }
        /// <summary>
        /// Crawls a website starting at <paramref name="startUrl"/>, visiting discovered pages
        /// in first-in/first-out order and timing each page request.
        /// </summary>
        /// <param name="startUrl">Absolute URL at which the crawl starts.</param>
        /// <param name="blockingCollection">
        /// Receives every record item as soon as it is produced. It is marked as complete
        /// for adding when the crawl ends, including when the crawl ends with an exception.
        /// </param>
        /// <param name="cancellationToken">
        /// Checked before each page request; once cancellation is requested no further pages are requested.
        /// </param>
        /// <returns>
        /// A record holding the requested URL, its creation time (UTC) and one item per requested page.
        /// </returns>
        /// <exception cref="UriFormatException">
        /// Thrown when <paramref name="startUrl"/> is not a well-formed absolute URI.
        /// </exception>
        public async Task <RecordDto> CrawlWebsiteAsync(string startUrl, BlockingCollection <RecordItemDto> blockingCollection, CancellationToken cancellationToken)
        {
            if (!Uri.IsWellFormedUriString(startUrl, UriKind.Absolute))
            {
                throw new UriFormatException("Url in not valid");
            }

            var startUri        = new Uri(startUrl, UriKind.Absolute);
            var stopwatch       = new Stopwatch();
            var pagesToBeCalled = new Queue<Uri>();

            // Every URI that has ever been enqueued (visited or still pending). A single set
            // replaces the separate "visited" and "queued" linear scans with an O(1) lookup.
            var knownUris = new HashSet<Uri> { startUri };

            // TODO: Extract to factory service
            var result = new RecordDto
            {
                RequestedUrl  = startUrl,
                RecordCreated = DateTime.UtcNow,
                Items         = new List<RecordItemDto>()
            };

            pagesToBeCalled.Enqueue(startUri);

            try
            {
                while (pagesToBeCalled.Count > 0 && !cancellationToken.IsCancellationRequested)
                {
                    var recordItem = new RecordItemDto();
                    var currentUri = pagesToBeCalled.Dequeue();

                    // The measured time covers both the request and reading the response body.
                    stopwatch.Restart();
                    var requestResult = await client.GetAsync(currentUri.AbsoluteUri);
                    var content       = await requestResult.Content.ReadAsStringAsync();
                    stopwatch.Stop();

                    recordItem.RequestUrl  = currentUri.AbsolutePath;
                    recordItem.RequestTime = stopwatch.Elapsed;
                    result.Items.Add(recordItem);
                    blockingCollection.Add(recordItem);

                    var parsedUrls    = htmlParser.GetUrlsFromHtmlATag(content);
                    var filteredPaths = urlFilter.RemoveUnnecessary(parsedUrls, currentUri);

                    foreach (var parsedUri in filteredPaths)
                    {
                        // Add returns false when the URI was already seen, so each page is queued once.
                        if (knownUris.Add(parsedUri))
                        {
                            pagesToBeCalled.Enqueue(parsedUri);
                        }
                    }
                }
            }
            finally
            {
                // Always signal completion so a consumer of the collection is not left
                // waiting if a request, parse or filter step throws.
                blockingCollection.CompleteAdding();
            }

            return result;
        }