예제 #1
0
        public async Task GetHtmlShouldReturnHtmlWhenClientReturnAResponse()
        {
            // Verifies that GetHtml hands back whatever the file manager reads
            // from the stream supplied by the client.

            // arrange
            const string url = "www.google.com";
            const string expectedResponse = "123";
            var streamStub = new Mock<Stream>();

            // The client yields the stubbed stream for any requested address.
            _client
                .Setup(c => c.GetStreamAsync(It.IsAny<string>()))
                .ReturnsAsync(streamStub.Object);

            // Reading any stream produces the canned response text.
            _fileManagerService
                .Setup(f => f.ReadStream(It.IsAny<Stream>()))
                .Returns(expectedResponse);

            // act
            var html = await _scraperService.GetHtml(url);

            // assert
            html.Should().Be(expectedResponse);
        }
예제 #2
0
        /// <summary>
        /// Fetches the numbered result pages ("Page01.html", "Page02.html", ...) for the
        /// configured search engine one at a time and accumulates the ranks that the
        /// search-rank service reports for the host of <paramref name="uri"/>.
        /// </summary>
        /// <param name="searchTerm">
        /// Not used at present: the pages are static, so the term is not appended to the URL.
        /// </param>
        /// <param name="uri">Address whose <see cref="Uri.Host"/> is passed to the rank service.</param>
        /// <param name="maxResults">Result limit forwarded to the rank service and used to stop paging.</param>
        /// <returns>
        /// The ranks collected up to the point where a page came back empty/whitespace,
        /// a page yielded no fetched results, or the running result count exceeded
        /// <paramref name="maxResults"/>.
        /// </returns>
        private async Task<List<int>> GetSearchRanks(string searchTerm, Uri uri, int maxResults)
        {
            var rankList = new List<int>();
            var resultCount = 0;

            // Both values are loop-invariant, so build them once instead of per page.
            var baseUrl = $"https://infotrack-tests.infotrack.com.au/{_searchEngine}/Page";
            var searchResultPattern = $"{_searchResultRegex}({SeoConstants.WebsiteRegex})";

            for (var pageCount = 1; ; pageCount++)
            {
                // searchTerm would normally be appended here if using a real search engine and not a static webpage.
                // "D2" zero-pads single-digit page numbers (1 -> "01") and leaves larger numbers untouched.
                var searchUrl = $"{baseUrl}{pageCount:D2}.html";
                var html = await _scraperService.GetHtml(searchUrl);

                if (string.IsNullOrWhiteSpace(html))
                {
                    return rankList;
                }

                var fetchedCount = 0;
                rankList.AddRange(
                    _searchRankService.GetSearchRanks(
                        html,
                        uri.Host,
                        searchResultPattern,
                        maxResults,
                        ref fetchedCount,
                        ref resultCount));

                // NOTE(review): the strict '>' means another page is requested when
                // resultCount == maxResults; confirm against the rank service's contract.
                if (fetchedCount == 0 || resultCount > maxResults)
                {
                    return rankList;
                }
            }
        }