/// <summary>
/// Resolves <paramref name="url"/> against the fixed base address
/// <c>https://www.pizza.com</c> via <c>UrlFormatHelper.GetAbsoluteUrlString</c>
/// and hands the outcome back to the caller.
/// </summary>
/// <param name="url">The link text to resolve against the base address.</param>
/// <returns>
/// Whatever <c>UrlFormatHelper.GetAbsoluteUrlString</c> produced for the input.
/// NOTE(review): presumably compared against an expected value declared on
/// test attributes that are not visible in this excerpt - confirm.
/// </returns>
public string GetAbsoluteUrlStringTest(string url)
        {
            // Arrange: every input is resolved against the same site root.
            var siteRoot = new Uri("https://www.pizza.com");

            // Act / Assert: the resolved string is returned as-is; no assertion
            // is made inside the method body itself.
            return UrlFormatHelper.GetAbsoluteUrlString(siteRoot, url);
        }
Beispiel #2
0
        /// <summary>
        /// Downloads <paramref name="currentPage"/>, extracts its links through
        /// <c>documentParser</c>, and returns a <see cref="Page"/> for every link whose
        /// absolute form has the same host as <paramref name="domain"/>.
        /// </summary>
        /// <param name="currentPage">
        /// The page to fetch; its <c>Url</c> is requested and is also passed as the second
        /// constructor argument of every <see cref="Page"/> that is produced.
        /// </param>
        /// <param name="domain">
        /// Base address used to make links absolute; only links whose host equals
        /// <c>domain.Host</c> are kept.
        /// </param>
        /// <param name="token">Cancellation token forwarded to the HTTP request.</param>
        /// <returns>
        /// The same-host pages found. The list is empty when the response status is not a
        /// success code. If a <see cref="TaskCanceledException"/> is raised, the pages
        /// collected up to that point are returned instead of the exception propagating.
        /// </returns>
        public async Task<IEnumerable<Page>> GetLinkedPages(Page currentPage, Uri domain, CancellationToken token)
        {
            var pages = new List<Page>();

            try
            {
                string htmlDocument;

                // The response is IDisposable: scope it so it is released as soon as the
                // body has been read, including on the early return for failed requests.
                using (var response = await httpClient.GetAsync(currentPage.Url, token))
                {
                    if (!response.IsSuccessStatusCode)
                    {
                        return pages;
                    }

                    htmlDocument = await response.Content.ReadAsStringAsync();
                }

                var links = await documentParser.GetLinksAsync(htmlDocument);

                foreach (var link in links)
                {
                    logger.LogTrace($"Adding link to list to crawl : {link}");

                    try
                    {
                        var absUrl = UrlFormatHelper.GetAbsoluteUrlString(domain, link);

                        // Keep only links that stay on the same host as the crawl domain.
                        if (new Uri(absUrl).Host == domain.Host)
                        {
                            pages.Add(new Page(absUrl, currentPage.Url));
                        }
                    }
                    catch (UriFormatException e)
                    {
                        // A single malformed link must not abort the rest of the page.
                        logger.LogError(e, $"Invalid url: {link}");
                    }
                }

                return pages;
            }
            catch (TaskCanceledException)
            {
                // Cancellation is treated as "stop early", not as a failure.
                return pages;
            }
        }