// Resolves the supplied url against the fixed base address
// "https://www.pizza.com" via UrlFormatHelper.GetAbsoluteUrlString and
// returns the helper's result unchanged.
// NOTE(review): presumably driven by test-case attributes that compare the
// returned value with an expected result - confirm against the declaration.
public string GetAbsoluteUrlStringTest(string url)
{
    // Arrange
    Uri pizzaBase = new Uri("https://www.pizza.com");

    // Act + Assert: the resolved string is handed straight back to the caller.
    return UrlFormatHelper.GetAbsoluteUrlString(pizzaBase, url);
}
/// <summary>
/// Fetches <paramref name="currentPage"/> over HTTP, extracts the links from the
/// returned document and returns a <see cref="Page"/> for every link whose host
/// equals the host of <paramref name="domain"/>.
/// </summary>
/// <param name="currentPage">
/// Page to download. Its <c>Url</c> is requested and is also passed as the second
/// constructor argument of every returned page.
/// </param>
/// <param name="domain">
/// Base address used to turn each link into an absolute URL; only links whose
/// host matches <c>domain.Host</c> are kept.
/// </param>
/// <param name="token">Cancellation token forwarded to the HTTP request.</param>
/// <returns>
/// The pages built from same-host links. The list is empty when the response
/// status is not successful, and contains whatever was collected so far when a
/// <see cref="TaskCanceledException"/> is raised.
/// </returns>
public async Task<IEnumerable<Page>> GetLinkedPages(Page currentPage, Uri domain, CancellationToken token)
{
    var pages = new List<Page>();

    try
    {
        string htmlDocument;

        // HttpResponseMessage is IDisposable: dispose it as soon as the body has
        // been buffered into a string so its resources are released promptly
        // instead of waiting for finalization.
        using (var response = await httpClient.GetAsync(currentPage.Url, token))
        {
            if (!response.IsSuccessStatusCode)
            {
                return pages;
            }

            htmlDocument = await response.Content.ReadAsStringAsync();
        }

        var links = await documentParser.GetLinksAsync(htmlDocument);

        foreach (var link in links)
        {
            // Constant message template + argument (structured logging) rather
            // than an interpolated string; the rendered text is unchanged.
            logger.LogTrace("Adding link to list to crawl : {Link}", link);

            try
            {
                var absUrl = UrlFormatHelper.GetAbsoluteUrlString(domain, link);

                // Check for same sub-domain: the hosts must match exactly.
                if (new Uri(absUrl).Host == domain.Host)
                {
                    pages.Add(new Page(absUrl, currentPage.Url));
                }
            }
            catch (UriFormatException e)
            {
                // A malformed link is logged and skipped; the remaining links
                // are still processed.
                logger.LogError(e, "Invalid url: {Link}", link);
            }
        }

        return pages;
    }
    catch (TaskCanceledException)
    {
        // Cancellation is not treated as an error: return what has been
        // gathered up to this point.
        return pages;
    }
}