/// <summary>
/// Process html page at the current uri: download it, save it through the content saver,
/// then scan every <c>src</c> / <c>href</c> attribute value found in the document at
/// <paramref name="depth"/> + 1.
/// </summary>
/// <param name="httpClient">Http Client used for the request; it is also passed on to the nested scans.</param>
/// <param name="pageUri">Html page uri</param>
/// <param name="depth">Current depth of transactions</param>
/// <returns>Task that completes once the page and all links discovered in it have been processed.</returns>
/// <remarks>
/// Returns without doing anything when the transaction constraints reject <paramref name="pageUri"/>.
/// Exceptions raised by the request, the save or the nested scans propagate unchanged
/// (original type and stack trace preserved). Attribute values that cannot be turned into
/// an absolute uri are skipped.
/// </remarks>
private async Task ProcessHtmlDocumentAsync(HttpClient httpClient, Uri pageUri, int depth)
{
    if (!_transactionConstraints.IsAcceptableUrl(pageUri, _primaryUri))
    {
        return;
    }

    var document = new HtmlDocument();

    // Dispose the response (and its stream) as soon as the markup has been loaded;
    // the stream is awaited instead of blocking on .Result.
    using (HttpResponseMessage response = await httpClient.GetAsync(pageUri, _token))
    using (var content = await response.Content.ReadAsStreamAsync())
    {
        document.Load(content, Encoding.UTF8);
    }

    await _contentSaver.SaveHtmlPageAsync(pageUri, document);

    var attributesWithLinks = document.DocumentNode.Descendants()
        .SelectMany(d => d.Attributes.Where(a => a.Name == "src" || a.Name == "href"));

    // Relative links are resolved against the client's base address, as before;
    // when no base address is configured, fall back to the page's own uri instead of throwing.
    Uri baseUri = httpClient.BaseAddress ?? pageUri;

    foreach (var attributeWithLink in attributesWithLinks)
    {
        Uri linkUri;
        if (!Uri.TryCreate(baseUri, attributeWithLink.Value, out linkUri))
        {
            // A single malformed link must not abort processing of the remaining links.
            continue;
        }

        await ScanUrlAsync(httpClient, linkUri, depth + 1);
    }
}