/// <summary>
/// Downloads the page at <paramref name="uri"/>, logs and saves it through the content saver,
/// then visits every link-bearing attribute found in the downloaded document.
/// </summary>
/// <param name="httpClient">
/// Client used for the download; its <c>BaseAddress</c> is the base against which discovered
/// link values are resolved.
/// </param>
/// <param name="uri">Address of the page to download and save.</param>
/// <param name="level">Depth of this page; discovered links are visited with <c>level + 1</c>.</param>
private void SaveHtmlPage(HttpClient httpClient, Uri uri, int level)
{
    var document = new HtmlDocument();

    // Release the response and its content stream as soon as the document has been loaded,
    // rather than leaving them undisposed while the recursive crawl below runs.
    // The blocking .Result calls are kept because the method's void signature is part of its contract.
    using (var response = httpClient.GetAsync(uri).Result)
    using (var contentStream = response.Content.ReadAsStreamAsync().Result)
    {
        document.Load(contentStream, Encoding.UTF8);
    }

    _logger.MakeLog($"Save page {uri.AbsoluteUri}");
    _contentSaver.SaveHtmlDocument(uri, CreateFileName(document), CreateStreamHTMLPage(document));

    var attributesWithLinks = document.DocumentNode
        .Descendants()
        .SelectMany(d => d.Attributes.Where(IsAttributeWithLink));

    foreach (var attributesWithLink in attributesWithLinks)
    {
        // NOTE(review): links are resolved against httpClient.BaseAddress, not against the page's
        // own uri, so relative links on nested pages may resolve differently than in a browser — confirm intent.
        var linkUri = new Uri(httpClient.BaseAddress, attributesWithLink.Value);
        GetPageByURL(httpClient, linkUri, level + 1);
    }
}
/// <summary>
/// Downloads the HTML document at <paramref name="uri"/>, saves it through the content saver,
/// then scans every link-bearing attribute found in the document.
/// Does nothing when <paramref name="uri"/> is rejected by the configured URL constraints.
/// </summary>
/// <param name="httpClient">
/// Client used for the download; its <c>BaseAddress</c> is the base against which discovered
/// link values are resolved.
/// </param>
/// <param name="uri">Address of the document to download and save.</param>
/// <param name="level">Depth of this document; discovered links are scanned with <c>level + 1</c>.</param>
private void ProcessHtmlDocument(HttpClient httpClient, Uri uri, int level)
{
    if (!IsAcceptableUri(uri, _urlConstraints))
    {
        return;
    }

    var document = new HtmlDocument();

    // Release the response and its content stream as soon as the document has been loaded,
    // rather than leaving them undisposed while the recursive scan below runs.
    // The blocking .Result calls are kept because the method's void signature is part of its contract.
    using (var response = httpClient.GetAsync(uri).Result)
    using (var contentStream = response.Content.ReadAsStreamAsync().Result)
    {
        document.Load(contentStream, Encoding.UTF8);
    }

    _contentSaver.SaveHtmlDocument(uri, GetDocumentFileName(document), GetDocumentStream(document));

    var attributesWithLinks = document.DocumentNode
        .Descendants()
        .SelectMany(d => d.Attributes.Where(IsAttributeWithLink));

    foreach (var attributesWithLink in attributesWithLinks)
    {
        // NOTE(review): links are resolved against httpClient.BaseAddress, not against the document's
        // own uri, so relative links on nested pages may resolve differently than in a browser — confirm intent.
        var linkUri = new Uri(httpClient.BaseAddress, attributesWithLink.Value);
        ScanUrl(httpClient, linkUri, level + 1);
    }
}
/// <summary>
/// Downloads the HTML document at <paramref name="uri"/>, hands the downloaded bytes to the
/// content saver, then loads every link-bearing attribute found in the document.
/// Progress is reported through the logger before the download and after parsing.
/// </summary>
/// <param name="httpClient">
/// Client used for the download; its <c>BaseAddress</c> is the base against which discovered
/// link values are resolved.
/// </param>
/// <param name="uri">Address of the document to download and save.</param>
/// <param name="level">Depth of this document; discovered links are loaded with <c>level + 1</c>.</param>
private void ProcessHtmlDocument(HttpClient httpClient, Uri uri, int level)
{
    logger.Log($"Url founded: {uri}");

    var document = new HtmlDocument();

    // The response stream is both parsed and passed to the saver, so the response must stay alive
    // until SaveHtmlDocument returns; it is released before the recursive crawl below.
    // NOTE(review): assumes SaveHtmlDocument finishes reading the stream before it returns — confirm.
    // The blocking .Result calls are kept because the method's void signature is part of its contract.
    using (var response = httpClient.GetAsync(uri).Result)
    using (var memoryStream = response.Content.ReadAsStreamAsync().Result)
    {
        document.Load(memoryStream, Encoding.UTF8);
        logger.Log($"Html loaded: {uri}");

        // Loading advanced the stream position; rewind so the saver gets the content from the start.
        memoryStream.Seek(0, SeekOrigin.Begin);
        contentSaver.SaveHtmlDocument(uri, GetDocumentFileName(document), memoryStream);
    }

    var attributesWithLinks = document.DocumentNode
        .Descendants()
        .SelectMany(d => d.Attributes.Where(IsAttributeWithLink));

    foreach (var attributesWithLink in attributesWithLinks)
    {
        // NOTE(review): links are resolved against httpClient.BaseAddress, not against the document's
        // own uri, so relative links on nested pages may resolve differently than in a browser — confirm intent.
        var linkUri = new Uri(httpClient.BaseAddress, attributesWithLink.Value);
        LoadPage(httpClient, linkUri, level + 1);
    }
}