/// <summary>
/// Fetches the page at <paramref name="url"/>, downloads its resources, saves the page,
/// and then recursively processes the pages it links to.
/// </summary>
/// <param name="client">HTTP client used to request this page and every linked page.</param>
/// <param name="url">Address of the page to fetch; also the base for resolving relative links.</param>
/// <param name="depth">
/// Depth of this page in the crawl. It is checked by the depth constraint before links
/// are collected, and every linked page is processed with <c>depth + 1</c>.
/// </param>
private void ProcessPage(HttpClient client, Uri url, int depth = 0)
{
    _consoleLogger.LogProcess($"Page processing: {url}");

    // NOTE(review): blocking on .Result is kept because the method is synchronous;
    // failures surface wrapped in AggregateException.
    using (var response = client.GetAsync(url).Result)
    using (var content = response.Content)
    {
        var result = content.ReadAsStringAsync().Result;
        if (result == null)
        {
            return;
        }

        var fileName = _urlParser.GetFileName(url);

        var hap = new HtmlDocument();
        hap.LoadHtml(result);

        _fileDownLoader.DownLoadResources(hap, url);
        _linkWorker.SetRelativePaths(hap, url);

        // Links are only collected (and therefore only followed) while the depth constraint allows it.
        var links = new List<Uri>();
        if (_depthConstraint.IsDepthValid(depth))
        {
            links.AddRange(_linkWorker.GetAllLinksFromPage(hap));
        }

        _fileSaver.SavePage(fileName, hap.DocumentNode.OuterHtml);

        foreach (var link in links)
        {
            try
            {
                var targetUrl = link.IsAbsoluteUri ? link : new Uri(url, link);
                var targetFileName = _urlParser.GetFileName(targetUrl);

                // A page whose file already exists on disk is not fetched again.
                if (!File.Exists(targetFileName))
                {
                    // Every child of this page sits exactly one level deeper. Using ++depth here
                    // would push each later sibling one level further down than the previous one.
                    ProcessPage(client, targetUrl, depth + 1);
                }
            }
            catch (Exception ex)
            {
                // A failing link is logged and must not stop processing of the remaining links.
                _consoleLogger.LogError(ex.Message);
            }
        }
    }
}
/// <summary>
/// Downloads the resources referenced by every <paramref name="tagName"/> element of the document
/// and rewrites the referencing attribute to a <c>file://</c> link to the local copy.
/// </summary>
/// <param name="document">Parsed page whose nodes are inspected and modified in place.</param>
/// <param name="pageUrl">Address of the page; used as the base for resolving relative resource links.</param>
/// <param name="tagName">Element name selected with the XPath <c>//tagName</c>.</param>
/// <param name="attributeName">Attribute that holds the resource link on each selected element.</param>
private void DownloadResources(HtmlDocument document, Uri pageUrl, string tagName, string attributeName)
{
    var nodes = document.DocumentNode.SelectNodes($"//{tagName}");
    if (nodes == null)
    {
        return;
    }

    using (var client = new WebClient())
    {
        foreach (var htmlNode in nodes)
        {
            // A missing attribute yields null, which Uri.TryCreate rejects, so the node is skipped.
            var link = htmlNode.GetAttributeValue(attributeName, null);
            if (!Uri.TryCreate(link, UriKind.RelativeOrAbsolute, out var url))
            {
                continue;
            }

            if (!_extentionsConstraint.IsFileExtentionValid(link))
            {
                continue;
            }

            // Everything that can fail for a single resource (URL resolution, path mapping,
            // directory creation, download) is inside the try, so one bad resource is logged
            // and skipped instead of aborting the remaining resources of the page.
            try
            {
                if (!url.IsAbsoluteUri)
                {
                    url = new Uri(pageUrl, url);
                    link = url.OriginalString;
                }

                var newLink = _parser.GetFileName(url);

                if (!File.Exists(newLink))
                {
                    // Make sure the target directory exists before the download writes into it.
                    new FileInfo(newLink).Directory?.Create();

                    _consoleLogger.LogProcess($"\t{link}");
                    client.DownloadFile(link, newLink);
                }

                // The attribute is redirected to the local file whether it was just downloaded
                // or already present on disk.
                htmlNode.SetAttributeValue(attributeName, $"file://{newLink}");
            }
            catch (Exception ex)
            {
                _consoleLogger.LogError(ex.Message);
            }
        }
    }
}