Example #1
0
        /// <summary>
        /// Fetches the page at <paramref name="url"/>, hands the parsed document to the
        /// resource downloader and link worker, saves the resulting markup, and then
        /// recursively processes every link collected from the page whose target file
        /// does not exist on disk yet.
        /// </summary>
        /// <param name="client">HTTP client used for this page and all recursive requests.</param>
        /// <param name="url">Absolute address of the page to process.</param>
        /// <param name="depth">
        /// Nesting level of this page; passed to <c>_depthConstraint.IsDepthValid</c> to decide
        /// whether links from this page are collected. Every link found on this page is
        /// processed with <c>depth + 1</c>.
        /// </param>
        private void ProcessPage(HttpClient client, Uri url, int depth = 0)
        {
            _consoleLogger.LogProcess($"Page processing: {url}");

            // NOTE: the method is synchronous, so the async HTTP calls are blocked on here.
            using (var response = client.GetAsync(url).Result)
            using (var content = response.Content)
            {
                var html = content.ReadAsStringAsync().Result;
                if (html == null)
                {
                    return;
                }

                var pageFileName = _urlParser.GetFileName(url);
                var document     = new HtmlDocument();
                document.LoadHtml(html);

                _fileDownLoader.DownLoadResources(document, url);
                _linkWorker.SetRelativePaths(document, url);

                // Links are only collected while the depth constraint accepts the current level.
                var links = new List<Uri>();
                if (_depthConstraint.IsDepthValid(depth))
                {
                    links.AddRange(_linkWorker.GetAllLinksFromPage(document));
                }

                _fileSaver.SavePage(pageFileName, document.DocumentNode.OuterHtml);

                // All links on this page are siblings and share the same depth. The previous
                // `++depth` inside the loop gave each successive link a larger depth.
                var childDepth = depth + 1;

                foreach (var link in links)
                {
                    try
                    {
                        var targetUrl = link.IsAbsoluteUri ? link : new Uri(url, link);

                        // An existing target file is treated as "already processed".
                        var targetFileName = _urlParser.GetFileName(targetUrl);
                        if (!File.Exists(targetFileName))
                        {
                            ProcessPage(client, targetUrl, childDepth);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a failing link is logged and the remaining links continue.
                        _consoleLogger.LogError(ex.Message);
                    }
                }
            }
        }
Example #2
0
        /// <summary>
        /// For every <paramref name="tagName"/> element in <paramref name="document"/>, reads the
        /// URL in <paramref name="attributeName"/>, downloads the referenced file unless the
        /// target file already exists, and rewrites the attribute to a <c>file://</c> reference
        /// to the local file.
        /// </summary>
        /// <param name="document">Parsed page whose nodes are inspected and rewritten in place.</param>
        /// <param name="pageUrl">Address of the page; used as the base for relative resource URLs.</param>
        /// <param name="tagName">Element name to select (used in the XPath <c>//tagName</c>).</param>
        /// <param name="attributeName">Attribute that holds the resource URL.</param>
        private void DownloadResources(HtmlDocument document, Uri pageUrl, string tagName, string attributeName)
        {
            var nodes = document.DocumentNode.SelectNodes($"//{tagName}");
            if (nodes == null)
            {
                // No matching elements on the page.
                return;
            }

            using (var client = new WebClient())
            {
                foreach (var htmlNode in nodes)
                {
                    var link = htmlNode.GetAttributeValue(attributeName, null);

                    // A missing attribute yields null, which TryCreate rejects as well.
                    if (!Uri.TryCreate(link, UriKind.RelativeOrAbsolute, out var url))
                    {
                        continue;
                    }

                    if (!_extentionsConstraint.IsFileExtentionValid(link))
                    {
                        continue;
                    }

                    try
                    {
                        if (!url.IsAbsoluteUri)
                        {
                            url  = new Uri(pageUrl, url);
                            link = url.OriginalString;
                        }

                        // Path mapping and directory creation stay inside the try so that one
                        // resource with an unusable path is logged and skipped instead of
                        // aborting the remaining resources.
                        var localPath = _parser.GetFileName(url);
                        new FileInfo(localPath).Directory?.Create();

                        if (!File.Exists(localPath))
                        {
                            _consoleLogger.LogProcess($"\t{link}");
                            // url is absolute here; pass it directly rather than re-parsing the string.
                            client.DownloadFile(url, localPath);
                        }

                        htmlNode.SetAttributeValue(attributeName, $"file://{localPath}");
                    }
                    catch (Exception ex)
                    {
                        // Best effort: log the failure and continue with the next node.
                        _consoleLogger.LogError(ex.Message);
                    }
                }
            }
        }