Exemple #1
0
        /// <summary>
        /// Downloads the HTML page at <paramref name="uri"/>, saves it through <c>_saver</c>,
        /// and then starts <c>LoadUrlAsync</c> for every link-carrying attribute found in the page.
        /// </summary>
        /// <param name="client">HTTP client used to fetch the page; its <c>BaseAddress</c> is the base for resolving links.</param>
        /// <param name="downloadedLinks">Set passed through unchanged to <c>LoadUrlAsync</c>.</param>
        /// <param name="uri">Address of the HTML page to process.</param>
        /// <param name="level">Level of the current page; discovered links are loaded with <c>level + 1</c>.</param>
        /// <returns>A task that completes once the page is saved and all link tasks have finished.</returns>
        private async Task ProcessHtmlAsync(HttpClient client, ConcurrentHashSet <Uri> downloadedLinks, Uri uri, int level)
        {
            _logger.LogInfo($"Found html: {uri}");

            if (!IsValidUri(uri, ConstraintType.Url))
            {
                return;
            }

            var document = new HtmlDocument();

            // Dispose the response and its stream as soon as the document is parsed,
            // so they are not kept alive while the linked resources are being loaded.
            using (var response = await client.GetAsync(uri))
            using (var htmlStream = await response.Content.ReadAsStreamAsync())
            {
                document.Load(htmlStream, Encoding.UTF8);
            }

            _logger.LogInfo($"Loaded html: {uri}");

            await _saver.SaveHtmlAsync(uri, document);

            var internalLinks = document.DocumentNode.Descendants()
                                .SelectMany(d => d.Attributes.Where(IsAttributeWithLink));

            var internalLinkTasks = new List <Task>();

            foreach (var internalLink in internalLinks)
            {
                // NOTE(review): links are resolved against client.BaseAddress, not against the
                // current page's uri; confirm this is intended for page-relative links.
                Uri linkUri;
                if (!Uri.TryCreate(client.BaseAddress, internalLink.Value, out linkUri))
                {
                    // A single malformed attribute value must not abort processing of the whole page.
                    _logger.LogInfo($"Skipped invalid link: {internalLink.Value}");
                    continue;
                }

                internalLinkTasks.Add(LoadUrlAsync(client, downloadedLinks, linkUri, level + 1));
            }

            await Task.WhenAll(internalLinkTasks);
        }