/// <summary>
/// Downloads the page at <paramref name="uri"/>, hands it to <c>_contentSaver</c>, and then
/// passes every link found in the page to <c>GetPageByURL</c>.
/// </summary>
/// <param name="httpClient">
/// Client used to fetch the page; its <c>BaseAddress</c> is the base against which link values are resolved.
/// </param>
/// <param name="uri">Address of the page to download.</param>
/// <param name="level">
/// Value forwarded as <c>level + 1</c> to <c>GetPageByURL</c> (presumably the current crawl depth).
/// </param>
private void SaveHtmlPage(HttpClient httpClient, Uri uri, int level)
        {
            var document = new HtmlDocument();

            // The method is synchronous, so the tasks are blocked on with .Result
            // (failures surface as AggregateException). The response and its content
            // stream are disposed as soon as the document has been parsed.
            using (var response = httpClient.GetAsync(uri).Result)
            using (var contentStream = response.Content.ReadAsStreamAsync().Result)
            {
                document.Load(contentStream, Encoding.UTF8);
            }

            _logger.MakeLog($"Save page {uri.AbsoluteUri}");
            _contentSaver.SaveHtmlDocument(uri, CreateFileName(document), CreateStreamHTMLPage(document));

            // Collect every attribute, on any node, that IsAttributeWithLink accepts.
            var attributesWithLinks = document.DocumentNode.Descendants().SelectMany(d => d.Attributes.Where(IsAttributeWithLink));

            foreach (var attributeWithLink in attributesWithLinks)
            {
                Uri linkUri;

                try
                {
                    // NOTE(review): links are resolved against the client's BaseAddress, not
                    // against the page's own uri - confirm this is intended for relative links.
                    linkUri = new Uri(httpClient.BaseAddress, attributeWithLink.Value);
                }
                catch (UriFormatException)
                {
                    // A single malformed link must not abort processing of the remaining links.
                    _logger.MakeLog($"Skip malformed link {attributeWithLink.Value}");
                    continue;
                }

                GetPageByURL(httpClient, linkUri, level + 1);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Downloads the HTML document at <paramref name="uri"/> (when <c>IsAcceptableUri</c> allows it),
        /// hands it to <c>_contentSaver</c>, and then passes every link found in it to <c>ScanUrl</c>.
        /// </summary>
        /// <param name="httpClient">
        /// Client used to fetch the document; its <c>BaseAddress</c> is the base against which link values are resolved.
        /// </param>
        /// <param name="uri">Address of the document to download.</param>
        /// <param name="level">
        /// Value forwarded as <c>level + 1</c> to <c>ScanUrl</c> (presumably the current crawl depth).
        /// </param>
        private void ProcessHtmlDocument(HttpClient httpClient, Uri uri, int level)
        {
            // Nothing is downloaded for addresses rejected by the configured constraints.
            if (!IsAcceptableUri(uri, _urlConstraints))
            {
                return;
            }

            var document = new HtmlDocument();

            // The method is synchronous, so the tasks are blocked on with .Result
            // (failures surface as AggregateException). The response and its content
            // stream are disposed as soon as the document has been parsed.
            using (var response = httpClient.GetAsync(uri).Result)
            using (var contentStream = response.Content.ReadAsStreamAsync().Result)
            {
                document.Load(contentStream, Encoding.UTF8);
            }

            _contentSaver.SaveHtmlDocument(uri, GetDocumentFileName(document), GetDocumentStream(document));

            // Collect every attribute, on any node, that IsAttributeWithLink accepts.
            var attributesWithLinks = document.DocumentNode.Descendants().SelectMany(d => d.Attributes.Where(IsAttributeWithLink));

            foreach (var attributeWithLink in attributesWithLinks)
            {
                Uri linkUri;

                try
                {
                    // NOTE(review): links are resolved against the client's BaseAddress, not
                    // against the document's own uri - confirm this is intended for relative links.
                    linkUri = new Uri(httpClient.BaseAddress, attributeWithLink.Value);
                }
                catch (UriFormatException)
                {
                    // A single malformed link must not abort processing of the remaining links;
                    // it is skipped (no logger is visible in this block to report it).
                    continue;
                }

                ScanUrl(httpClient, linkUri, level + 1);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Downloads the HTML document at <paramref name="uri"/>, hands the downloaded stream to
        /// <c>contentSaver</c>, and then passes every link found in the document to <c>LoadPage</c>.
        /// </summary>
        /// <param name="httpClient">
        /// Client used to fetch the document; its <c>BaseAddress</c> is the base against which link values are resolved.
        /// </param>
        /// <param name="uri">Address of the document to download.</param>
        /// <param name="level">
        /// Value forwarded as <c>level + 1</c> to <c>LoadPage</c> (presumably the current crawl depth).
        /// </param>
        private void ProcessHtmlDocument(HttpClient httpClient, Uri uri, int level)
        {
            logger.Log($"Url founded: {uri}");

            var document = new HtmlDocument();

            // The method is synchronous, so the tasks are blocked on with .Result
            // (failures surface as AggregateException). The response and its content
            // stream are released once the document has been parsed and saved.
            using (var response = httpClient.GetAsync(uri).Result)
            using (var memoryStream = response.Content.ReadAsStreamAsync().Result)
            {
                document.Load(memoryStream, Encoding.UTF8);
                logger.Log($"Html loaded: {uri}");

                // Parsing consumed the stream; rewind so the saver reads it from the start.
                memoryStream.Seek(0, SeekOrigin.Begin);

                // NOTE(review): assumes SaveHtmlDocument finishes reading the stream before it
                // returns, since the stream is disposed right after this call - confirm.
                contentSaver.SaveHtmlDocument(uri, GetDocumentFileName(document), memoryStream);
            }

            // Collect every attribute, on any node, that IsAttributeWithLink accepts.
            var attributesWithLinks = document.DocumentNode.Descendants().SelectMany(d => d.Attributes.Where(IsAttributeWithLink));

            foreach (var attributeWithLink in attributesWithLinks)
            {
                Uri linkUri;

                try
                {
                    // NOTE(review): links are resolved against the client's BaseAddress, not
                    // against the document's own uri - confirm this is intended for relative links.
                    linkUri = new Uri(httpClient.BaseAddress, attributeWithLink.Value);
                }
                catch (UriFormatException)
                {
                    // A single malformed link must not abort processing of the remaining links.
                    logger.Log($"Malformed link skipped: {attributeWithLink.Value}");
                    continue;
                }

                LoadPage(httpClient, linkUri, level + 1);
            }
        }