Пример #1
0
        /// <summary>
        /// Crawls <paramref name="page"/> and, only when the crawler produces a result,
        /// notifies listeners, saves the result and deletes the result's URL from the
        /// crawlable-pages repository (in that order).
        /// </summary>
        /// <param name="page">The page handed to the crawler.</param>
        private void Crawl(CrawlablePage page)
        {
            // When the crawler returns an empty option nothing below runs.
            _crawler.Crawl(page).IfSome(crawled =>
            {
                // Announce the result first, then persist it.
                _pageCrawledNotifier.Notify(crawled);
                _crawlResultSaver.Save(crawled);

                // The delete is the last step, after notify and save have returned.
                _crawlablePagesRepository.Delete(crawled.Url);
            });
        }
Пример #2
0
 /// <summary>
 /// Fetches the content behind <paramref name="page"/> and wraps it in a
 /// <see cref="PageCrawlResult"/>.
 /// </summary>
 /// <param name="page">Page whose <c>Uri</c> is parsed and passed to the query processor.</param>
 /// <returns>
 /// A result carrying the fetched content, the page URI and the crawl timestamp, or
 /// <c>None</c> when the query processor yields no content or any exception is thrown
 /// inside the <c>try</c> block (the exception is logged, not rethrown).
 /// </returns>
 public Option <PageCrawlResult> Crawl(CrawlablePage page)
 {
     // Constant message template + argument: the URI is never parsed as part of the
     // template (URLs may contain '{' / '}'), and it is available as a structured value.
     _logger.LogInformation("Crawling page {Uri}", page.Uri);
     try
     {
         var uri           = new Uri(page.Uri);
         var contentOption = _queryProcessor.ProcessQuery(uri);
         return(contentOption.Match(content =>
         {
             // The extracted links are not stored on the result. The call is kept so that
             // any exception it throws still turns the crawl into None, as before.
             // NOTE(review): if ExtractLinks has no side effects this call can be dropped — confirm.
             _linksExtractor.ExtractLinks(content);
             return new PageCrawlResult()
             {
                 Html = content,
                 Url = page.Uri,
                 // NOTE(review): local time, formatted culture-invariantly; consider UTC
                 // if consumers of TimeCrawled do not depend on local time — confirm.
                 TimeCrawled = DateTime.Now.ToString(CultureInfo.InvariantCulture)
             };
         }, () => Option <PageCrawlResult> .None));
     }
     catch (Exception ex)
     {
         // Best-effort crawl: every failure is logged and reported to the caller as None.
         _logger.LogError(0, ex, "Exception when crawling page {Uri}", page.Uri);
         return(Option <PageCrawlResult> .None);
     }
 }