private void CallPageCrawledEvent(long siteId, long pageId, CrawledPage crawledPage)
{
    using (var context = new ApplicationDbContext())
    {
        // Reload the site and page so the event carries the freshest data from the database.
        var site = context.Sites.FirstOrDefault(m => m.Id == siteId);
        var page = site?.Pages.FirstOrDefault(m => m.Id == pageId);
        if (!string.IsNullOrEmpty(page?.Text))
        {
            log.Debug("Invoking PageCrawled event");
            // Raise the event on a background thread so subscribers cannot block the crawler.
            (new Thread(() => PageCrawled?.Invoke(site.Id, page.Id, page.Text, crawledPage))).Start();
        }
    }
}
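For reference, a minimal sketch of how the event raised above might be declared and consumed. The delegate signature (site id, page id, page text, crawled page) is inferred from the Invoke call, and the OnPageCrawled handler is hypothetical, not part of the source:

// Assumed event declaration matching the Invoke call above (inferred, not shown in the source).
public event Action<long, long, string, CrawledPage> PageCrawled;

// Hypothetical subscriber; it runs on the background thread started above, so it must be thread-safe.
private void OnPageCrawled(long siteId, long pageId, string text, CrawledPage crawledPage)
{
    log.Debug($"PageCrawled: site {siteId}, page {pageId}, {text.Length} characters of text");
}

// Subscribing, e.g. from the owning component:
// crawler.PageCrawled += OnPageCrawled;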
Example #2
        // O(n) + O(n) + O(1) + O(n) = 3O(n) + O(1) => O(n)
        private async Task CrawlPages(Uri currentPage, int maxDepth, int currentDepth = 0)
        {
            // O(n) - worst case
            _pagesVisited.TryAdd(currentPage.ToString(), null); // optimisation - add straight away to avoid revisiting.

            currentDepth++;

            // Get currentPage content.
            var content = await _downloader.GetContent(currentPage);

            List<string> links = null;

            if (content != null)
            {
                links = _parser.FindLinks(content, currentPage); // O(n)
            }

            var crawledPage = new CrawledPage(currentPage, currentDepth, links);

            // O(1)
            _pagesVisited.TryUpdate(currentPage.ToString(), crawledPage, null);

            PageCrawled?.Invoke(this, crawledPage); // raise crawled event!

            // If at limit of currentDepth, then go no further.
            if (currentDepth >= maxDepth)
            {
                return;
            }

            if (links != null)
            {
                // O(n)
                var tasks = links.Select(l => CrawlSubPage(l, currentPage, maxDepth, currentDepth));
                await Task.WhenAll(tasks);
            }
        }
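The CrawlSubPage helper awaited above is not shown in the source; a plausible sketch follows, assuming _pagesVisited is a ConcurrentDictionary<string, CrawledPage>. It only illustrates the idea of resolving each link against the parent URI and skipping already-visited pages before recursing:

// Hypothetical helper (not in the source): resolve a relative link and recurse only for unseen pages.
private async Task CrawlSubPage(string link, Uri parentPage, int maxDepth, int currentDepth)
{
    // Turn the raw link into an absolute URI relative to the page it was found on.
    if (!Uri.TryCreate(parentPage, link, out var absoluteUri))
    {
        return; // skip malformed links
    }

    // Cheap O(1) pre-filter; CrawlPages still guards against races via TryAdd.
    if (_pagesVisited.ContainsKey(absoluteUri.ToString()))
    {
        return;
    }

    await CrawlPages(absoluteUri, maxDepth, currentDepth);
}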