private void CallPageCrawledEvent(long siteId, long pageId, CrawledPage crawledPage)
{
    using (var context = new ApplicationDbContext())
    {
        var site = context.Sites.FirstOrDefault(m => m.Id == siteId);
        var page = site?.Pages.FirstOrDefault(m => m.Id == pageId);

        // Only raise the event if the page actually has content.
        if (!string.IsNullOrEmpty(page?.Text))
        {
            log.Debug("Invoking PageCrawled event");

            // Raise the event on a separate thread so subscribers don't block the caller.
            (new Thread(() =>
            {
                PageCrawled?.Invoke(site.Id, page.Id, page.Text, crawledPage);
            })).Start();
        }
    }
}
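For context, the event raised above passes the site id, page id, page text and the crawled page to subscribers. The event declaration itself isn't shown in this snippet, so the sketch below is an assumption about what it might look like; the `PageCrawledHandler` delegate name is hypothetical.

// Hypothetical declaration for the event invoked above - the actual delegate
// type isn't shown here, so these names are assumptions.
public delegate void PageCrawledHandler(long siteId, long pageId, string text, CrawledPage crawledPage);

public event PageCrawledHandler PageCrawled;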
// O(n) + O(n) + O(1) + O(n) = 3O(n) + O(1) => O(n)
private async Task CrawlPages(Uri currentPage, int maxDepth, int currentDepth = 0)
{
    // O(n) - worst case.
    // Optimisation: add the page straight away to avoid revisiting it.
    _pagesVisited.TryAdd(currentPage.ToString(), null);

    currentDepth++;

    // Get the current page's content.
    var content = await _downloader.GetContent(currentPage);

    List<string> links = null;
    if (content != null) // O(n)
    {
        links = _parser.FindLinks(content, currentPage);
    }

    var crawledPage = new CrawledPage(currentPage, currentDepth, links);

    // O(1)
    _pagesVisited.TryUpdate(currentPage.ToString(), crawledPage, null);

    // Raise the crawled event.
    PageCrawled?.Invoke(this, crawledPage);

    // If at the limit of currentDepth, go no further.
    if (currentDepth >= maxDepth)
    {
        return;
    }

    if (links != null)
    {
        // O(n)
        var tasks = links.Select(l => CrawlSubPage(l, currentPage, maxDepth, currentDepth));
        await Task.WhenAll(tasks);
    }
}
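`CrawlSubPage` isn't shown in this snippet. The sketch below is one way it could work, assuming the same class fields as above: it resolves the extracted link against the page it was found on, skips URLs that are already in `_pagesVisited`, and then recurses via `CrawlPages`. The exact checks (and any same-domain filtering) in the real implementation may differ.

// Minimal sketch of CrawlSubPage - an assumption, not the original implementation.
private async Task CrawlSubPage(string link, Uri parentPage, int maxDepth, int currentDepth)
{
    // Resolve the link relative to the page it was found on.
    if (!Uri.TryCreate(parentPage, link, out var absoluteUri))
    {
        return; // malformed link - ignore it
    }

    // Skip pages we've already queued or crawled.
    if (_pagesVisited.ContainsKey(absoluteUri.ToString()))
    {
        return;
    }

    await CrawlPages(absoluteUri, maxDepth, currentDepth);
}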