/// <summary>
/// Decides whether <paramref name="pageToCrawl"/> should be crawled by consulting the
/// crawl decision maker and, when that allows the page, the optional custom decision delegate.
/// </summary>
/// <param name="pageToCrawl">The page being considered for crawling.</param>
/// <returns><c>true</c> when the final decision allows the page; otherwise <c>false</c>.</returns>
protected virtual bool ShouldCrawlPage(PageToCrawl pageToCrawl)
{
    CrawlDecision decision = _crawlDecisionMaker.ShouldCrawlPage(pageToCrawl, _crawlContext);

    if (!decision.Allow)
    {
        // A max-pages rejection is reported once at information level and returns
        // immediately, without raising the disallowed event.
        if (decision.Reason.Contains("MaxPagesToCrawl limit of"))
        {
            _logger.LogInformation("MaxPagesToCrawlLimit has been reached or scheduled. No more pages will be scheduled.");
            return false;
        }
    }
    else if (_shouldCrawlPageDecisionMaker == null)
    {
        // No custom delegate registered: the page stays allowed.
        decision = CrawlDecision.AllowCrawl();
    }
    else
    {
        // The custom delegate's decision replaces the built-in one.
        decision = _shouldCrawlPageDecisionMaker.Invoke(pageToCrawl, _crawlContext);
    }

    if (!decision.Allow)
    {
        _logger.LogDebug("Page [{0}] not crawled, [{1}]", pageToCrawl.Uri.AbsoluteUri, decision.Reason);

        // Only the async event is raised; the synchronous FirePageCrawlDisallowedEvent call is currently disabled.
        FirePageCrawlDisallowedEventAsync(pageToCrawl, decision.Reason);
    }

    return decision.Allow;
}
/// <summary>
/// Decides whether <paramref name="pageToCrawl"/> should be crawled. The crawl decision maker
/// is consulted first; if it allows the page, the optional custom decision delegate gets the
/// final say. Allowed pages are passed to <c>AddPageToContext</c>; rejected pages are logged
/// and reported through both page-crawl-disallowed events.
/// </summary>
/// <param name="pageToCrawl">The page being considered for crawling.</param>
/// <returns><c>true</c> when the final decision allows the page; otherwise <c>false</c>.</returns>
protected virtual bool ShouldCrawlPage(PageToCrawl pageToCrawl)
{
    CrawlDecision shouldCrawlPageDecision = _crawlDecisionMaker.ShouldCrawlPage(pageToCrawl, _crawlContext);

    if (shouldCrawlPageDecision.Allow)
    {
        // The custom delegate, when present, replaces the built-in decision;
        // without one the page stays allowed.
        shouldCrawlPageDecision = (_shouldCrawlPageDecisionMaker != null)
            ? _shouldCrawlPageDecisionMaker.Invoke(pageToCrawl, _crawlContext)
            : new CrawlDecision { Allow = true };
    }

    if (shouldCrawlPageDecision.Allow)
    {
        AddPageToContext(pageToCrawl);
    }
    else
    {
        _logger.DebugFormat("Page [{0}] not crawled, [{1}]", pageToCrawl.Uri.AbsoluteUri, shouldCrawlPageDecision.Reason);
        FirePageCrawlDisallowedEventAsync(pageToCrawl, shouldCrawlPageDecision.Reason);
        FirePageCrawlDisallowedEvent(pageToCrawl, shouldCrawlPageDecision.Reason);
    }

    // Runs for allowed and rejected pages alike, with the final decision.
    SignalCrawlStopIfNeeded(shouldCrawlPageDecision);

    return shouldCrawlPageDecision.Allow;
}
/// <summary>
/// Decides whether <paramref name="pageToCrawl"/> should be crawled. Once the decision maker
/// has reported the max-pages limit, <c>_maxPagesToCrawlLimitReachedOrScheduled</c> is set and
/// every later call returns <c>false</c> without consulting the decision maker again.
/// Otherwise the crawl decision maker is consulted and, if it allows the page, the optional
/// custom decision delegate gets the final say.
/// </summary>
/// <param name="pageToCrawl">The page being considered for crawling.</param>
/// <returns><c>true</c> when the final decision allows the page; otherwise <c>false</c>.</returns>
protected virtual bool ShouldCrawlPage(PageToCrawl pageToCrawl)
{
    if (_maxPagesToCrawlLimitReachedOrScheduled)
    {
        return false;
    }

    var shouldCrawlPageDecision = _crawlDecisionMaker.ShouldCrawlPage(pageToCrawl, _crawlContext);

    if (!shouldCrawlPageDecision.Allow && shouldCrawlPageDecision.Reason.Contains("MaxPagesToCrawl limit of"))
    {
        // Remember the limit so subsequent calls short-circuit at the top of the method.
        _maxPagesToCrawlLimitReachedOrScheduled = true;
        _logger.LogInformation("MaxPagesToCrawlLimit has been reached or scheduled. No more pages will be scheduled.");
        return false;
    }

    if (shouldCrawlPageDecision.Allow)
    {
        // The custom delegate, when present, replaces the built-in decision;
        // without one the page stays allowed.
        shouldCrawlPageDecision = (_shouldCrawlPageDecisionMaker != null)
            ? _shouldCrawlPageDecisionMaker.Invoke(pageToCrawl, _crawlContext)
            : new CrawlDecision { Allow = true };
    }

    if (!shouldCrawlPageDecision.Allow)
    {
        // Constant message template with arguments: nothing is formatted unless Debug is
        // enabled, and the values remain available to structured log sinks.
        _logger.LogDebug("Page [{PageUri}] not crawled, [{Reason}]", pageToCrawl.Uri.AbsoluteUri, shouldCrawlPageDecision.Reason);
        FirePageCrawlDisallowedEventAsync(pageToCrawl, shouldCrawlPageDecision.Reason);
        FirePageCrawlDisallowedEvent(pageToCrawl, shouldCrawlPageDecision.Reason);
    }

    // Runs for allowed and rejected pages alike, with the final decision.
    SignalCrawlStopIfNeeded(shouldCrawlPageDecision);

    return shouldCrawlPageDecision.Allow;
}
/// <summary>
/// Decides whether <paramref name="pageToCrawl"/> should be crawled, based solely on the
/// crawl decision maker. Once the max-pages limit has been reported,
/// <c>_maxPagesToCrawlLimitReachedOrScheduled</c> is set and every later call returns
/// <c>false</c> without consulting the decision maker again.
/// </summary>
/// <param name="pageToCrawl">The page being considered for crawling.</param>
/// <returns><c>true</c> when the decision allows the page; otherwise <c>false</c>.</returns>
protected virtual bool ShouldCrawlPage(PageToCrawl pageToCrawl)
{
    if (_maxPagesToCrawlLimitReachedOrScheduled)
    {
        return false;
    }

    var decision = _crawlDecisionMaker.ShouldCrawlPage(pageToCrawl, _crawlContext);

    bool maxPagesLimitHit = !decision.Allow && decision.Reason.Contains("MaxPagesToCrawl limit of");
    if (maxPagesLimitHit)
    {
        // Remember the limit so subsequent calls short-circuit at the top of the method.
        _maxPagesToCrawlLimitReachedOrScheduled = true;
        return false;
    }

    // NOTE: the custom _shouldCrawlPageDecisionMaker delegate, the "not crawled" debug log and
    // the page-crawl-disallowed events are not invoked by this method; that logic is disabled.

    SignalCrawlStopIfNeeded(decision);

    return decision.Allow;
}